Merge third_party/brotli/src from https://chromium.googlesource.com/external/font-compression-reference.git at 931479d735e1121548c3b07aec866fd772cc4932

This commit was generated by merge_from_chromium.py.

Change-Id: I0ce348d2a5ef41ea7bd5d2361fa096a634e209f0
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..e74c256
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,27 @@
+// Copyright (c) 2011 Google Inc. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README b/README
new file mode 100644
index 0000000..df97cc2
--- /dev/null
+++ b/README
@@ -0,0 +1,160 @@
+This is a README for the font compression reference code. There are several
+compression related modules in this repository.
+
+brotli/ contains reference code for the Brotli byte-level compression
+algorithm. Note that it is licensed under an Apache 2 license.
+
+src/ contains prototype Java code for compressing fonts.
+
+cpp/ contains prototype C++ code for decompressing fonts.
+
+docs/ contains documents describing the proposed compression format.
+
+= How to run the compression test tool =
+
+This section describes how to run the compression reference code. At this
+writing, the code is intended to produce a bytestream that can be
+reconstructed into a working font, but the reference decompression code is
+not finished, and the exact format of that bytestream is subject to change.
+
+== Building the tool ==
+
+On a standard Unix-style environment, it should be as simple as running “ant”.
+
+The tool depends on sfntly for much of the font work. The lib/ directory
+contains a snapshot jar. If you want to use the latest sfntly sources, cd to
+sfntly’s java subdirectory, run “ant”, and then copy the following files to
+$(thisproject)/lib:
+
+dist/lib/sfntly.jar
+dist/tools/conversion/eot/eotconverter.jar
+dist/tools/conversion/woff/woffconverter.jar
+
+There’s also a dependency on guava (see references below).
+
+The dependencies are subject to their own licenses.
+
+== Setting up the test ==
+
+A run of the tool evaluates a “base” configuration plus one or more test
+configurations, for each font. It measures the file size of the test as a ratio
+over the base file size, then graphs the value of that ratio sorted across all
+files given on the command line.
+
+The test parameters are set by command line options (an improvement from the
+last snapshot). The base is set by the -b command line option, and the
+additional tests are specified by repeated -x command line options (see below).
+
+Each test is specified by a string description. It is a colon-separated list of
+stages. The final stage is entropy compression and can be one of “gzip”,
+“lzma”, “bzip2”, “woff”, “eot” (with actual wire-format MTX compression), or
+“uncomp” (for raw, uncompressed TTF’s). Also, the new wire-format draft
+WOFF2 spec is available as "woff2", and takes an entropy coding as an
+optional argument, as in "woff2/gzip" or "woff2/lzma".
+
+Other stages may optionally include subparameters (following a slash, and
+comma-separated). The stages are:
+
+glyf: performs glyf-table preprocessing based on MTX. The subparameters are:
+1. cbbox (composite bounding box): when specified, the bounding box for
+   composite glyphs is included, otherwise stripped.
+2. sbbox (simple bounding box): when specified, the bounding box for simple
+   glyphs is included.
+3. code: the bytecode is separated out into a separate stream.
+4. triplet: triplet coding (as in MTX) is used.
+5. push: push sequences are separated; if unset, pushes are kept inline in
+   the bytecode.
+6. reslice: components of the glyf table are separated into individual
+   streams, taking the MTX idea of separating the bytecodes further.
+
+hmtx: strips lsb’s from the hmtx table. Based on the idea that lsb’s can be
+reconstructed from bbox.
+
+hdmx: performs the delta coding on hdmx, essentially the same as MTX.
+
+cmap: compresses cmap table: wire format representation is inverse of cmap
+table plus exceptions (one glyph encoded by multiple character codes).
+
+kern: compresses kern table (not robust, intended just for rough testing).
+
+strip: the subparameters are a list of tables to be stripped entirely
+(comma-separated).
+
+The string roughly corresponding to MTX is:
+
+glyf/cbbox,code,triplet,push,hop:hdmx:gzip
+
+Meaning: glyph encoding is used, with simple glyph bboxes stripped (but
+composite glyph bboxes included), triplet coding, push sequences, and hop
+codes. The hdmx table is compressed. And finally, gzip is used as the entropy
+coder.
+
+This differs from MTX in a number of small ways: LZCOMP is not exactly the
+same as gzip. MTX uses three separate compression streams (the base font
+including triplet-coded glyph data, the bytecodes, and the push sequences),
+while this test uses a single stream. MTX also compresses the cvt table (an
+upper bound on the impact of this can be estimated by testing strip/cvt).
+
+Lastly, as a point of methodology, the code by default strips the “dsig” table,
+which would be invalidated by any non-bit-identical change to the font data. If
+it is desired to keep this table, add the “keepdsig” stage.
+
+The string representing the currently most aggressive optimization level is:
+
+glyf/triplet,code,push,reslice:hdmx:hmtx:cmap:kern:lzma
+
+Compared to the MTX configuration above, it also strips the bboxes from
+composite glyphs, reslices the glyf table, compresses the hmtx, cmap, and
+kern tables, and uses lzma as the entropy coder.
+
+The string corresponding to the current WOFF Ultra Condensed draft spec
+document is:
+
+glyf/cbbox,triplet,code,reslice:woff2/lzma
+
+The current C++ codebase can roundtrip compressed files as long as no
+per-table entropy coding is specified, as in the configuration below (this
+will be fixed soon):
+
+glyf/cbbox,triplet,code,reslice:woff2
+
+
+== Running the tool ==
+
+java -jar build/jar/compression.jar *.ttf > chart.html
+
+The tool takes a list of OpenType fonts on the commandline, and generates an
+HTML chart, which it simply outputs to stdout. This chart uses the Google Chart
+API for plotting.
+
+Options:
+
+-b <desc>
+
+Sets the baseline experiment description.
+
+[ -x <desc> ]...
+
+Sets an experiment description. Can be used multiple times.
+
+-o
+
+Outputs the actual compressed file, substituting ".wof2" for ".ttf" in
+the input file name. Only useful when a single -x parameter is specified.
+
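+For example, a run comparing a plain gzip baseline against the draft WOFF2
+format with lzma entropy coding might look like the following (the descriptor
+strings here are only illustrative, not recommended defaults):
+
+java -jar build/jar/compression.jar -b gzip -x woff2/lzma *.ttf > chart.html
+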
+= Decompressing the fonts =
+
+See the cpp/ directory (including cpp/README) for the C++ implementation of
+decompression. This code is based on OTS, and successfully roundtrips the
+basic compression as described in the draft spec.
+
+= References =
+
+sfntly: http://code.google.com/p/sfntly/
+Guava: http://code.google.com/p/guava-libraries/
+MTX: http://www.w3.org/Submission/MTX/
+
+Also please refer to the following documents (currently Google Docs):
+
+WOFF Ultra Condensed file format: proposals and discussion of wire format
+issues (PDF is in docs/ directory)
+
+WOFF Ultra Condensed: more discussion of results and compression techniques.
+This tool was used to prepare the data in that document.
diff --git a/brotli/LICENSE b/brotli/LICENSE
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/brotli/LICENSE
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/brotli/brotlispec.txt b/brotli/brotlispec.txt
new file mode 100644
index 0000000..5a32a2d
--- /dev/null
+++ b/brotli/brotlispec.txt
@@ -0,0 +1,1278 @@
+J. Alakuijala
+Z. Szabadka
+              ______   _______  _______  _______ _________
+             (  __  \ (  ____ )(  ___  )(  ____ \\__   __/
+             | (  \  )| (    )|| (   ) || (    \/   ) (
+             | |   ) || (____)|| (___) || (__       | |
+             | |   | ||     __)|  ___  ||  __)      | |
+             | |   ) || (\ (   | (   ) || (         | |
+             | (__/  )| ) \ \__| )   ( || )         | |
+             (______/ |/   \__/|/     \||/          )_(
+
+
+   DRAFT of
+   Brotli Compression Algorithm Compressed Data Format Specification 1.0
+
+Status of This Memo
+
+   This memo provides information for the Internet community.  This memo
+   does not specify an Internet standard of any kind.  Distribution of
+   this memo is unlimited.
+
+Notices
+
+   Copyright (c) 2013  J. Alakuijala and Z. Szabadka
+
+   Permission is granted to copy and distribute this document for any
+   purpose and without charge, including translations into other
+   languages and incorporation into compilations, provided that the
+   copyright notice and this notice are preserved, and that any
+   substantive changes or deletions from the original are clearly
+   marked.
+
+Abstract
+
+   This specification defines a lossless compressed data format that
+   compresses data using a combination of the LZ77 algorithm and Huffman
+   coding, with efficiency comparable to the best currently available
+   general-purpose compression methods.
+
+1. Introduction
+
+   1.1. Purpose
+
+      The purpose of this specification is to define a lossless
+      compressed data format that:
+         * Is independent of CPU type, operating system, file system,
+           and character set, and hence can be used for interchange;
+         * Can be produced or consumed, even for an arbitrarily long
+           sequentially presented input data stream, using only an a
+           priori bounded amount of intermediate storage, and hence
+           can be used in data communications or similar structures
+           such as Unix filters;
+         * Compresses data with efficiency comparable to the best
+           currently available general-purpose compression methods,
+           and in particular considerably better than the gzip program;
+         * Decompresses much faster than the LZMA implementations.
+
+      The data format defined by this specification does not attempt to:
+         * Allow random access to compressed data;
+         * Compress specialized data (e.g., raster graphics) as well
+           as the best currently available specialized algorithms.
+
+   1.2. Intended audience
+
+      This specification is intended for use by implementors of software
+      to compress data into "brotli" format and/or decompress data from
+      "brotli" format.
+
+      The text of the specification assumes a basic background in
+      programming at the level of bits and other primitive data
+      representations. Familiarity with the technique of Huffman coding
+      is helpful but not required.
+
+      This specification makes heavy use of the notation and terminology
+      introduced in the DEFLATE format specification (RFC 1951, see
+      reference [3] below). For the sake of completeness, we always
+      include the whole text of the relevant parts of RFC 1951,
+      therefore familiarity with the DEFLATE format is helpful but not
+      required.
+
+   1.3. Scope
+
+      The specification specifies a method for representing a sequence
+      of bytes as a (usually shorter) sequence of bits, and a method for
+      packing the latter bit sequence into bytes.
+
+   1.4. Compliance
+
+      Unless otherwise indicated below, a compliant decompressor must be
+      able to accept and decompress any data set that conforms to all
+      the specifications presented here; a compliant compressor must
+      produce data sets that conform to all the specifications presented
+      here.
+
+   1.5.  Definitions of terms and conventions used
+
+      Byte: 8 bits stored or transmitted as a unit (same as an octet).
+      For this specification, a byte is exactly 8 bits, even on machines
+      which store a character on a number of bits different from eight.
+      See below, for the numbering of bits within a byte.
+
+      String: a sequence of arbitrary bytes.
+
+      Bytes stored within a computer do not have a "bit order", since
+      they are always treated as a unit.  However, a byte considered as
+      an integer between 0 and 255 does have a most- and least-
+      significant bit, and since we write numbers with the most-
+      significant digit on the left, we also write bytes with the most-
+      significant bit on the left.  In the diagrams below, we number the
+      bits of a byte so that bit 0 is the least-significant bit, i.e.,
+      the bits are numbered:
+
+         +--------+
+         |76543210|
+         +--------+
+
+      Within a computer, a number may occupy multiple bytes.  All
+      multi-byte numbers in the format described here are stored with
+      the least-significant byte first (at the lower memory address).
+      For example, the decimal number 520 is stored as:
+
+             0        1
+         +--------+--------+
+         |00001000|00000010|
+         +--------+--------+
+          ^        ^
+          |        |
+          |        + more significant byte = 2 x 256
+          + less significant byte = 8
+
+
+      1.5.1. Packing into bytes
+
+         This document does not address the issue of the order in which
+         bits of a byte are transmitted on a bit-sequential medium,
+         since the final data format described here is byte- rather than
+         bit-oriented.  However, we describe the compressed block format
+         below, as a sequence of data elements of various bit
+         lengths, not a sequence of bytes.  We must therefore specify
+         how to pack these data elements into bytes to form the final
+         compressed byte sequence:
+
+             * Data elements are packed into bytes in order of
+               increasing bit number within the byte, i.e., starting
+               with the least-significant bit of the byte.
+             * Data elements other than Huffman codes are packed
+               starting with the least-significant bit of the data
+               element.
+             * Huffman codes are packed starting with the most-
+               significant bit of the code.
+
+         In other words, if one were to print out the compressed data as
+         a sequence of bytes, starting with the first byte at the
+         *right* margin and proceeding to the *left*, with the most-
+         significant bit of each byte on the left as usual, one would be
+         able to parse the result from right to left, with fixed-width
+         elements in the correct MSB-to-LSB order and Huffman codes in
+         bit-reversed order (i.e., with the first bit of the code in the
+         relative LSB position).
+
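+         To make the packing rules concrete, the following C sketch of
+         a bit reader for fixed-width data elements is consistent with
+         the rules above (it is illustrative only and not part of this
+         specification; all names are arbitrary):
+
+            #include <stddef.h>
+
+            typedef struct {
+               const unsigned char* data;  /* compressed byte sequence */
+               size_t pos;                 /* index of the current byte */
+               int bit;                    /* next bit to read, 0 = LSB */
+            } BitReader;
+
+            /* Reads an nbits-wide data element, starting with its
+               least-significant bit, packed from the LSB of each byte
+               upwards.  (Huffman codes, by contrast, are read one bit
+               at a time starting with the MSB of the code.) */
+            static unsigned int ReadBits(BitReader* br, int nbits) {
+               unsigned int value = 0;
+               int i;
+               for (i = 0; i < nbits; ++i) {
+                  int b = (br->data[br->pos] >> br->bit) & 1;
+                  value |= (unsigned int)b << i;
+                  if (++br->bit == 8) { br->bit = 0; ++br->pos; }
+               }
+               return value;
+            }
+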
+2. Compressed representation overview
+
+   A compressed data set consists of a header and a series of meta-
+   blocks, corresponding to successive meta-blocks of input data. The
+   meta-block sizes are measured in bytes, and the maximum meta-block
+   size is 268,435,456 bytes.
+
+   The header contains the size of a sliding window on the input data
+   that is sufficient to keep in intermediate storage at any given
+   point while decoding the stream.
+
+   Each meta-block is compressed using a combination of the LZ77
+   algorithm (Lempel-Ziv 1977, see reference [2] below) and Huffman
+   coding. The Huffman trees for each block are independent of those for
+   previous or subsequent blocks; the LZ77 algorithm may use a
+   reference to a duplicated string occurring in a previous meta-block,
+   up to the sliding window size in input bytes before the current
+   position.
+
+   Each meta-block consists of two parts: a meta-block header that
+   describes the representation of the compressed data part, and a
+   compressed data part. The compressed data consists of a series of
+   commands. Each command consists of two parts: a sequence of literal
+   bytes (of strings that have not been detected as duplicated within
+   the sliding window), and a pointer to a duplicated string,
+   represented as a pair <length, backward distance>.
+
+   Each command in the compressed data is represented using three kinds
+   of Huffman codes: one kind of code tree for the literal sequence
+   lengths (also referred to as literal insertion lengths) and backward 
+   copy lengths (that is, a single code word represents two lengths,
+   one of the literal sequence and one of the backward copy), a separate
+   kind of code tree for literals, and a third kind of code tree for
+   distances. The code trees for each meta-block appear in a compact
+   form just before the compressed data in the meta-block header.
+
+   The sequence of each type of value in the representation of a command
+   (insert-and-copy lengths, literals and distances) within a meta-
+   block is further divided into blocks. In the "brotli" format, blocks
+   are not contiguous chunks of compressed data, but rather the pieces
+   of compressed data belonging to a block are interleaved with pieces
+   of data belonging to other blocks. Each meta-block can be logically
+   decomposed into a series of insert-and-copy length blocks, a series
+   of literal blocks and a series of distance blocks. These are also
+   called the three block categories: a meta-block has a series of
+   blocks for each block category. Note that the physical structure of
+   the meta-block is a series of commands, while the three series of
+   blocks is the logical structure. Consider the following example:
+
+      (IaC0, L0, L1, L2, D0)(IaC1, D1)(IaC2, L3, L4, D2)(IaC3, L5, D3)
+
+   The meta-block here has 4 commands, and the three types of symbols
+   within these commands can be rearranged, for example, into the
+   following logical block structure:
+
+      [IaC0, IaC1][IaC2, IaC3]  <-- block types 0 and 1
+
+      [L0, L1][L2, L3, L4][L5]  <-- block types 0, 1, and 0
+
+      [D0][D1, D2, D3]          <-- block types 0 and 1
+
+   Consecutive blocks within each block category must have different
+   block types, but blocks further apart in the block sequence can have
+   the same types. The block types are numbered from 0 to the maximum
+   block type number of 255 and the first block of each block category
+   must have type 0. The block structure of a meta-block is represented
+   by the sequence of block-switch commands for each block category,
+   where a block-switch command is a pair <block type, block length>.
+   The block-switch commands are represented in the compressed data
+   before the start of each new block using a Huffman code tree for
+   block types and a separate Huffman code tree for block lengths for
+   each block category. In the above example the physical layout of the
+   meta-block is the following:
+
+      IaC0 L0 L1 LBlockSwitch(1, 3) L2 D0 IaC1 DBlockSwitch(1, 1) D1
+      IaCBlockSwitch(1, 2) IaC2 L3 L4 D2 IaC3 LBlockSwitch(0, 1) D3
+
+   Note that the block switch commands for the first blocks are not part
+   of the meta-block compressed data part; they are encoded in the meta-
+   block header. The code trees for block types and lengths (total of
+   six Huffman code trees) appear in a compact form in the meta-block
+   header.
+
+   Each type of value (insert-and-copy lengths, literals and distances) 
+   can be encoded with any Huffman tree from a collection of Huffman
+   trees of the same kind appearing in the meta-block header. The
+   particular Huffman tree used can depend on two factors: the block type
+   of the block the value appears in, and the context of the value. In
+   the case of the literals, the context is the previous two bytes in
+   the input data, and in the case of distances, the context is the copy
+   length from the same command. For insert-and-copy lengths, no context
+   is used and the Huffman tree depends only on the block type (in fact,
+   the index of the Huffman tree is the block type number). In the case 
+   of literals and distances, the context is mapped to a context id in
+   the range [0, 63] for literals and [0, 3] for distances, and the matrix
+   of the Huffman tree indices for each block type and context id,
+   called the context map, is encoded in a compact form in the meta-
+   block header.
+
+   In addition to the parts listed above (Huffman code trees for insert-
+   and-copy lengths, literals, distances, block types and block lengths
+   and the context map), the meta-block header contains the number of
+   input bytes in the meta-block and two additional parameters used in
+   the representation of copy distances (number of "postfix bits" and
+   number of direct distance codes).
+
+3. Compressed representation of Huffman codes
+
+   3.1. Introduction to prefix and Huffman coding
+
+      Prefix coding represents symbols from an a priori known alphabet
+      by bit sequences (codes), one code for each symbol, in a manner
+      such that different symbols may be represented by bit sequences of
+      different lengths, but a parser can always parse an encoded string
+      unambiguously symbol-by-symbol.
+
+      We define a prefix code in terms of a binary tree in which the two
+      edges descending from each non-leaf node are labeled 0 and 1 and
+      in which the leaf nodes correspond one-for-one with (are labeled
+      with) the symbols of the alphabet; then the code for a symbol is
+      the sequence of 0's and 1's on the edges leading from the root to 
+      the leaf labeled with that symbol.  For example:
+
+                       /\              Symbol    Code
+                      0  1             ------    ----
+                     /    \                A      00
+                    /\     B               B       1
+                   0  1                    C     011
+                  /    \                   D     010
+                 A     /\
+                      0  1
+                     /    \
+                    D      C
+
+      A parser can decode the next symbol from an encoded input stream
+      by walking down the tree from the root, at each step choosing the 
+      edge corresponding to the next input bit.
+
+      Given an alphabet with known symbol frequencies, the Huffman
+      algorithm allows the construction of an optimal prefix code (one
+      which represents strings with those symbol frequencies using the
+      fewest bits of any possible prefix codes for that alphabet). Such
+      a code is called a Huffman code. (See reference [1] in Chapter 5,
+      references for additional information on Huffman codes.)
+
+      Note that in the "brotli" format, the Huffman codes for the
+      various alphabets must not exceed certain maximum code lengths.
+      This constraint complicates the algorithm for computing code
+      lengths from symbol frequencies. Again, see Chapter 5, references
+      for details.
+
+   3.2. Use of Huffman coding in the "brotli" format
+
+      The Huffman codes used for each alphabet in the "brotli" format
+      are canonical Huffman codes, which have two additional rules:
+
+         * All codes of a given bit length have lexicographically
+           consecutive values, in the same order as the symbols they
+           represent;
+
+         * Shorter codes lexicographically precede longer codes.
+
+      We could recode the example above to follow this rule as follows,
+      assuming that the order of the alphabet is ABCD:
+
+         Symbol  Code
+         ------  ----
+         A       10
+         B       0
+         C       110
+         D       111
+
+      I.e., 0 precedes 10 which precedes 11x, and 110 and 111 are
+      lexicographically consecutive.
+
+      Given this rule, we can define the canonical Huffman code for an
+      alphabet just by giving the bit lengths of the codes for each
+      symbol of the alphabet in order; this is sufficient to determine
+      the actual codes. In our example, the code is completely defined
+      by the sequence of bit lengths (2, 1, 3, 3). The following
+      algorithm generates the codes as integers, intended to be read
+      from most- to least-significant bit. The code lengths are
+      initially in tree[I].Len; the codes are produced in tree[I].Code.
+
+         1)  Count the number of codes for each code length.  Let
+             bl_count[N] be the number of codes of length N, N >= 1.
+
+         2)  Find the numerical value of the smallest code for each
+             code length:
+
+                code = 0;
+                bl_count[0] = 0;
+                for (bits = 1; bits <= MAX_BITS; bits++) {
+                    code = (code + bl_count[bits-1]) << 1;
+                    next_code[bits] = code;
+                }
+
+         3)  Assign numerical values to all codes, using consecutive
+             values for all codes of the same length with the base
+             values determined at step 2. Codes that are never used
+             (which have a bit length of zero) must not be assigned a
+             value.
+
+                for (n = 0;  n <= max_code; n++) {
+                    len = tree[n].Len;
+                    if (len != 0) {
+                        tree[n].Code = next_code[len];
+                        next_code[len]++;
+                    }
+                }
+
+      Example:
+
+      Consider the alphabet ABCDEFGH, with bit lengths (3, 3, 3, 3, 3,
+      2, 4, 4).  After step 1, we have:
+
+         N      bl_count[N]
+         -      -----------
+         2      1
+         3      5
+         4      2
+
+      Step 2 computes the following next_code values:
+
+         N      next_code[N]
+         -      ------------
+         1      0
+         2      0
+         3      2
+         4      14
+
+      Step 3 produces the following code values:
+
+         Symbol Length   Code
+         ------ ------   ----
+         A       3        010
+         B       3        011
+         C       3        100
+         D       3        101
+         E       3        110
+         F       2         00
+         G       4       1110
+         H       4       1111
+
+   3.3. Alphabet sizes
+
+      Huffman codes are used for different purposes in the "brotli"
+      format, and each purpose has a different alphabet size. For
+      literal codes, the alphabet size is 256. For insert-and-copy
+      length codes, the alphabet size is 704. For block length codes,
+      the alphabet size is 26. For distance codes, block type codes and
+      the Huffman codes used in compressing the context map, the
+      alphabet size is dynamic and is based on other parameters.
+
+   3.4. Simple Huffman codes
+
+      The first bit of the compressed representation of each Huffman
+      code distinguishes between simple and complex Huffman codes. If
+      the first bit is 1, then a simple, otherwise a complex Huffman
+      code follows.
+
+      A simple Huffman code can have only up to four symbols with non-
+      zero code length. The format of the simple Huffman code is as
+      follows:
+
+            1 bit:  1, indicating a simple Huffman code
+            2 bits: NSYM - 1, where NSYM = # of symbols with non-zero
+                    code length
+
+            NSYM symbols, each encoded using ALPHABET_BITS bits
+
+            1 bit:  tree-select, present only for NSYM = 4
+
+      The value of ALPHABET_BITS depends on the alphabet of the Huffman
+      code: it is the smallest number of bits that can represent all
+      symbols in the alphabet. E.g. for the alphabet of literal bytes,
+      ALPHABET_BITS is 8.
+
+      The (non-zero) code lengths of the symbols can be reconstructed as
+      follows:
+
+         * if NSYM = 1, the code length for the one symbol is
+           considered to be one at this stage, but only to distinguish
+           it from the other, zero-code-length symbols. When this
+           symbol is later encoded in the compressed data stream using
+           this Huffman code, no actual bits are emitted. Similarly,
+           when decoding a symbol using this Huffman code, no bits are
+           read and the one symbol is returned.
+
+         * if NSYM = 2, both symbols have code length 1.
+
+         * if NSYM = 3, the code lengths for the symbols are 1, 2, 2 in
+           the order they appear in the representation of the simple
+           Huffman code.
+
+         * if NSYM = 4, the code lengths (in order of symbols decoded)
+           depend on the tree-select bit: 2, 2, 2, 2 (tree-select bit 0)
+           or 1, 2, 3, 3 (tree-select bit 1).
+
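+      The code length assignment above can be summarized by the
+      following non-normative C sketch (the function name and table are
+      illustrative only):
+
+         /* Code lengths of the NSYM decoded symbols, in decoding order.
+            nsym is 1..4; tree_select is used only when nsym == 4. */
+         static void SimpleCodeLengths(int nsym, int tree_select,
+                                       int lengths[4]) {
+            static const int kLengths[5][4] = {
+               {1, 0, 0, 0},   /* NSYM = 1 (no bits actually emitted) */
+               {1, 1, 0, 0},   /* NSYM = 2 */
+               {1, 2, 2, 0},   /* NSYM = 3 */
+               {2, 2, 2, 2},   /* NSYM = 4, tree-select bit 0 */
+               {1, 2, 3, 3},   /* NSYM = 4, tree-select bit 1 */
+            };
+            const int* row =
+               kLengths[(nsym - 1) + (nsym == 4 ? tree_select : 0)];
+            int i;
+            for (i = 0; i < 4; ++i) lengths[i] = row[i];
+         }
+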
+   3.5. Complex Huffman codes
+
+      A complex Huffman code is a canonical Huffman code, defined by the
+      sequence of code lengths, as discussed in Paragraph 3.2, above.
+      For even greater compactness, the code length sequences themselves
+      are compressed using a Huffman code. The alphabet for code lengths
+      is as follows:
+
+            0 - 15: Represent code lengths of 0 - 15
+                16: Copy the previous non-zero code length 3 - 6 times
+                    The next 2 bits indicate repeat length
+                          (0 = 3, ... , 3 = 6)
+                    If this is the first code length, or all previous
+                    code lengths are zero, a code length of 8 is
+                    repeated 3 - 6 times
+                    A repeated code length code of 16 modifies the
+                    repeat count of the previous one as follows:
+                       repeat count = (4 * (repeat count - 2)) +
+                                      (3 - 6 on the next 2 bits)
+                    Example:  Codes 7, 16 (+2 bits 11), 16 (+2 bits 10)
+                              will expand to 22 code lengths of 7
+                              (1 + 4 * (6 - 2) + 5)
+                17: Repeat a code length of 0 for 3 - 10 times.
+                    (3 bits of length)
+                    A repeated code length code of 17 modifies the
+                    repeat count of the previous one as follows:
+                       repeat count = (8 * (repeat count - 2)) +
+                                      (3 - 10 on the next 3 bits)
+
+      A code length of 0 indicates that the corresponding symbol in the
+      alphabet will not occur in the compressed data, and should not
+      participate in the Huffman code construction algorithm given
+      earlier.
+
+      The bit lengths of the Huffman code over the code length alphabet
+      are compressed with the following static Huffman code:
+
+               Symbol   Code
+               ------   ----
+               0          00
+               1        1010
+               2         100
+               3          11
+               4          01
+               5        1011
+
+      We can now define the format of the complex Huffman code as
+      follows:
+
+            1 bit:  0, indicating a complex Huffman code
+            4 bits: HCLEN, # of code length codes - 3
+            1 bit : HSKIP, if 1, skip over first two code length codes
+
+            (HCLEN + 3 - 2 * HSKIP) code lengths for symbols in the code
+               length alphabet given just above, in the order: 1, 2, 3,
+               4, 0, 17, 5, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15
+
+               If HSKIP is 1, code lengths of code length symbols 1 and
+               2 are implicit zeros. Code lengths of code length symbols
+               beyond the (HCLEN + 4)th in the ordering above are also
+               implicit zeros.
+
+               The code lengths of code length symbols are between 0 and
+               5 and they are represented with 2 - 5 bits according to
+               the static Huffman code above. A code length of 0 means
+               the corresponding code length symbol is not used.
+
+            1 bit:  HLENINC, if 1, the number of code length symbols is
+                    encoded next
+
+          7-8 bits: HLEN, # of code length symbols, with the following
+                    encoding: values 4 - 67 with bit pattern 0xxxxxx,
+                    values 68 - 195 with bit pattern 1xxxxxxx, appears
+                    only if HLENINC = 1
+
+            Sequence of code lengths symbols, encoded using the code
+               length Huffman code. The number of code length symbols
+               is either HLEN (in case of HLENINC = 1), or as many as is
+               needed to assign a code length to each symbol in the
+               alphabet (i.e. the alphabet size minus the sum of all the
+               repeat lengths defined by extra bits of code length
+               symbols 16 and 17). In case of HLENINC = 1, all symbols
+               not assigned a code length have implicit code length 0.
+
+   3.6. Validity of the Huffman code
+
+      There are two kinds of valid Huffman codes:
+         * Huffman code that contains one symbol of length 1, and
+         * Full canonical Huffman code.
+
+      A decoder can check whether the Huffman code is full using integer
+      arithmetic: the sum of (32768 right-shifted by the code length),
+      taken over all non-zero code lengths, must equal exactly 32768.
+      However, if there is only one non-zero code length, that symbol
+      has an implicit code length of one and the code is considered
+      valid.
+
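+      The following C sketch illustrates this check (non-normative and
+      illustrative only; the function name is arbitrary and code
+      lengths are assumed to be at most 15):
+
+         #include <stddef.h>
+
+         /* Returns 1 if the code is valid: either exactly one symbol
+            has a non-zero length (implicitly length 1), or the Kraft
+            sum over all non-zero lengths is exactly 32768. */
+         static int IsValidHuffmanCode(const int* lengths, size_t n) {
+            size_t i;
+            int nonzero = 0;
+            unsigned int space = 0;
+            for (i = 0; i < n; ++i) {
+               if (lengths[i] != 0) {
+                  ++nonzero;
+                  space += 32768u >> lengths[i];
+               }
+            }
+            return nonzero == 1 || space == 32768u;
+         }
+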
+4. Encoding of distances
+
+   As described in Section 2, one component of a compressed meta-block
+   is a sequence of backward distances. In this section we provide the
+   details to the encoding of distances.
+
+   Each distance in the compressed data part of a meta-block is
+   represented with a pair <distance code, extra bits>. The distance
+   code and the extra bits are encoded back-to-back, the distance code
+   is encoded using a Huffman code over the distance code alphabet,
+   while the extra bits value is encoded as a fixed-width machine
+   integer. The number of extra bits can be 0 - 24, and it is dependent
+   on the distance code.
+
+   To convert a distance code and associated extra bits to a backward
+   distance, we need the sequence of past distances and two additional
+   parameters, the number of "postfix bits", denoted by NPOSTFIX, and
+   the number of direct distance codes, denoted by NDIRECT. Both of
+   these parameters are encoded in the meta-block header. We will also
+   use the following derived parameter:
+
+      POSTFIX_MASK = ((1 << NPOSTFIX) - 1)
+
+   The first 16 distance codes are special short codes that reference
+   past distances as follows:
+
+         0: last distance
+         1: second last distance
+         2: third last distance
+         3: fourth last distance
+         4: last distance - 1
+         5: last distance + 1
+         6: last distance - 2
+         7: last distance + 2
+         8: last distance - 3
+         9: last distance + 3
+        10: second last distance - 1
+        11: second last distance + 1
+        12: second last distance - 2
+        13: second last distance + 2
+        14: second last distance - 3
+        15: second last distance + 3
+
+   The ring-buffer of four last distances is initialized by the values
+   16, 15, 11 and 4 (i.e. the fourth last is set to 16, the third last
+   to 15, the second last to 11 and the last distance to 4) at the
+   beginning of the *stream* (as opposed to the beginning of the meta-
+   block) and it is not reset at meta-block boundaries. When a distance
+   code 0 appears, the distance it represents (i.e. the last distance
+   in the sequence of distances) is not pushed to the ring-buffer of
+   last distances; in other words, the expression "(second, third,
+   fourth) last distance" means the (second, third, fourth) last
+   distance that was not represented by a 0 distance code.
+
+   The next NDIRECT distance codes, from 16 to 15 + NDIRECT, represent
+   distances from 1 to NDIRECT. Neither the distance short codes, nor
+   the NDIRECT direct distance codes have any extra bits.
+
+   Distance codes 16 + NDIRECT and greater all have extra bits, the
+   number of extra bits for a distance code "dcode" is given by the
+   following formula:
+
+      ndistbits = 1 + ((dcode - NDIRECT - 16) >> (NPOSTFIX + 1))
+
+   The maximum number of extra bits is 24, therefore the size of the
+   distance code alphabet is (16 + NDIRECT + (48 << NPOSTFIX)).
+
+   Given a distance code "dcode" (>= 16 + NDIRECT), and extra bits
+   "dextra", the backward distance is given by the following formula:
+
+      hcode = (dcode - NDIRECT - 16) >> NPOSTFIX
+      lcode = (dcode - NDIRECT - 16) & POSTFIX_MASK
+      offset = ((2 + (hcode & 1)) << ndistbits) - 4;
+      distance = ((offset + dextra) << NPOSTFIX) + lcode + NDIRECT + 1
+
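+   As a non-normative illustration, the formulas above can be combined
+   into the following C sketch (variable and function names are
+   arbitrary):
+
+      /* Converts a distance code dcode >= 16 + NDIRECT and its extra
+         bits dextra into a backward distance, per the formulas above. */
+      static int DistanceFromCode(int dcode, int dextra,
+                                  int npostfix, int ndirect) {
+         int postfix_mask = (1 << npostfix) - 1;
+         int ndistbits = 1 + ((dcode - ndirect - 16) >> (npostfix + 1));
+         int hcode = (dcode - ndirect - 16) >> npostfix;
+         int lcode = (dcode - ndirect - 16) & postfix_mask;
+         int offset = ((2 + (hcode & 1)) << ndistbits) - 4;
+         return ((offset + dextra) << npostfix) + lcode + ndirect + 1;
+      }
+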
+5. Encoding of literal insertion lengths and copy lengths
+
+   As described in Section 2, the literal insertion lengths and backward
+   copy lengths are encoded using a single Huffman code. This section
+   provides the details to this encoding.
+
+   Each <insertion length, copy length> pair in the compressed data part
+   of a meta-block is represented with the following triplet:
+
+      <insert-and-copy length code, insert extra bits, copy extra bits>
+
+   The insert-and-copy length code, the insert extra bits and the copy
+   extra bits are encoded back-to-back, the insert-and-copy length code
+   is encoded using a Huffman code over the insert-and-copy length code
+   alphabet, while the extra bits values are encoded as fixed-width
+   machine integers. The number of insert and copy extra bits can be
+   0 - 24, and they are dependent on the insert-and-copy length code.
+
+   Some of the insert-and-copy length codes also express the fact that
+   the distance code of the distance in the same command is 0, i.e. the
+   distance component of the command is the same as that of the previous
+   command. In this case, the distance code and extra bits for the 
+   distance are omitted from the compressed data stream.
+
+   We describe the insert-and-copy length code alphabet in terms of the
+   (not directly used) insert length code and copy length code
+   alphabets. The symbols of the insert length code alphabet, along with
+   the number of insert extra bits and the range of the insert lengths
+   are as follows:
+
+           Extra               Extra               Extra
+      Code Bits Lengths   Code Bits Lengths   Code Bits Lengths
+      ---- ---- ------    ---- ---- -------   ---- ---- -------
+       0    0     0        8    2    10-13    16    6   130-193
+       1    0     1        9    2    14-17    17    7   194-321
+       2    0     2       10    3    18-25    18    8   322-577
+       3    0     3       11    3    26-33    19    9   578-1089
+       4    0     4       12    4    34-49    20   10   1090-2113
+       5    0     5       13    4    50-65    21   12   2114-6209
+       6    1    6,7      14    5    66-97    22   14   6210-22593
+       7    1    8,9      15    5    98-129   23   24   22594-16799809
+
+   The symbols of the copy length code alphabet, along with the number
+   of copy extra bits and the range of copy lengths are as follows:
+
+           Extra               Extra               Extra
+      Code Bits Lengths   Code Bits Lengths   Code Bits Lengths
+      ---- ---- ------    ---- ---- -------   ---- ---- -------
+       0    0     2        8    1    10,11    16    5    70-101
+       1    0     3        9    1    12,13    17    5   102-133
+       2    0     4       10    2    14-17    18    6   134-197
+       3    0     5       11    2    18-21    19    7   198-325
+       4    0     6       12    3    22-29    20    8   326-581
+       5    0     7       13    3    30-37    21    9   582-1093
+       6    0     8       14    4    38-53    22   10   1094-2117
+       7    0     9       15    4    54-69    23   24   2118-16779333
+
+   To convert an insert-and-copy length code to an insert length code
+   and a copy length code, the following table can be used:
+
+          Insert
+          length        Copy length code
+          code       0-7       8-15     16-23
+                 +---------+---------+
+                 |         |         |
+            0-7  |   0-63  |  64-127 | <--- distance code 0
+                 |         |         |
+                 +---------+---------+---------+
+                 |         |         |         |
+            0-7  | 128-191 | 192-255 | 384-447 |
+                 |         |         |         |
+                 +---------+---------+---------+
+                 |         |         |         |
+            8-15 | 256-319 | 320-383 | 512-575 |
+                 |         |         |         |
+                 +---------+---------+---------+
+                 |         |         |         |
+           16-23 | 448-511 | 576-639 | 640-703 |
+                 |         |         |         |
+                 +---------+---------+---------+
+
+   First, look up the cell whose 64-value range contains the
+   insert-and-copy length code; this gives the insert length code and
+   the copy length code ranges, both 8 values long. The copy length
+   code within its range is determined by the lowest 3 bits of the
+   insert-and-copy length code, and the insert length code within its
+   range is determined by bits 3-5 (counted from the LSB) of the insert-
+   and-copy length code. Given the insert length and copy length codes,
+   the actual insert and copy lengths can be obtained by reading the
+   number of extra bits given by the tables above.
+
+   If the insert-and-copy length code is between 0 and 127, the distance
+   code of the command is set to zero (the last distance reused).
+
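+   As a non-normative illustration, the table lookup described above can
+   be expressed as follows in C (the per-cell base tables are derived
+   from the table above; all names are arbitrary):
+
+      /* Bases of the insert and copy length code ranges for each
+         64-value cell of the insert-and-copy length code alphabet. */
+      static const int kInsertBase[11] =
+         { 0, 0, 0, 0, 8, 8, 0, 16, 8, 16, 16 };
+      static const int kCopyBase[11] =
+         { 0, 8, 0, 8, 0, 8, 16, 0, 16, 8, 16 };
+
+      /* Splits an insert-and-copy length code (0-703) into the insert
+         length code and copy length code; codes 0-127 also imply
+         distance code 0 (the last distance is reused). */
+      static void SplitInsertAndCopyCode(int code, int* insert_code,
+                                         int* copy_code, int* dist_zero) {
+         int cell = code >> 6;
+         *insert_code = kInsertBase[cell] + ((code >> 3) & 7);
+         *copy_code = kCopyBase[cell] + (code & 7);
+         *dist_zero = (code < 128);
+      }
+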
+6. Encoding of block switch commands
+
+   As described in Section 2, a block-switch command is a pair
+   <block type, block length>. These are encoded in the compressed data
+   part of the meta-block, right before the start of each new block of a
+   particular block category.
+
+   Each block type in the compressed data is represented with a block
+   type code, encoded using a Huffman code over the block type code
+   alphabet. A block type code 0 means that the block type is the same
+   as the type of the second last block from the same block category,
+   while a block type code 1 means that the block type equals the last
+   block type plus one. Block type codes 2 - 257 represent block types
+   0 - 255. The second last and last block types are initialized with 0
+   and 1, respectively, at the beginning of each meta-block.
+
+   The first block type of each block category must be 0, and the block
+   type of the first block switch command is therefore not encoded in
+   the compressed data.
+
+   The number of different block types in each block category, denoted
+   by NBLTYPESL, NBLTYPESI, and NBLTYPESD for literals, insert-and-copy
+   lengths and distances, respectively, is encoded in the meta-block
+   header, and it must equal to the largest block type plus one in that
+   block category. In other words, the set of literal, insert-and-copy
+   length and distance block types must be [0..NBLTYPESL-1],
+   [0..NBLTYPESI-1], and [0..NBLTYPESD-1], respectively. From this it
+   follows that the alphabet size of literal, insert-and-copy length and
+   distance block type codes is NBLTYPESL + 2, NBLTYPESI + 2 and
+   NBLTYPESD + 2, respectively.
+
+   Each block length in the compressed data is represented with a pair
+   <block length code, extra bits>. The block length code and the extra
+   bits are encoded back-to-back, the block length code is encoded using
+   a Huffman code over the block length code alphabet, while the extra
+   bits value is encoded as a fixed-width machine integer. The number of
+   extra bits can be 0 - 24, and it is dependent on the block length
+   code. The symbols of the block length code alphabet, along with the
+   number of extra bits and the range of block lengths are as follows:
+
+           Extra               Extra               Extra
+      Code Bits Lengths   Code Bits Lengths   Code Bits Lengths
+      ---- ---- ------    ---- ---- -------   ---- ---- -------
+       0    2    1-4       9    4    65-80    18    7   369-496
+       1    2    5-8      10    4    81-96    19    8   497-752
+       2    2    9-12     11    4    97-112   20    9   753-1264
+       3    2   13-16     12    5   113-144   21   10   1265-2288
+       4    3   17-24     13    5   145-176   22   11   2289-4336
+       5    3   25-32     14    5   177-208   23   12   4337-8432
+       6    3   33-40     15    5   209-240   24   13   8433-16624
+       7    3   41-48     16    6   241-304   25   24   16625-16793840
+       8    4   49-64     17    6   305-368
+
+   The first block switch command of each block category is special in
+   the sense that it is encoded in the meta-block header, and as
+   described earlier the block type code is omitted, since it is an
+   implicit zero.
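+
+   For example, block length code 18 is followed by 7 extra bits; if
+   the value of those extra bits is 25, the block length is
+   369 + 25 = 394. A decoder might use an offset table transcribed
+   from the table above, as in the following sketch (kBlockLengthOffset
+   and BlockLength are illustrative names, not part of the reference
+   sources):
+
+      static const int kBlockLengthOffset[26] = {
+           1,    5,    9,   13,   17,   25,   33,   41,   49,   65,
+          81,   97,  113,  145,  177,  209,  241,  305,  369,  497,
+         753, 1265, 2289, 4337, 8433, 16625
+      };
+
+      /* block length = first length of the code's range + extra bits */
+      static int BlockLength(int code, int extra_bits_value) {
+        return kBlockLengthOffset[code] + extra_bits_value;
+      }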
+
+7. Context modeling
+
+   As described in Section 2, the Huffman tree used to encode a literal
+   byte or a distance code depends on the context id and the block type.
+   This section specifies how to compute the context id for a particular
+   literal and distance code, and how to encode the context map that
+   maps a <context id, block type> pair to the index of a Huffman
+   tree in the array of literal and distance Huffman trees.
+
+   7.1. Context modes and context id lookup for literals
+
+      The context for encoding the next literal is defined by the last
+      two bytes in the stream (p1, p2, where p1 is the most recent
+      byte), regardless of whether these bytes were produced by
+      backward references or by literal insertions.
+
+      There are four methods, called context modes, to compute the
+      Context ID:
+         * MSB6, where the Context ID is the value of six most
+           significant bits of p1,
+         * LSB6, where the Context ID is the value of six least
+           significant bits of p1,
+         * UTF8, where the Context ID is a complex function of p1, p2,
+           optimized for text compression, and
+         * Signed, where Context ID is a complex function of p1, p2,
+           optimized for compressing sequences of signed integers.
+
+      The Context ID for the UTF8 and Signed context modes is computed
+      using the following lookup tables Lut0, Lut1, and Lut2.
+
+      Lut0 :=
+         0,  0,  0,  0,  0,  0,  0,  0,  0,  4,  4,  0,  0,  4,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+         8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
+        44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
+        12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
+        52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
+        12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
+        60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12,  0,
+         0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+         0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+         0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+         0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+         2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+         2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+         2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+         2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3
+
+      Lut1 :=
+         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
+         1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
+         1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
+         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+
+      Lut2 :=
+         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+         6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7
+
+      Given p1 = last decoded byte, and p2 = second last decoded byte,
+      the context ids can be computed as follows:
+
+         For LSB6  :  Context ID = p1 & 0x3f
+         For MSB6  :  Context ID = p1 >> 2
+         For UTF8  :  Context ID = Lut0[p1] | Lut1[p2]
+         For Signed:  Context ID = (Lut2[p1] << 3) | Lut2[p2]
+
+      The context modes LSB6, MSB6, UTF8, and Signed are denoted by the
+      integers 0, 1, 2, and 3, respectively.
+
+      A context mode is defined for each literal block type, and the
+      modes are stored in a consecutive array of bits in the meta-block
+      header, always two bits per block type.
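+
+      The four computations above can be summarized in the following C
+      sketch, assuming the tables Lut0, Lut1, and Lut2 have been
+      transcribed into arrays lut0[256], lut1[256], and lut2[256] (the
+      names are illustrative):
+
+         /* p1 is the last output byte, p2 the second last output
+            byte, mode the context mode (0 - 3) of the block type. */
+         static int LiteralContextId(int mode, uint8_t p1, uint8_t p2) {
+           switch (mode) {
+             case 0:  return p1 & 0x3f;                   /* LSB6   */
+             case 1:  return p1 >> 2;                     /* MSB6   */
+             case 2:  return lut0[p1] | lut1[p2];         /* UTF8   */
+             default: return (lut2[p1] << 3) | lut2[p2];  /* Signed */
+           }
+         }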
+
+   7.2. Context id for distances
+
+      The context for encoding a distance code is defined by the copy
+      length corresponding to the distance. The context ids are 0, 1, 2,
+      and 3 for copy lengths 2, 3, 4, and more than 4, respectively.
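+
+      Equivalently, in C (copy_len is the copy length of the current
+      command, which is always at least 2):
+
+         static int DistanceContextId(int copy_len) {
+           return copy_len > 4 ? 3 : copy_len - 2;
+         }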
+
+   7.3. Encoding of the context map
+
+      There are two kinds of context maps, for literals and for
+      distances. The size of the context map is 64 * NBLTYPESL for
+      literals, and 4 * NBLTYPESD for distances. Each value in the
+      context map is an integer between 0 and 255, indicating the index
+      of the Huffman tree to be used when encoding the next literal or
+      distance.
+
+      The context map is encoded as a one-dimensional array,
+      CMAPL[0..(64 * NBLTYPESL - 1)] and CMAPD[0..(4 * NBLTYPESD - 1)].
+
+      The index of the Huffman tree for encoding a literal or distance
+      code with context id "cid" and block type "bltype" is
+
+         index of literal Huffman tree = CMAPL[bltype * 64 + cid]
+
+         index of distance Huffman tree = CMAPD[bltype * 4 + cid]
+
+      The values of the context map are encoded with a combination of
+      run length encoding for zero values and Huffman coding. Let
+      RLEMAX denote the number of run length codes and NTREES denote
+      the maximum value in the context map plus one. NTREES must equal
+      the number of different values in the context map; in other
+      words, the set of different values in the context map must be
+      the interval [0..NTREES-1]. The alphabet of the Huffman code has
+      the following RLEMAX + NTREES symbols:
+
+            0: value zero
+            1: repeat a zero 2-3 times, read 1 bit for repeat length
+            2: repeat a zero 4-7 times, read 2 bits for repeat length
+            ...
+            RLEMAX: repeat a zero (2^RLEMAX)-(2^(RLEMAX+1) - 1) times,
+                    read RLEMAX bits for repeat length
+            RLEMAX + 1: value 1
+            ...
+            RLEMAX + NTREES - 1: value NTREES - 1
+
+      If RLEMAX = 0, the run length coding is not used, and the symbols
+      of the alphabet are directly the values in the context map. We can
+      now define the format of the context map (the same format is used 
+      for literal and distance context maps):
+
+          1-5 bits: RLEMAX, 0 is encoded with one 0 bit, and values
+                    1 - 16 are encoded with bit pattern 1xxxx
+
+            Huffman code with alphabet size NTREES + RLEMAX
+
+            Context map size values encoded with the above Huffman code
+               and run length coding for zero values
+
+            1 bit:  IMTF bit, if set, we do an inverse move-to-front
+                    transform on the values in the context map to get
+                    the Huffman code indexes
+
+      For the encoding of NTREES see Section 9.2.
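+
+      The decoding of the context map values can be sketched in C as
+      follows, where ReadContextMapSymbol() stands for reading one
+      symbol of the Huffman code described above and ReadBits(n) for
+      reading n extra bits (both helpers, as well as the bounds
+      handling needed for invalid streams, are illustrative only):
+
+         int i = 0;
+         while (i < context_map_size) {
+           int sym = ReadContextMapSymbol();
+           if (sym == 0) {
+             cmap[i++] = 0;
+           } else if (sym <= RLEMAX) {
+             /* repeat a zero (1 << sym) + extra times */
+             int reps = (1 << sym) + ReadBits(sym);
+             while (reps-- > 0) cmap[i++] = 0;
+           } else {
+             cmap[i++] = (uint8_t)(sym - RLEMAX);
+           }
+         }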
+
+8. Static dictionary
+
+   At any given point during decoding the compressed data, a reference
+   to a duplicated string in the output produced so far has a maximum
+   backward distance value, which is the minimum of the window size and
+   the number of output bytes produced. However, decoding a distance
+   from the input stream, as described in section 4, can produce
+   distances that are greater than this maximum allowed value. The
+   difference between such a distance and the first invalid distance
+   value is treated as a reference to a word in the static dictionary
+   given in Appendix A. The maximum valid copy length for a static
+   dictionary reference is 24. The static dictionary has three parts:
+
+      * DICT[0..DICTSIZE], an array of bytes
+      * DOFFSET[0..24], an array of byte offset values for each length
+      * NDBITS[0..24], an array of bit-depth values for each length
+
+   The number of static dictionary words for a given length is:
+
+      NWORDS[length] = 0                       (if length < 3)
+      NWORDS[length] = (1 << NDBITS[length])   (if length >= 3)
+
+   DOFFSET and DICTSIZE are defined by the following recursion:
+
+      DOFFSET[0] = 0
+      DOFFSET[length + 1] = DOFFSET[length] + length * NWORDS[length]
+      DICTSIZE = DOFFSET[24] + 24 * NWORDS[24]
+
+   The offset of a word within the DICT array for a given length and
+   index is:
+
+      offset(length, index) = DOFFSET[length] + index * length
+
+   Each static dictionary word has 64 different forms, given by applying
+   a word transformation to a base word in the DICT array. The list of
+   word transformations is given in Appendix B. The static dictionary
+   word for a <length, distance> pair can be reconstructed as follows:
+
+      word_id = distance - (max allowed distance + 1)
+      index = word_id % NWORDS[length]
+      base_word = DICT[offset(length, index)..offset(length, index+1))
+      transform_id = word_id >> NDBITS[length]
+
+   The string copied to the output stream is computed by applying the 
+   transformation to the base dictionary word. If transform_id is
+   greater than 63 or length is greater than 24, the compressed data set
+   is invalid and must be discarded.
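+
+   A minimal C sketch of this reconstruction, written as a fragment of
+   a hypothetical decoding routine (max_distance is the maximum allowed
+   backward distance at this point, Transform() stands for applying one
+   of the word transformations of Appendix B, and all names are
+   illustrative):
+
+      int word_id, index, transform_id;
+      const uint8_t* base_word;
+
+      if (length > 24) return 0;         /* invalid compressed data */
+      word_id = distance - (max_distance + 1);
+      index = word_id % NWORDS[length];
+      transform_id = word_id >> NDBITS[length];
+      if (transform_id > 63) return 0;   /* invalid compressed data */
+      base_word = &DICT[DOFFSET[length] + index * length];
+      Transform(transform_id, base_word, length, output);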
+
+9. Compressed data format
+
+   In this section we describe the format of the compressed data set in
+   terms of the format of the individual data items described in the
+   previous sections.
+
+   9.1. Format of the stream header
+
+      The stream header consists of the following single field:
+
+          1-4 bits: WBITS, a value in the range 16 - 24, value 16 is
+                    encoded with one 0 bit, and values 17 - 24 are
+                    encoded with bit pattern 1xxx
+
+      The size of the sliding window, which is the maximum value of any
+      non-dictionary reference backward distance, is given by the
+      following formula:
+
+         window size = (1 << WBITS) - 16
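+
+      A sketch of reading this field and computing the window size,
+      where ReadBits(n) is an illustrative helper returning the next n
+      bits of the stream:
+
+         int wbits = ReadBits(1) ? 17 + ReadBits(3) : 16;
+         size_t window_size = ((size_t)1 << wbits) - 16;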
+
+   9.2. Format of the meta-block header
+
+      A compliant compressed data set has at least one meta-block. Each
+      meta-block contains a header, with information about the
+      uncompressed length of the meta-block, and a bit signaling if the
+      meta-block is the last one. The format of the meta-block header is
+      the following:
+
+            1 bit:  ISLAST, set to 1 if this is the last meta-block
+            1 bit:  ISEMPTY, set to 1 if the meta-block is empty; this
+                    field is only present if the ISLAST bit is set,
+                    since only the last meta-block can be empty
+            2 bits: MNIBBLES, (# of nibbles to represent the length) - 4
+
+            (MNIBBLES + 4) x 4 bits: MLEN - 1, where MLEN is the length
+               of the meta-block in the input data in bytes
+
+            1 bit:  ISUNCOMPRESSED, if set to 1, any bits of input up to
+                    the next byte boundary are ignored, and the rest of
+                    the meta-block contains MLEN bytes of literal data;
+                    this field is only present if ISLAST bit is not set
+
+         1-11 bits: NBLTYPESL, # of literal block types, encoded with
+                    the following variable length code (a reading
+                    sketch is given after this header layout):
+
+                          Value   Bit Pattern
+                          -----   -----------
+                            1      0
+                            2      1000
+                           3-4     1001x
+                           5-8     1010xx
+                           9-16    1011xxx
+                          17-32    1100xxxx
+                          33-64    1101xxxxx
+                          65-128   1110xxxxxx
+                         129-256   1111xxxxxxx
+
+            Huffman code over the block type code alphabet for literal
+               block types, appears only if NBLTYPESL >= 2
+
+            Huffman code over the block length code alphabet for literal
+               block lengths, appears only if NBLTYPESL >= 2
+
+            Block length code + Extra bits for first literal block 
+               length, appears only if NBLTYPESL >= 2
+
+         1-11 bits: NBLTYPESI, # of insert-and-copy block types, encoded
+                    with the same variable length code as above
+
+            Huffman code over the block type code alphabet for insert-
+               and-copy block types, only if NBLTYPESI >= 2
+
+            Huffman code over the block length code alphabet for insert-
+               and-copy block lengths, only if NBLTYPESI >= 2
+
+            Block length code + Extra bits for first insert-and-copy
+               block length, only if NBLTYPESI >= 2
+
+         1-11 bits: NBLTYPESD, # of distance block types, encoded with
+                    the same variable length code as above
+
+            Huffman code over the block type code alphabet for distance
+               block types, appears only if NBLTYPESD >= 2
+
+            Huffman code over the block length code alphabet for
+               distance block lengths, only if NBLTYPESD >= 2
+
+            Block length code + Extra bits for first distance block
+               length, only if NBLTYPESD >= 2
+
+            2 bits: NPOSTFIX, parameter used in the distance coding
+
+            4 bits: four most significant bits of NDIRECT, to get the
+                    actual value of the parameter NDIRECT, left-shift
+                    this four bit number by NPOSTFIX bits
+
+            NBLTYPESL x 2 bits: context mode for each literal block type
+
+         1-11 bits: NTREESL, # of literal Huffman trees, encoded with
+                    the same variable length code as NBLTYPESL
+
+            Literal context map, encoded as described in Paragraph 7.3,
+               appears only if NTREESL >= 2, otherwise the context map
+               has only zero values
+
+         1-11 bits: NTREESD, # of distance Huffman trees, encoded with
+                    the same variable length code as NBLTYPESD
+
+            Distance context map, encoded as described in Paragraph 7.3,
+               appears only if NTREESD >= 2, otherwise the context map
+               has only zero values
+
+            NTREESL Huffman codes for literals
+
+            NBLTYPESI Huffman codes for insert-and-copy lengths
+
+            NTREESD Huffman codes for distances
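+
+      The variable length code used above for NBLTYPESL, NBLTYPESI,
+      NBLTYPESD, NTREESL, and NTREESD can be read as in the following
+      sketch, where ReadBits(n) is an illustrative helper returning the
+      next n bits of the stream:
+
+         static int ReadBlockTypeCount(void) {
+           int nbits;
+           if (!ReadBits(1)) return 1;       /* bit pattern "0"    */
+           nbits = ReadBits(3);              /* 0 - 7              */
+           if (nbits == 0) return 2;         /* bit pattern "1000" */
+           return (1 << nbits) + ReadBits(nbits) + 1;
+         }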
+
+   9.3. Format of the meta-block data
+
+      The compressed data part of a meta-block consists of a series of
+      commands. Each command has the following format:
+
+            Block type code for next insert-and-copy block type, appears
+               only if NBLTYPESI >= 2 and the previous insert-and-copy
+               block has ended
+
+            Block length code + Extra bits for next insert-and-copy
+               block length, appears only if NBLTYPESI >= 2 and the
+               previous insert-and-copy block has ended
+
+            Insert-and-copy length, encoded as in section 5, using the
+               insert-and-copy length Huffman code with the current
+               insert-and-copy block type index
+
+            Insert length number of literals, with the following format:
+
+                  Block type code for next literal block type, appears
+                     only if NBLTYPESL >= 2 and the previous literal
+                     block has ended
+
+                  Block length code + Extra bits for next literal block
+                     length, appears only if NBLTYPESL >= 2 and the
+                     previous literal block has ended
+
+                  Next byte of the input data, encoded with the literal
+                     Huffman code with the index determined by the
+                     previous two bytes of the input data, the current
+                     literal block type and the context map, as
+                     described in Paragraph 7.3.
+
+            Block type code for next distance block type, appears only
+               if NBLTYPESD >= 2 and the previous distance block has
+               ended
+
+            Block length code + Extra bits for next distance block
+               length, appears only if NBLTYPESD >= 2 and the previous
+               distance block has ended
+
+            Distance code, encoded as in section 4, using the distance
+               Huffman code with the current distance block type index,
+               appears only if the distance code is not an implicit 0,
+               as indicated by the insert-and-copy length code
+
+      The number of commands in the meta-block is such that the sum of
+      the insert lengths and copy lengths over all commands equals the
+      uncompressed length MLEN encoded in the meta-block header.
+
+10. Decoding algorithm
+
+   The decoding algorithm that produces the output data is as follows:
+
+      read window size
+      do
+         read ISLAST bit
+         if ISLAST
+            read ISEMPTY bit
+            if ISEMPTY
+               break from loop
+         read MLEN
+         if not ISLAST
+            read ISUNCOMPRESSED bit
+            if ISUNCOMPRESSED
+               skip any bits up to the next byte boundary
+               copy MLEN bytes of input to the output stream
+               continue to the next meta-block
+         loop for each of the three block categories (i = L, I, D)
+            read NBLTYPESi
+            if NBLTYPESi >= 2
+               read Huffman code for block types, HTREE_BTYPE_i
+               read Huffman code for block lengths, HTREE_BLEN_i
+               read block length, BLEN_i
+               set block type, BTYPE_i to 0
+               initialize second last and last block types to 0 and 1
+            else
+               set block type, BTYPE_i to 0
+               set block length, BLEN_i to 268435456
+         read NPOSTFIX and NDIRECT
+         read array of literal context modes, CMODE[]
+         read NTREESL
+         if NTREESL >= 2
+            read literal context map, CMAPL[]
+         else
+            fill CMAPL[] with zeros
+         read NTREESD
+         if NTREESD >= 2
+            read distance context map, CMAPD[]
+         else
+            fill CMAPD[] with zeros
+         read array of Huffman codes for literals, HTREEL[]
+         read array of Huffman codes for insert-and-copy, HTREEI[]
+         read array of Huffman codes for distances, HTREED[]
+         do
+            if BLEN_I is zero
+               read block type using HTREE_BTYPE_I and set BTYPE_I
+               read block length using HTREE_BLEN_I and set BLEN_I
+            decrement BLEN_I
+            read insert and copy length, ILEN, CLEN with HTREEI[BTYPE_I]
+            loop for ILEN
+               if BLEN_L is zero
+                  read block type using HTREE_BTYPE_L and set BTYPE_L
+                  read block length using HTREE_BLEN_L and set BLEN_L
+               decrement BLEN_L
+               look up context mode CMODE[BTYPE_L]
+               compute context id, CIDL from last two bytes of output
+               read literal using HTREEL[CMAPL[64 * BTYPE_L + CIDL]]
+               copy literal to output stream
+            if number of output bytes produced in the loop is MLEN
+               break from loop
+            if distance code is implicit zero from insert-and-copy code
+               set backward distance to the last distance
+            else
+               if BLEN_D is zero
+                  read block type using HTREE_BTYPE_D and set BTYPE_D
+                  read block length using HTREE_BLEN_D and set BLEN_D
+               decrement BLEN_D
+               compute context id, CIDD from CLEN
+               read distance code with HTREED[CMAPD[4 * BTYPE_D + CIDD]]
+               compute distance by distance short code substitution
+            move backwards distance bytes in the output stream, and
+              copy CLEN bytes from this position to the output stream,
+              or look up the static dictionary word and copy it to the
+              output stream
+         while number of output bytes produced in the loop < MLEN
+      while not ISLAST
+
+      Note that a duplicated string reference may refer to a string in a
+      previous meta-block; i.e., the backward distance may cross one or
+      more meta-block boundaries. However, a backward copy distance
+      cannot refer past the beginning of the output stream, nor can it
+      be greater than the window size; any such distance must be
+      interpreted as a reference to a static dictionary word. Note also
+      that the referenced string may overlap the current position; for
+      example, if the last 2 bytes decoded have values X and Y, a string
+      reference with <length = 5, distance = 2> adds X,Y,X,Y,X to the
+      output stream.
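+
+      Because the referenced string may overlap the position being
+      written, a decoder must preserve byte-by-byte copy semantics, as
+      in the following sketch (output is the output buffer, pos the
+      number of bytes produced so far, and i a loop counter; the names
+      are illustrative):
+
+         /* Copy copy_len bytes starting distance bytes behind the
+            current position; source and destination may overlap. */
+         for (i = 0; i < copy_len; ++i) {
+           output[pos + i] = output[pos + i - distance];
+         }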
+
+11. References
+
+   [1] Huffman, D. A., "A Method for the Construction of Minimum
+       Redundancy Codes", Proceedings of the Institute of Radio
+       Engineers, September 1952, Volume 40, Number 9, pp. 1098-1101.
+
+   [2] Ziv J., Lempel A., "A Universal Algorithm for Sequential Data
+       Compression", IEEE Transactions on Information Theory, Vol. 23,
+       No. 3, pp. 337-343.
+
+   [3] Deutsch, P., "DEFLATE Compressed Data Format Specification
+       version 1.3", RFC 1951, Aladdin Enterprises, May 1996.
+       http://www.ietf.org/rfc/rfc1951.txt
+
+12. Source code
+
+   Source code for a C language implementation of a "brotli" compliant
+   decompressor and a C++ language implementation of a compressor is
+   available in the brotli/ directory within the font-compression-
+   reference open-source project:
+   https://code.google.com/p/font-compression-reference/source/browse/
+
+Appendix A. List of dictionary words
+
+   TO BE WRITTEN
+
+Appendix B. List of word transformations
+
+   TO BE WRITTEN
diff --git a/brotli/dec/Makefile b/brotli/dec/Makefile
new file mode 100644
index 0000000..f1e39b9
--- /dev/null
+++ b/brotli/dec/Makefile
@@ -0,0 +1,10 @@
+#brotli/dec
+
+include ../../shared.mk
+
+OBJS = bit_reader.o decode.o huffman.o safe_malloc.o streams.o
+
+all : $(OBJS)
+
+clean :
+	rm -f $(OBJS)
diff --git a/brotli/dec/README b/brotli/dec/README
new file mode 100644
index 0000000..933bdfd
--- /dev/null
+++ b/brotli/dec/README
@@ -0,0 +1,3 @@
+This directory holds the decoder for the brotli compression format.
+
+Brotli is proposed as the byte-level compression used in the WOFF 2.0 format.
diff --git a/brotli/dec/bit_reader.c b/brotli/dec/bit_reader.c
new file mode 100644
index 0000000..9781248
--- /dev/null
+++ b/brotli/dec/bit_reader.c
@@ -0,0 +1,48 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Bit reading helpers
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "./bit_reader.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+int BrotliInitBitReader(BrotliBitReader* const br, BrotliInput input) {
+  size_t i;
+  assert(br != NULL);
+
+  br->input_ = input;
+  br->val_ = 0;
+  br->pos_ = 0;
+  br->bit_pos_ = 0;
+  br->end_pos_ = 0;
+  br->eos_ = 0;
+  if (!BrotliReadMoreInput(br)) {
+    return 0;
+  }
+  for (i = 0; i < sizeof(br->val_); ++i) {
+    br->val_ |= ((uint64_t)br->buf_[br->pos_]) << (8 * i);
+    ++br->pos_;
+  }
+  return (br->end_pos_ > 0);
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/brotli/dec/bit_reader.h b/brotli/dec/bit_reader.h
new file mode 100644
index 0000000..96be036
--- /dev/null
+++ b/brotli/dec/bit_reader.h
@@ -0,0 +1,167 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Bit reading helpers
+
+#ifndef BROTLI_DEC_BIT_READER_H_
+#define BROTLI_DEC_BIT_READER_H_
+
+#include <string.h>
+#include "./streams.h"
+#include "./types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BROTLI_MAX_NUM_BIT_READ   25
+#define BROTLI_READ_SIZE          4096
+#define BROTLI_IBUF_SIZE          (2 * BROTLI_READ_SIZE + 32)
+#define BROTLI_IBUF_MASK          (2 * BROTLI_READ_SIZE - 1)
+
+#define UNALIGNED_COPY64(dst, src) *(uint64_t*)(dst) = *(const uint64_t*)(src)
+
+static const uint32_t kBitMask[BROTLI_MAX_NUM_BIT_READ] = {
+  0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767,
+  65535, 131071, 262143, 524287, 1048575, 2097151, 4194303, 8388607, 16777215
+};
+
+typedef struct {
+  // Input byte buffer; consists of a ringbuffer and a "slack" region where
+  // bytes from the start of the ringbuffer are copied.
+  uint8_t buf_[BROTLI_IBUF_SIZE];
+  BrotliInput input_;    // input callback
+  uint64_t    val_;      // pre-fetched bits
+  size_t      pos_;      // byte position in stream
+  int         bit_pos_;  // current bit-reading position in val_
+  size_t      end_pos_;  // current end position in stream
+  int         eos_;      // input stream is finished
+} BrotliBitReader;
+
+int BrotliInitBitReader(BrotliBitReader* const br, BrotliInput input);
+
+// Return the prefetched bits, so they can be looked up.
+static BROTLI_INLINE uint32_t BrotliPrefetchBits(BrotliBitReader* const br) {
+  return (uint32_t)(br->val_ >> br->bit_pos_);
+}
+
+// For jumping over a number of bits in the bit stream when accessed with
+// BrotliPrefetchBits and BrotliFillBitWindow.
+static BROTLI_INLINE void BrotliSetBitPos(BrotliBitReader* const br, int val) {
+#ifdef BROTLI_DECODE_DEBUG
+  int n_bits = val - br->bit_pos_;
+  const uint32_t bval = (uint32_t)(br->val_ >> br->bit_pos_) & kBitMask[n_bits];
+  printf("[BrotliReadBits]  %010ld %2d  val: %6x\n",
+         (br->pos_ << 3) + br->bit_pos_ - 64, n_bits, bval);
+#endif
+  br->bit_pos_ = val;
+}
+
+// Reload up to 64 bits byte-by-byte
+static BROTLI_INLINE void ShiftBytes(BrotliBitReader* const br) {
+  while (br->bit_pos_ >= 8) {
+    br->val_ >>= 8;
+    br->val_ |= ((uint64_t)br->buf_[br->pos_ & BROTLI_IBUF_MASK]) << 56;
+    ++br->pos_;
+    br->bit_pos_ -= 8;
+  }
+}
+
+// Fills up the input ringbuffer by calling the input callback.
+//
+// Does nothing if there are at least 32 bytes present after current position.
+//
+// Returns 0 if either:
+//  - the input callback returned an error, or
+//  - there is no more input and the position is past the end of the stream.
+//
+// After encountering the end of the input stream, 32 additional zero bytes are
+// copied to the ringbuffer, therefore it is safe to call this function after
+// every 32 bytes of input is read.
+static BROTLI_INLINE int BrotliReadMoreInput(BrotliBitReader* const br) {
+  if (br->pos_ + 32 < br->end_pos_) {
+    return 1;
+  } else if (br->eos_) {
+    return (br->pos_ << 3) + br->bit_pos_ <= (br->end_pos_ << 3) + 64;
+  } else {
+    uint8_t* dst = br->buf_ + (br->end_pos_ & BROTLI_IBUF_MASK);
+    int bytes_read = BrotliRead(br->input_, dst, BROTLI_READ_SIZE);
+    if (bytes_read < 0) {
+      return 0;
+    }
+    if (bytes_read < BROTLI_READ_SIZE) {
+      br->eos_ = 1;
+      // Store 32 bytes of zero after the stream end.
+#if (defined(__x86_64__) || defined(_M_X64))
+      *(uint64_t*)(dst + bytes_read) = 0;
+      *(uint64_t*)(dst + bytes_read + 8) = 0;
+      *(uint64_t*)(dst + bytes_read + 16) = 0;
+      *(uint64_t*)(dst + bytes_read + 24) = 0;
+#else
+      memset(dst + bytes_read, 0, 32);
+#endif
+    }
+    if (dst == br->buf_) {
+      // Copy the head of the ringbuffer to the slack region.
+#if (defined(__x86_64__) || defined(_M_X64))
+      UNALIGNED_COPY64(br->buf_ + BROTLI_IBUF_SIZE - 32, br->buf_);
+      UNALIGNED_COPY64(br->buf_ + BROTLI_IBUF_SIZE - 24, br->buf_ + 8);
+      UNALIGNED_COPY64(br->buf_ + BROTLI_IBUF_SIZE - 16, br->buf_ + 16);
+      UNALIGNED_COPY64(br->buf_ + BROTLI_IBUF_SIZE - 8, br->buf_ + 24);
+#else
+      memcpy(br->buf_ + (BROTLI_READ_SIZE << 1), br->buf_, 32);
+#endif
+    }
+    br->end_pos_ += bytes_read;
+    return 1;
+  }
+}
+
+// Refills the bit window, if needed, so that at least 24 bits can be read.
+static BROTLI_INLINE void BrotliFillBitWindow(BrotliBitReader* const br) {
+  if (br->bit_pos_ >= 40) {
+#if (defined(__x86_64__) || defined(_M_X64))
+    br->val_ >>= 40;
+    br->bit_pos_ -= 40;
+    // The expression below needs a little-endian arch to work correctly.
+    // This gives a large speedup for decoding speed.
+    br->val_ |= *(const uint64_t*)(
+        br->buf_ + (br->pos_ & BROTLI_IBUF_MASK)) << 24;
+    br->pos_ += 5;
+#else
+    ShiftBytes(br);
+#endif
+  }
+}
+
+// Reads the specified number of bits from Read Buffer.
+// Requires that n_bits is positive.
+static BROTLI_INLINE uint32_t BrotliReadBits(
+    BrotliBitReader* const br, int n_bits) {
+  uint32_t val;
+  BrotliFillBitWindow(br);
+  val = (uint32_t)(br->val_ >> br->bit_pos_) & kBitMask[n_bits];
+#ifdef BROTLI_DECODE_DEBUG
+  printf("[BrotliReadBits]  %010ld %2d  val: %6x\n",
+         (br->pos_ << 3) + br->bit_pos_ - 64, n_bits, val);
+#endif
+  br->bit_pos_ += n_bits;
+  return val;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  // BROTLI_DEC_BIT_READER_H_
diff --git a/brotli/dec/context.h b/brotli/dec/context.h
new file mode 100644
index 0000000..212a445
--- /dev/null
+++ b/brotli/dec/context.h
@@ -0,0 +1,260 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Lookup table to map the previous two bytes to a context id.
+//
+// There are four different context modeling modes defined here:
+//   CONTEXT_LSB6: context id is the least significant 6 bits of the last byte,
+//   CONTEXT_MSB6: context id is the most significant 6 bits of the last byte,
+//   CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text,
+//   CONTEXT_SIGNED: second-order context model tuned for signed integers.
+//
+// The context id for the UTF8 context model is calculated as follows. If p1
+// and p2 are the previous two bytes, we calculate the context as
+//
+//   context = kContextLookup[p1] | kContextLookup[p2 + 256].
+//
+// If the previous two bytes are ASCII characters (i.e. < 128), this will be
+// equivalent to
+//
+//   context = 4 * context1(p1) + context2(p2),
+//
+// where context1 is based on the previous byte in the following way:
+//
+//   0  : non-ASCII control
+//   1  : \t, \n, \r
+//   2  : space
+//   3  : other punctuation
+//   4  : " '
+//   5  : %
+//   6  : ( < [ {
+//   7  : ) > ] }
+//   8  : , ; :
+//   9  : .
+//   10 : =
+//   11 : number
+//   12 : upper-case vowel
+//   13 : upper-case consonant
+//   14 : lower-case vowel
+//   15 : lower-case consonant
+//
+// and context2 is based on the second last byte:
+//
+//   0 : control, space
+//   1 : punctuation
+//   2 : upper-case letter, number
+//   3 : lower-case letter
+//
+// If the last byte is ASCII, and the second last byte is not (in a valid UTF8
+// stream it will be a continuation byte, value between 128 and 191), the
+// context is the same as if the second last byte was an ASCII control or space.
+//
+// If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
+// be a continuation byte and the context id is 2 or 3 depending on the LSB of
+// the last byte and to a lesser extent on the second last byte if it is ASCII.
+//
+// If the last byte is a UTF8 continuation byte, the second last byte can be:
+//   - continuation byte: the next byte is probably ASCII or lead byte (assuming
+//     4-byte UTF8 characters are rare) and the context id is 0 or 1.
+//   - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
+//   - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
+//
+// The possible value combinations of the previous two bytes, the range of
+// context ids and the type of the next byte is summarized in the table below:
+//
+// |--------\-----------------------------------------------------------------|
+// |         \                         Last byte                              |
+// | Second   \---------------------------------------------------------------|
+// | last byte \    ASCII            |   cont. byte        |   lead byte      |
+// |            \   (0-127)          |   (128-191)         |   (192-)         |
+// |=============|===================|=====================|==================|
+// |  ASCII      | next: ASCII/lead  |  not valid          |  next: cont.     |
+// |  (0-127)    | context: 4 - 63   |                     |  context: 2 - 3  |
+// |-------------|-------------------|---------------------|------------------|
+// |  cont. byte | next: ASCII/lead  |  next: ASCII/lead   |  next: cont.     |
+// |  (128-191)  | context: 4 - 63   |  context: 0 - 1     |  context: 2 - 3  |
+// |-------------|-------------------|---------------------|------------------|
+// |  lead byte  | not valid         |  next: ASCII/lead   |  not valid       |
+// |  (192-207)  |                   |  context: 0 - 1     |                  |
+// |-------------|-------------------|---------------------|------------------|
+// |  lead byte  | not valid         |  next: cont.        |  not valid       |
+// |  (208-)     |                   |  context: 2 - 3     |                  |
+// |-------------|-------------------|---------------------|------------------|
+//
+// The context id for the signed context mode is calculated as:
+//
+//   context = (kContextLookup[512 + p1] << 3) | kContextLookup[512 + p2].
+//
+// For any context modeling modes, the context ids can be calculated by |-ing
+// together two lookups from one table using context model dependent offsets:
+//
+//   context = kContextLookup[offset1 + p1] | kContextLookup[offset2 + p2].
+//
+// where offset1 and offset2 are dependent on the context mode.
+
+#ifndef BROTLI_DEC_CONTEXT_H_
+#define BROTLI_DEC_CONTEXT_H_
+
+#include "./types.h"
+
+enum ContextType {
+  CONTEXT_LSB6         = 0,
+  CONTEXT_MSB6         = 1,
+  CONTEXT_UTF8         = 2,
+  CONTEXT_SIGNED       = 3
+};
+
+// Common context lookup table for all context modes.
+static const uint8_t kContextLookup[1792] = {
+  // CONTEXT_UTF8, last byte.
+  //
+  // ASCII range.
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  4,  4,  0,  0,  4,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
+  44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
+  12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
+  52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
+  12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
+  60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12,  0,
+  // UTF8 continuation byte range.
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+  // UTF8 lead byte range.
+  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+  // CONTEXT_UTF8 second last byte.
+  //
+  // ASCII range.
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
+  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
+  1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
+  // UTF8 continuation byte range.
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  // UTF8 lead byte range.
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  // CONTEXT_SIGNED, second last byte.
+  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+  // CONTEXT_SIGNED, last byte, same as the above values shifted by 3 bits.
+   0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+  48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56,
+  // CONTEXT_LSB6, last byte.
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+  // CONTEXT_MSB6, last byte.
+   0,  0,  0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,
+   4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,
+   8,  8,  8,  8,  9,  9,  9,  9, 10, 10, 10, 10, 11, 11, 11, 11,
+  12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
+  16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
+  20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
+  24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
+  28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
+  32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
+  36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
+  40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
+  44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
+  48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51,
+  52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55,
+  56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59,
+  60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63,
+  // CONTEXT_{M,L}SB6, second last byte,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static const int kContextLookupOffsets[8] = {
+  // CONTEXT_LSB6
+  1024, 1536,
+  // CONTEXT_MSB6
+  1280, 1536,
+  // CONTEXT_UTF8
+  0, 256,
+  // CONTEXT_SIGNED
+  768, 512,
+};
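+
+// Illustrative note (a sketch, not used by the decoder code): with the
+// layout above, the context id for context mode m (0 - 3) and previous
+// bytes p1 and p2 can be computed as
+//
+//   context = kContextLookup[kContextLookupOffsets[2 * m] + p1] |
+//             kContextLookup[kContextLookupOffsets[2 * m + 1] + p2];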
+
+#endif  // BROTLI_DEC_CONTEXT_H_
diff --git a/brotli/dec/decode.c b/brotli/dec/decode.c
new file mode 100644
index 0000000..f7ae1df
--- /dev/null
+++ b/brotli/dec/decode.c
@@ -0,0 +1,966 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "./bit_reader.h"
+#include "./context.h"
+#include "./decode.h"
+#include "./huffman.h"
+#include "./prefix.h"
+#include "./safe_malloc.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#ifdef BROTLI_DECODE_DEBUG
+#define BROTLI_LOG_UINT(name)                                    \
+  printf("[%s] %s = %lu\n", __func__, #name, (unsigned long)(name))
+#define BROTLI_LOG_ARRAY_INDEX(array_name, idx)                  \
+  printf("[%s] %s[%lu] = %lu\n", __func__, #array_name, \
+         (unsigned long)(idx), (unsigned long)array_name[idx])
+#else
+#define BROTLI_LOG_UINT(name)
+#define BROTLI_LOG_ARRAY_INDEX(array_name, idx)
+#endif
+
+static const int kDefaultCodeLength = 8;
+static const int kCodeLengthRepeatCode = 16;
+static const int kNumLiteralCodes = 256;
+static const int kNumInsertAndCopyCodes = 704;
+static const int kNumBlockLengthCodes = 26;
+static const int kLiteralContextBits = 6;
+static const int kDistanceContextBits = 2;
+
+#define CODE_LENGTH_CODES 18
+static const uint8_t kCodeLengthCodeOrder[CODE_LENGTH_CODES] = {
+  1, 2, 3, 4, 0, 17, 5, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+};
+
+#define NUM_DISTANCE_SHORT_CODES 16
+static const int kDistanceShortCodeIndexOffset[NUM_DISTANCE_SHORT_CODES] = {
+  3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2
+};
+
+static const int kDistanceShortCodeValueOffset[NUM_DISTANCE_SHORT_CODES] = {
+  0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
+};
+
+static BROTLI_INLINE int DecodeWindowBits(BrotliBitReader* br) {
+  if (BrotliReadBits(br, 1)) {
+    return 17 + BrotliReadBits(br, 3);
+  } else {
+    return 16;
+  }
+}
+
+// Decodes a number in the range [0..255], by reading 1 - 11 bits.
+static BROTLI_INLINE int DecodeVarLenUint8(BrotliBitReader* br) {
+  if (BrotliReadBits(br, 1)) {
+    int nbits = BrotliReadBits(br, 3);
+    if (nbits == 0) {
+      return 1;
+    } else {
+      return BrotliReadBits(br, nbits) + (1 << nbits);
+    }
+  }
+  return 0;
+}
+
+static void DecodeMetaBlockLength(BrotliBitReader* br,
+                                  size_t* meta_block_length,
+                                  int* input_end,
+                                  int* is_uncompressed) {
+  int size_nibbles;
+  int i;
+  *input_end = BrotliReadBits(br, 1);
+  *meta_block_length = 0;
+  *is_uncompressed = 0;
+  if (*input_end && BrotliReadBits(br, 1)) {
+    return;
+  }
+  size_nibbles = BrotliReadBits(br, 2) + 4;
+  for (i = 0; i < size_nibbles; ++i) {
+    *meta_block_length |= BrotliReadBits(br, 4) << (i * 4);
+  }
+  ++(*meta_block_length);
+  if (!*input_end) {
+    *is_uncompressed = BrotliReadBits(br, 1);
+  }
+}
+
+// Decodes the next Huffman code from bit-stream.
+static BROTLI_INLINE int ReadSymbol(const HuffmanTree* tree,
+                                    BrotliBitReader* br) {
+  uint32_t bits;
+  int bitpos;
+  int lut_ix;
+  int lut_bits;
+  const HuffmanTreeNode* node = tree->root_;
+  BrotliFillBitWindow(br);
+  bits = BrotliPrefetchBits(br);
+  bitpos = br->bit_pos_;
+  // Check if we find the bit combination from the Huffman lookup table.
+  lut_ix = bits & (HUFF_LUT - 1);
+  lut_bits = tree->lut_bits_[lut_ix];
+  if (lut_bits <= HUFF_LUT_BITS) {
+    BrotliSetBitPos(br, bitpos + lut_bits);
+    return tree->lut_symbol_[lut_ix];
+  }
+  node += tree->lut_jump_[lut_ix];
+  bitpos += HUFF_LUT_BITS;
+  bits >>= HUFF_LUT_BITS;
+
+  // Decode the value from a binary tree.
+  assert(node != NULL);
+  do {
+    node = HuffmanTreeNextNode(node, bits & 1);
+    bits >>= 1;
+    ++bitpos;
+  } while (HuffmanTreeNodeIsNotLeaf(node));
+  BrotliSetBitPos(br, bitpos);
+  return node->symbol_;
+}
+
+static void PrintUcharVector(const uint8_t* v, int len) {
+  while (len-- > 0) printf(" %d", *v++);
+  printf("\n");
+}
+
+static int ReadHuffmanCodeLengths(
+    const uint8_t* code_length_code_lengths,
+    int num_symbols, uint8_t* code_lengths,
+    BrotliBitReader* br) {
+  int ok = 0;
+  int symbol;
+  int max_symbol;
+  int decode_number_of_code_length_codes;
+  int prev_code_len = kDefaultCodeLength;
+  int repeat = 0;
+  int repeat_length = 0;
+  HuffmanTree tree;
+
+  if (!BrotliHuffmanTreeBuildImplicit(&tree, code_length_code_lengths,
+                                      CODE_LENGTH_CODES)) {
+    printf("[ReadHuffmanCodeLengths] Building code length tree failed: ");
+    PrintUcharVector(code_length_code_lengths, CODE_LENGTH_CODES);
+    return 0;
+  }
+
+  if (!BrotliReadMoreInput(br)) {
+    printf("[ReadHuffmanCodeLengths] Unexpected end of input.\n");
+    return 0;
+  }
+  decode_number_of_code_length_codes = BrotliReadBits(br, 1);
+  BROTLI_LOG_UINT(decode_number_of_code_length_codes);
+  if (decode_number_of_code_length_codes) {
+    if (BrotliReadBits(br, 1)) {
+      max_symbol = 68 + BrotliReadBits(br, 7);
+    } else {
+      max_symbol = 4 + BrotliReadBits(br, 6);
+    }
+    if (max_symbol > num_symbols) {
+      printf("[ReadHuffmanCodeLengths] max_symbol > num_symbols (%d vs %d)\n",
+             max_symbol, num_symbols);
+      goto End;
+    }
+  } else {
+    max_symbol = num_symbols;
+  }
+  BROTLI_LOG_UINT(max_symbol);
+
+  symbol = 0;
+  while (symbol + repeat < num_symbols) {
+    int code_len;
+    if (max_symbol-- == 0) break;
+    if (!BrotliReadMoreInput(br)) {
+      printf("[ReadHuffmanCodeLengths] Unexpected end of input.\n");
+      goto End;
+    }
+    code_len = ReadSymbol(&tree, br);
+    BROTLI_LOG_UINT(symbol);
+    BROTLI_LOG_UINT(repeat);
+    BROTLI_LOG_UINT(repeat_length);
+    BROTLI_LOG_UINT(code_len);
+    if ((code_len < kCodeLengthRepeatCode) ||
+        (code_len == kCodeLengthRepeatCode && repeat_length == 0) ||
+        (code_len > kCodeLengthRepeatCode && repeat_length > 0)) {
+      while (repeat > 0) {
+        code_lengths[symbol++] = repeat_length;
+        --repeat;
+      }
+    }
+    if (code_len < kCodeLengthRepeatCode) {
+      code_lengths[symbol++] = code_len;
+      if (code_len != 0) prev_code_len = code_len;
+    } else {
+      const int extra_bits = code_len - 14;
+      if (repeat > 0) {
+        repeat -= 2;
+        repeat <<= extra_bits;
+      }
+      repeat += BrotliReadBits(br, extra_bits) + 3;
+      repeat_length = (code_len == kCodeLengthRepeatCode ? prev_code_len : 0);
+    }
+  }
+  if (symbol + repeat > num_symbols) {
+    printf("[ReadHuffmanCodeLengths] symbol + repeat > num_symbols "
+           "(%d + %d vs %d)\n", symbol, repeat, num_symbols);
+    goto End;
+  }
+  while (repeat-- > 0) code_lengths[symbol++] = repeat_length;
+  while (symbol < num_symbols) code_lengths[symbol++] = 0;
+  ok = 1;
+
+ End:
+  BrotliHuffmanTreeRelease(&tree);
+  return ok;
+}
+
+static int ReadHuffmanCode(int alphabet_size,
+                           HuffmanTree* tree,
+                           BrotliBitReader* br) {
+  int ok = 1;
+  int simple_code;
+  uint8_t* code_lengths = NULL;
+
+  code_lengths =
+      (uint8_t*)BrotliSafeMalloc((uint64_t)alphabet_size,
+                                 sizeof(*code_lengths));
+  if (code_lengths == NULL) {
+    return 0;
+  }
+  if (!BrotliReadMoreInput(br)) {
+    printf("[ReadHuffmanCode] Unexpected end of input.\n");
+    return 0;
+  }
+  simple_code = BrotliReadBits(br, 1);
+  BROTLI_LOG_UINT(simple_code);
+  if (simple_code) {  // Read symbols, codes & code lengths directly.
+    int i;
+    int max_bits_counter = alphabet_size - 1;
+    int max_bits = 0;
+    int symbols[4] = { 0 };
+    const int num_symbols = BrotliReadBits(br, 2) + 1;
+    while (max_bits_counter) {
+      max_bits_counter >>= 1;
+      ++max_bits;
+    }
+    memset(code_lengths, 0, alphabet_size);
+    for (i = 0; i < num_symbols; ++i) {
+      symbols[i] = BrotliReadBits(br, max_bits);
+      code_lengths[symbols[i]] = 2;
+    }
+    code_lengths[symbols[0]] = 1;
+    switch (num_symbols) {
+      case 1:
+      case 3:
+        break;
+      case 2:
+        code_lengths[symbols[1]] = 1;
+        break;
+      case 4:
+        if (BrotliReadBits(br, 1)) {
+          code_lengths[symbols[2]] = 3;
+          code_lengths[symbols[3]] = 3;
+        } else {
+          code_lengths[symbols[0]] = 2;
+        }
+        break;
+    }
+    BROTLI_LOG_UINT(num_symbols);
+  } else {  // Decode Huffman-coded code lengths.
+    int i;
+    uint8_t code_length_code_lengths[CODE_LENGTH_CODES] = { 0 };
+    const int num_codes = BrotliReadBits(br, 4) + 3;
+    BROTLI_LOG_UINT(num_codes);
+    if (num_codes > CODE_LENGTH_CODES) {
+      return 0;
+    }
+    for (i = BrotliReadBits(br, 1) * 2; i < num_codes; ++i) {
+      int code_len_idx = kCodeLengthCodeOrder[i];
+      int v = BrotliReadBits(br, 2);
+      if (v == 1) {
+        v = BrotliReadBits(br, 1);
+        if (v == 0) {
+          v = 2;
+        } else {
+          v = BrotliReadBits(br, 1);
+          if (v == 0) {
+            v = 1;
+          } else {
+            v = 5;
+          }
+        }
+      } else if (v == 2) {
+        v = 4;
+      }
+      code_length_code_lengths[code_len_idx] = v;
+      BROTLI_LOG_ARRAY_INDEX(code_length_code_lengths, code_len_idx);
+    }
+    ok = ReadHuffmanCodeLengths(code_length_code_lengths, alphabet_size,
+                                code_lengths, br);
+  }
+  if (ok) {
+    ok = BrotliHuffmanTreeBuildImplicit(tree, code_lengths, alphabet_size);
+    if (!ok) {
+      printf("[ReadHuffmanCode] HuffmanTreeBuildImplicit failed: ");
+      PrintUcharVector(code_lengths, alphabet_size);
+    }
+  }
+  free(code_lengths);
+  return ok;
+}
+
+static int ReadCopyDistance(const HuffmanTree* tree,
+                            int num_direct_codes,
+                            int postfix_bits,
+                            uint32_t postfix_mask,
+                            BrotliBitReader* br) {
+  int code;
+  int nbits;
+  int postfix;
+  int offset;
+  code = ReadSymbol(tree, br);
+  if (code < num_direct_codes) {
+    return code;
+  }
+  code -= num_direct_codes;
+  postfix = code & postfix_mask;
+  code >>= postfix_bits;
+  nbits = (code >> 1) + 1;
+  offset = ((2 + (code & 1)) << nbits) - 4;
+  return (num_direct_codes +
+          ((offset + BrotliReadBits(br, nbits)) << postfix_bits) +
+          postfix);
+}
+
+static int ReadBlockLength(const HuffmanTree* tree, BrotliBitReader* br) {
+  int code;
+  int nbits;
+  code = ReadSymbol(tree, br);
+  nbits = kBlockLengthPrefixCode[code].nbits;
+  return kBlockLengthPrefixCode[code].offset + BrotliReadBits(br, nbits);
+}
+
+static void ReadInsertAndCopy(const HuffmanTree* tree,
+                              int* insert_len,
+                              int* copy_len,
+                              int* copy_dist,
+                              BrotliBitReader* br) {
+  int code;
+  int range_idx;
+  int insert_code;
+  int insert_extra_bits;
+  int copy_code;
+  int copy_extra_bits;
+  code = ReadSymbol(tree, br);
+  range_idx = code >> 6;
+  if (range_idx >= 2) {
+    range_idx -= 2;
+    *copy_dist = -1;
+  } else {
+    *copy_dist = 0;
+  }
+  insert_code = kInsertRangeLut[range_idx] + ((code >> 3) & 7);
+  copy_code = kCopyRangeLut[range_idx] + (code & 7);
+  *insert_len = kInsertLengthPrefixCode[insert_code].offset;
+  insert_extra_bits = kInsertLengthPrefixCode[insert_code].nbits;
+  if (insert_extra_bits > 0) {
+    *insert_len += BrotliReadBits(br, insert_extra_bits);
+  }
+  *copy_len = kCopyLengthPrefixCode[copy_code].offset;
+  copy_extra_bits = kCopyLengthPrefixCode[copy_code].nbits;
+  if (copy_extra_bits > 0) {
+    *copy_len += BrotliReadBits(br, copy_extra_bits);
+  }
+}
+
+static int TranslateShortCodes(int code, int* ringbuffer, size_t index) {
+  int val;
+  if (code < NUM_DISTANCE_SHORT_CODES) {
+    index += kDistanceShortCodeIndexOffset[code];
+    index &= 3;
+    val = ringbuffer[index] + kDistanceShortCodeValueOffset[code];
+  } else {
+    val = code - NUM_DISTANCE_SHORT_CODES + 1;
+  }
+  return val;
+}
+
+static void MoveToFront(uint8_t* v, uint8_t index) {
+  uint8_t value = v[index];
+  uint8_t i = index;
+  for (; i; --i) v[i] = v[i - 1];
+  v[0] = value;
+}
+
+static void InverseMoveToFrontTransform(uint8_t* v, int v_len) {
+  uint8_t mtf[256];
+  int i;
+  for (i = 0; i < 256; ++i) {
+    mtf[i] = i;
+  }
+  for (i = 0; i < v_len; ++i) {
+    uint8_t index = v[i];
+    v[i] = mtf[index];
+    if (index) MoveToFront(mtf, index);
+  }
+}
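+
+// Worked example (illustrative): for the input {1, 1, 2, 0} the transform
+// proceeds as follows, starting from the identity table mtf = {0, 1, 2, ...}:
+//   v[0] = 1: output mtf[1] = 1, table becomes {1, 0, 2, ...}
+//   v[1] = 1: output mtf[1] = 0, table becomes {0, 1, 2, ...}
+//   v[2] = 2: output mtf[2] = 2, table becomes {2, 0, 1, ...}
+//   v[3] = 0: output mtf[0] = 2, table unchanged
+// so {1, 1, 2, 0} decodes to {1, 0, 2, 2}.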
+
+// Contains a collection of Huffman trees with the same alphabet size.
+typedef struct {
+  int alphabet_size;
+  int num_htrees;
+  HuffmanTree* htrees;
+} HuffmanTreeGroup;
+
+static void HuffmanTreeGroupInit(HuffmanTreeGroup* group, int alphabet_size,
+                                 int ntrees) {
+  group->alphabet_size = alphabet_size;
+  group->num_htrees = ntrees;
+  group->htrees = (HuffmanTree*)malloc(sizeof(HuffmanTree) * ntrees);
+}
+
+static void HuffmanTreeGroupRelease(HuffmanTreeGroup* group) {
+  int i;
+  for (i = 0; i < group->num_htrees; ++i) {
+    BrotliHuffmanTreeRelease(&group->htrees[i]);
+  }
+  free(group->htrees);
+}
+
+static int HuffmanTreeGroupDecode(HuffmanTreeGroup* group,
+                                  BrotliBitReader* br) {
+  int i;
+  for (i = 0; i < group->num_htrees; ++i) {
+    if (!ReadHuffmanCode(group->alphabet_size, &group->htrees[i], br)) {
+      return 0;
+    }
+  }
+  return 1;
+}
+
+static int DecodeContextMap(int context_map_size,
+                            int* num_htrees,
+                            uint8_t** context_map,
+                            BrotliBitReader* br) {
+  int ok = 1;
+  if (!BrotliReadMoreInput(br)) {
+    printf("[DecodeContextMap] Unexpected end of input.\n");
+    return 0;
+  }
+  *num_htrees = DecodeVarLenUint8(br) + 1;
+
+  BROTLI_LOG_UINT(context_map_size);
+  BROTLI_LOG_UINT(*num_htrees);
+
+  *context_map = (uint8_t*)malloc(context_map_size);
+  if (*num_htrees <= 1) {
+    memset(*context_map, 0, context_map_size);
+    return 1;
+  }
+
+  {
+    HuffmanTree tree_index_htree;
+    int use_rle_for_zeros = BrotliReadBits(br, 1);
+    int max_run_length_prefix = 0;
+    int i;
+    if (use_rle_for_zeros) {
+      max_run_length_prefix = BrotliReadBits(br, 4) + 1;
+    }
+    if (!ReadHuffmanCode(*num_htrees + max_run_length_prefix,
+                         &tree_index_htree, br)) {
+      return 0;
+    }
+    for (i = 0; i < context_map_size;) {
+      int code;
+      if (!BrotliReadMoreInput(br)) {
+        printf("[DecodeContextMap] Unexpected end of input.\n");
+        ok = 0;
+        goto End;
+      }
+      code = ReadSymbol(&tree_index_htree, br);
+      if (code == 0) {
+        (*context_map)[i] = 0;
+        ++i;
+      } else if (code <= max_run_length_prefix) {
+        int reps = 1 + (1 << code) + BrotliReadBits(br, code);
+        while (--reps) {
+          (*context_map)[i] = 0;
+          ++i;
+        }
+      } else {
+        (*context_map)[i] = code - max_run_length_prefix;
+        ++i;
+      }
+    }
+   End:
+    BrotliHuffmanTreeRelease(&tree_index_htree);
+  }
+  if (BrotliReadBits(br, 1)) {
+    InverseMoveToFrontTransform(*context_map, context_map_size);
+  }
+  return ok;
+}
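+
+// Worked example (illustrative): with use_rle_for_zeros set and
+// max_run_length_prefix == 3, a decoded code of 2 yields
+// reps = 1 + (1 << 2) + BrotliReadBits(br, 2), and the loop above then writes
+// reps - 1 zeros, i.e. between 4 and 7 zero entries into the context map.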
+
+static BROTLI_INLINE void DecodeBlockType(const HuffmanTree* trees,
+                                         int tree_type,
+                                         int* block_types,
+                                         int* ringbuffers,
+                                         size_t* indexes,
+                                         BrotliBitReader* br) {
+  int* ringbuffer = ringbuffers + tree_type * 2;
+  size_t* index = indexes + tree_type;
+  int type_code = ReadSymbol(trees + tree_type, br);
+  int block_type;
+  if (type_code == 0) {
+    block_type = ringbuffer[*index & 1];
+  } else if (type_code == 1) {
+    block_type = ringbuffer[(*index - 1) & 1] + 1;
+  } else {
+    block_type = type_code - 2;
+  }
+  block_types[tree_type] = block_type;
+  ringbuffer[(*index) & 1] = block_type;
+  ++(*index);
+}
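+
+// Example (illustrative): with the initial ring buffer {0, 1} and *index == 1,
+// a type code of 1 decodes to ringbuffer[0] + 1 = 1, a type code of 0 repeats
+// ringbuffer[1] = 1, and any type code >= 2 decodes directly to type_code - 2.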
+
+// Copy len bytes from src to dst. It can write up to ten extra bytes
+// after the end of the copy.
+//
+// The main part of this loop is a simple copy of eight bytes at a time until
+// we've copied (at least) the requested amount of bytes.  However, if dst and
+// src are less than eight bytes apart (indicating a repeating pattern of
+// length < 8), we first need to expand the pattern in order to get the correct
+// results. For instance, if the buffer looks like this, with the eight-byte
+// <src> and <dst> patterns marked as intervals:
+//
+//    abxxxxxxxxxxxx
+//    [------]           src
+//      [------]         dst
+//
+// a single eight-byte copy from <src> to <dst> will repeat the pattern once,
+// after which we can move <dst> two bytes without moving <src>:
+//
+//    ababxxxxxxxxxx
+//    [------]           src
+//        [------]       dst
+//
+// and repeat the exercise until the two no longer overlap.
+//
+// This allows us to do very well in the special case of one single byte
+// repeated many times, without taking a big hit for more general cases.
+//
+// The worst case of extra writing past the end of the match occurs when
+// dst - src == 1 and len == 1; the last copy will read from byte positions
+// [0..7] and write to [4..11], whereas it was only supposed to write to
+// position 1. Thus, ten excess bytes.
+static BROTLI_INLINE void IncrementalCopyFastPath(
+    uint8_t* dst, const uint8_t* src, int len) {
+  if (src < dst) {
+    while (dst - src < 8) {
+      UNALIGNED_COPY64(dst, src);
+      len -= dst - src;
+      dst += dst - src;
+    }
+  }
+  while (len > 0) {
+    UNALIGNED_COPY64(dst, src);
+    src += 8;
+    dst += 8;
+    len -= 8;
+  }
+}
+
+int BrotliDecompressedSize(size_t encoded_size,
+                           const uint8_t* encoded_buffer,
+                           size_t* decoded_size) {
+  BrotliMemInput memin;
+  BrotliInput input = BrotliInitMemInput(encoded_buffer, encoded_size, &memin);
+  BrotliBitReader br;
+  size_t meta_block_len;
+  int input_end;
+  int is_uncompressed;
+  if (!BrotliInitBitReader(&br, input)) {
+    return 0;
+  }
+  DecodeWindowBits(&br);
+  DecodeMetaBlockLength(&br, &meta_block_len, &input_end, &is_uncompressed);
+  if (!input_end) {
+    return 0;
+  }
+  *decoded_size = meta_block_len;
+  return 1;
+}
+
+int BrotliDecompressBuffer(size_t encoded_size,
+                           const uint8_t* encoded_buffer,
+                           size_t* decoded_size,
+                           uint8_t* decoded_buffer) {
+  BrotliMemInput memin;
+  BrotliInput in = BrotliInitMemInput(encoded_buffer, encoded_size, &memin);
+  BrotliMemOutput mout;
+  BrotliOutput out = BrotliInitMemOutput(decoded_buffer, *decoded_size, &mout);
+  int success = BrotliDecompress(in, out);
+  *decoded_size = mout.pos;
+  return success;
+}
+
+int BrotliDecompress(BrotliInput input, BrotliOutput output) {
+  int ok = 1;
+  int i;
+  size_t pos = 0;
+  int input_end = 0;
+  int window_bits = 0;
+  size_t max_backward_distance;
+  size_t ringbuffer_size;
+  size_t ringbuffer_mask;
+  uint8_t* ringbuffer;
+  uint8_t* ringbuffer_end;
+  // This ring buffer holds a few past copy distances that will be used by
+  // some special distance codes.
+  int dist_rb[4] = { 16, 15, 11, 4 };
+  size_t dist_rb_idx = 0;
+  // The previous 2 bytes used for context.
+  uint8_t prev_byte1 = 0;
+  uint8_t prev_byte2 = 0;
+  HuffmanTreeGroup hgroup[3];
+  BrotliBitReader br;
+
+  static const int kRingBufferWriteAheadSlack = 16;
+
+  static const int kMaxDictionaryWordLength = 0;
+
+  if (!BrotliInitBitReader(&br, input)) {
+    return 0;
+  }
+
+  // Decode window size.
+  window_bits = DecodeWindowBits(&br);
+  max_backward_distance = (1ULL << window_bits) - 16;
+
+  ringbuffer_size = 1ULL << window_bits;
+  ringbuffer_mask = ringbuffer_size - 1;
+  ringbuffer = (uint8_t*)malloc(ringbuffer_size +
+                                kRingBufferWriteAheadSlack +
+                                kMaxDictionaryWordLength);
+  ringbuffer_end = ringbuffer + ringbuffer_size;
+
+  while (!input_end && ok) {
+    size_t meta_block_len = 0;
+    size_t meta_block_end_pos;
+    int is_uncompressed;
+    uint32_t block_length[3] = { 1 << 28, 1 << 28, 1 << 28 };
+    int block_type[3] = { 0 };
+    int num_block_types[3] = { 1, 1, 1 };
+    int block_type_rb[6] = { 0, 1, 0, 1, 0, 1 };
+    size_t block_type_rb_index[3] = { 0 };
+    HuffmanTree block_type_trees[3];
+    HuffmanTree block_len_trees[3];
+    int distance_postfix_bits;
+    int num_direct_distance_codes;
+    uint32_t distance_postfix_mask;
+    int num_distance_codes;
+    uint8_t* context_map = NULL;
+    uint8_t* context_modes = NULL;
+    int num_literal_htrees;
+    uint8_t* dist_context_map = NULL;
+    int num_dist_htrees;
+    int context_offset = 0;
+    uint8_t* context_map_slice = NULL;
+    uint8_t literal_htree_index = 0;
+    int dist_context_offset = 0;
+    uint8_t* dist_context_map_slice = NULL;
+    uint8_t dist_htree_index = 0;
+    int context_lookup_offset1 = 0;
+    int context_lookup_offset2 = 0;
+    uint8_t context_mode;
+
+    for (i = 0; i < 3; ++i) {
+      hgroup[i].num_htrees = 0;
+      hgroup[i].htrees = NULL;
+      block_type_trees[i].root_ = NULL;
+      block_len_trees[i].root_ = NULL;
+    }
+
+    if (!BrotliReadMoreInput(&br)) {
+      printf("[BrotliDecompress] Unexpected end of input.\n");
+      ok = 0;
+      goto End;
+    }
+    BROTLI_LOG_UINT(pos);
+    DecodeMetaBlockLength(&br, &meta_block_len, &input_end, &is_uncompressed);
+    BROTLI_LOG_UINT(meta_block_len);
+    if (meta_block_len == 0) {
+      goto End;
+    }
+    meta_block_end_pos = pos + meta_block_len;
+    if (is_uncompressed) {
+      BrotliSetBitPos(&br, (br.bit_pos_ + 7) & ~7);
+      for (; pos < meta_block_end_pos; ++pos) {
+        ringbuffer[pos & ringbuffer_mask] = BrotliReadBits(&br, 8);
+        if ((pos & ringbuffer_mask) == ringbuffer_mask) {
+          if (BrotliWrite(output, ringbuffer, ringbuffer_size) < 0) {
+            ok = 0;
+            goto End;
+          }
+        }
+      }
+      goto End;
+    }
+    for (i = 0; i < 3; ++i) {
+      block_type_trees[i].root_ = NULL;
+      block_len_trees[i].root_ = NULL;
+      num_block_types[i] = DecodeVarLenUint8(&br) + 1;
+      if (num_block_types[i] >= 2) {
+        if (!ReadHuffmanCode(
+                num_block_types[i] + 2, &block_type_trees[i], &br) ||
+            !ReadHuffmanCode(kNumBlockLengthCodes, &block_len_trees[i], &br)) {
+          ok = 0;
+          goto End;
+        }
+        block_length[i] = ReadBlockLength(&block_len_trees[i], &br);
+        block_type_rb_index[i] = 1;
+      }
+    }
+
+    BROTLI_LOG_UINT(num_block_types[0]);
+    BROTLI_LOG_UINT(num_block_types[1]);
+    BROTLI_LOG_UINT(num_block_types[2]);
+    BROTLI_LOG_UINT(block_length[0]);
+    BROTLI_LOG_UINT(block_length[1]);
+    BROTLI_LOG_UINT(block_length[2]);
+
+    if (!BrotliReadMoreInput(&br)) {
+      printf("[BrotliDecompress] Unexpected end of input.\n");
+      ok = 0;
+      goto End;
+    }
+    distance_postfix_bits = BrotliReadBits(&br, 2);
+    num_direct_distance_codes = NUM_DISTANCE_SHORT_CODES +
+        (BrotliReadBits(&br, 4) << distance_postfix_bits);
+    distance_postfix_mask = (1 << distance_postfix_bits) - 1;
+    num_distance_codes = (num_direct_distance_codes +
+                          (48 << distance_postfix_bits));
+    context_modes = (uint8_t*)malloc(num_block_types[0]);
+    for (i = 0; i < num_block_types[0]; ++i) {
+      context_modes[i] = BrotliReadBits(&br, 2) << 1;
+      BROTLI_LOG_ARRAY_INDEX(context_modes, i);
+    }
+    BROTLI_LOG_UINT(num_direct_distance_codes);
+    BROTLI_LOG_UINT(distance_postfix_bits);
+
+    if (!DecodeContextMap(num_block_types[0] << kLiteralContextBits,
+                          &num_literal_htrees, &context_map, &br) ||
+        !DecodeContextMap(num_block_types[2] << kDistanceContextBits,
+                          &num_dist_htrees, &dist_context_map, &br)) {
+      ok = 0;
+      goto End;
+    }
+
+    HuffmanTreeGroupInit(&hgroup[0], kNumLiteralCodes, num_literal_htrees);
+    HuffmanTreeGroupInit(&hgroup[1], kNumInsertAndCopyCodes,
+                         num_block_types[1]);
+    HuffmanTreeGroupInit(&hgroup[2], num_distance_codes, num_dist_htrees);
+
+    for (i = 0; i < 3; ++i) {
+      if (!HuffmanTreeGroupDecode(&hgroup[i], &br)) {
+        ok = 0;
+        goto End;
+      }
+    }
+
+    context_map_slice = context_map;
+    dist_context_map_slice = dist_context_map;
+    context_mode = context_modes[block_type[0]];
+    context_lookup_offset1 = kContextLookupOffsets[context_mode];
+    context_lookup_offset2 = kContextLookupOffsets[context_mode + 1];
+
+    while (pos < meta_block_end_pos) {
+      int insert_length;
+      int copy_length;
+      int distance_code;
+      int distance;
+      size_t max_distance;
+      uint8_t context;
+      int j;
+      const uint8_t* copy_src;
+      uint8_t* copy_dst;
+      if (!BrotliReadMoreInput(&br)) {
+        printf("[BrotliDecompress] Unexpected end of input.\n");
+        ok = 0;
+        goto End;
+      }
+      if (block_length[1] == 0) {
+        DecodeBlockType(block_type_trees, 1, block_type, block_type_rb,
+                        block_type_rb_index, &br);
+        block_length[1] = ReadBlockLength(&block_len_trees[1], &br);
+      }
+      --block_length[1];
+      ReadInsertAndCopy(&hgroup[1].htrees[block_type[1]],
+                        &insert_length, &copy_length, &distance_code, &br);
+      BROTLI_LOG_UINT(insert_length);
+      BROTLI_LOG_UINT(copy_length);
+      BROTLI_LOG_UINT(distance_code);
+      for (j = 0; j < insert_length; ++j) {
+        if (!BrotliReadMoreInput(&br)) {
+          printf("[BrotliDecompress] Unexpected end of input.\n");
+          ok = 0;
+          goto End;
+        }
+        if (block_length[0] == 0) {
+          DecodeBlockType(block_type_trees, 0, block_type, block_type_rb,
+                          block_type_rb_index, &br);
+          block_length[0] = ReadBlockLength(&block_len_trees[0], &br);
+          context_offset = block_type[0] << kLiteralContextBits;
+          context_map_slice = context_map + context_offset;
+          context_mode = context_modes[block_type[0]];
+          context_lookup_offset1 = kContextLookupOffsets[context_mode];
+          context_lookup_offset2 = kContextLookupOffsets[context_mode + 1];
+        }
+        context = (kContextLookup[context_lookup_offset1 + prev_byte1] |
+                   kContextLookup[context_lookup_offset2 + prev_byte2]);
+        BROTLI_LOG_UINT(context);
+        literal_htree_index = context_map_slice[context];
+        --block_length[0];
+        prev_byte2 = prev_byte1;
+        prev_byte1 = ReadSymbol(&hgroup[0].htrees[literal_htree_index], &br);
+        ringbuffer[pos & ringbuffer_mask] = prev_byte1;
+        BROTLI_LOG_UINT(literal_htree_index);
+        BROTLI_LOG_ARRAY_INDEX(ringbuffer, pos & ringbuffer_mask);
+        if ((pos & ringbuffer_mask) == ringbuffer_mask) {
+          if (BrotliWrite(output, ringbuffer, ringbuffer_size) < 0) {
+            ok = 0;
+            goto End;
+          }
+        }
+        ++pos;
+      }
+      if (pos == meta_block_end_pos) break;
+
+      if (distance_code < 0) {
+        uint8_t context;
+        if (!BrotliReadMoreInput(&br)) {
+          printf("[BrotliDecompress] Unexpected end of input.\n");
+          ok = 0;
+          goto End;
+        }
+        if (block_length[2] == 0) {
+          DecodeBlockType(block_type_trees, 2, block_type, block_type_rb,
+                          block_type_rb_index, &br);
+          block_length[2] = ReadBlockLength(&block_len_trees[2], &br);
+          dist_htree_index = block_type[2];
+          dist_context_offset = block_type[2] << kDistanceContextBits;
+          dist_context_map_slice = dist_context_map + dist_context_offset;
+        }
+        --block_length[2];
+        context = copy_length > 4 ? 3 : copy_length - 2;
+        dist_htree_index = dist_context_map_slice[context];
+        distance_code = ReadCopyDistance(&hgroup[2].htrees[dist_htree_index],
+                                         num_direct_distance_codes,
+                                         distance_postfix_bits,
+                                         distance_postfix_mask,
+                                         &br);
+      }
+
+      // Convert the distance code to the actual distance by possibly looking
+      // up past distances from the ringbuffer.
+      distance = TranslateShortCodes(distance_code, dist_rb, dist_rb_idx);
+      if (distance_code > 0) {
+        dist_rb[dist_rb_idx & 3] = distance;
+        ++dist_rb_idx;
+      }
+      BROTLI_LOG_UINT(distance);
+
+      max_distance = max_backward_distance;
+      if (pos < max_distance) {
+        max_distance = pos;
+      }
+
+      copy_dst = &ringbuffer[pos & ringbuffer_mask];
+
+      if ((size_t)distance > max_distance) {
+        printf("Invalid backward reference. pos: %lu distance: %d "
+               "len: %d end: %lu\n", (unsigned long)pos, distance, copy_length,
+               (unsigned long)meta_block_end_pos);
+        ok = 0;
+        goto End;
+      } else {
+        if (pos + copy_length > meta_block_end_pos) {
+          printf("Invalid backward reference. pos: %lu distance: %d "
+                 "len: %d end: %lu\n", (unsigned long)pos, distance,
+                 copy_length, (unsigned long)meta_block_end_pos);
+          ok = 0;
+          goto End;
+        }
+
+        copy_src = &ringbuffer[(pos - distance) & ringbuffer_mask];
+
+#if (defined(__x86_64__) || defined(_M_X64))
+        if (copy_src + copy_length <= ringbuffer_end &&
+            copy_dst + copy_length < ringbuffer_end) {
+          if (copy_length <= 16 && distance >= 8) {
+            UNALIGNED_COPY64(copy_dst, copy_src);
+            UNALIGNED_COPY64(copy_dst + 8, copy_src + 8);
+          } else {
+            IncrementalCopyFastPath(copy_dst, copy_src, copy_length);
+          }
+          pos += copy_length;
+          copy_length = 0;
+        }
+#endif
+
+        for (j = 0; j < copy_length; ++j) {
+          ringbuffer[pos & ringbuffer_mask] =
+              ringbuffer[(pos - distance) & ringbuffer_mask];
+          if ((pos & ringbuffer_mask) == ringbuffer_mask) {
+            if (BrotliWrite(output, ringbuffer, ringbuffer_size) < 0) {
+              ok = 0;
+              goto End;
+            }
+          }
+          ++pos;
+        }
+      }
+
+      // When we get here, we must have inserted at least one literal and made
+      // a copy of at least length two, therefore accessing the last 2 bytes is
+      // valid.
+      prev_byte1 = ringbuffer[(pos - 1) & ringbuffer_mask];
+      prev_byte2 = ringbuffer[(pos - 2) & ringbuffer_mask];
+    }
+ End:
+    free(context_modes);
+    free(context_map);
+    free(dist_context_map);
+    for (i = 0; i < 3; ++i) {
+      HuffmanTreeGroupRelease(&hgroup[i]);
+      BrotliHuffmanTreeRelease(&block_type_trees[i]);
+      BrotliHuffmanTreeRelease(&block_len_trees[i]);
+    }
+  }
+
+  if (BrotliWrite(output, ringbuffer, pos & ringbuffer_mask) < 0) {
+    ok = 0;
+  }
+  free(ringbuffer);
+  return ok;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/brotli/dec/decode.h b/brotli/dec/decode.h
new file mode 100644
index 0000000..760ec79
--- /dev/null
+++ b/brotli/dec/decode.h
@@ -0,0 +1,51 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// API for Brotli decompression
+
+#ifndef BROTLI_DEC_DECODE_H_
+#define BROTLI_DEC_DECODE_H_
+
+#include "./streams.h"
+#include "./types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// Sets *decoded_size to the decompressed size of the given encoded stream.
+// Returns 1 on success, 0 on failure.
+int BrotliDecompressedSize(size_t encoded_size,
+                           const uint8_t* encoded_buffer,
+                           size_t* decoded_size);
+
+// Decompresses the data in encoded_buffer into decoded_buffer, and sets
+// *decoded_size to the decompressed length.
+// Returns 0 if there was either a bit stream error or memory allocation error,
+// and 1 otherwise.
+// If decoded size is zero, returns 1 and keeps decoded_buffer unchanged.
+int BrotliDecompressBuffer(size_t encoded_size,
+                           const uint8_t* encoded_buffer,
+                           size_t* decoded_size,
+                           uint8_t* decoded_buffer);
+
+// Same as above, but uses the specified input and output callbacks instead of
+// reading from and writing to pre-allocated memory buffers.
+int BrotliDecompress(BrotliInput input, BrotliOutput output);
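+
+// Illustrative usage sketch (not part of the API): decompressing a buffer that
+// is already in memory. The variable names and the error handling below are
+// only an example.
+//
+//   size_t decoded_size = 0;
+//   if (BrotliDecompressedSize(encoded_size, encoded, &decoded_size)) {
+//     uint8_t* decoded = (uint8_t*)malloc(decoded_size);
+//     if (decoded != NULL &&
+//         BrotliDecompressBuffer(encoded_size, encoded, &decoded_size,
+//                                decoded)) {
+//       /* decoded[0 .. decoded_size) now holds the uncompressed data. */
+//     }
+//     free(decoded);
+//   }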
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  // BROTLI_DEC_DECODE_H_
diff --git a/brotli/dec/huffman.c b/brotli/dec/huffman.c
new file mode 100644
index 0000000..6327792
--- /dev/null
+++ b/brotli/dec/huffman.c
@@ -0,0 +1,252 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Utilities for building and looking up Huffman trees.
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include "./huffman.h"
+#include "./safe_malloc.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define NON_EXISTENT_SYMBOL (-1)
+#define MAX_ALLOWED_CODE_LENGTH      15
+
+static void TreeNodeInit(HuffmanTreeNode* const node) {
+  node->children_ = -1;   // means: 'unassigned so far'
+}
+
+static int NodeIsEmpty(const HuffmanTreeNode* const node) {
+  return (node->children_ < 0);
+}
+
+static int IsFull(const HuffmanTree* const tree) {
+  return (tree->num_nodes_ == tree->max_nodes_);
+}
+
+static void AssignChildren(HuffmanTree* const tree,
+                           HuffmanTreeNode* const node) {
+  HuffmanTreeNode* const children = tree->root_ + tree->num_nodes_;
+  node->children_ = (int)(children - node);
+  assert(children - node == (int)(children - node));
+  tree->num_nodes_ += 2;
+  TreeNodeInit(children + 0);
+  TreeNodeInit(children + 1);
+}
+
+static int TreeInit(HuffmanTree* const tree, int num_leaves) {
+  assert(tree != NULL);
+  if (num_leaves == 0) return 0;
+  // We allocate the maximum possible number of nodes in the tree at once.
+  // Note that a Huffman tree is a full binary tree, and a full binary tree
+  // with L leaves has a total of N = 2 * L - 1 nodes.
+  tree->max_nodes_ = 2 * num_leaves - 1;
+  assert(tree->max_nodes_ < (1 << 16));   // limit for the lut_jump_ table
+  tree->root_ = (HuffmanTreeNode*)BrotliSafeMalloc((uint64_t)tree->max_nodes_,
+                                                  sizeof(*tree->root_));
+  if (tree->root_ == NULL) return 0;
+  TreeNodeInit(tree->root_);  // Initialize root.
+  tree->num_nodes_ = 1;
+  memset(tree->lut_bits_, 255, sizeof(tree->lut_bits_));
+  memset(tree->lut_jump_, 0, sizeof(tree->lut_jump_));
+  return 1;
+}
+
+void BrotliHuffmanTreeRelease(HuffmanTree* const tree) {
+  if (tree != NULL) {
+    free(tree->root_);
+    tree->root_ = NULL;
+    tree->max_nodes_ = 0;
+    tree->num_nodes_ = 0;
+  }
+}
+
+// Utility: converts Huffman code lengths to corresponding Huffman codes.
+// 'huff_codes' should be pre-allocated.
+// Returns false in case of error (memory allocation, invalid codes).
+static int HuffmanCodeLengthsToCodes(const uint8_t* const code_lengths,
+                                     int code_lengths_size,
+                                     int* const huff_codes) {
+  int symbol;
+  int code_len;
+  int code_length_hist[MAX_ALLOWED_CODE_LENGTH + 1] = { 0 };
+  int curr_code;
+  int next_codes[MAX_ALLOWED_CODE_LENGTH + 1] = { 0 };
+  int max_code_length = 0;
+
+  assert(code_lengths != NULL);
+  assert(code_lengths_size > 0);
+  assert(huff_codes != NULL);
+
+  // Calculate max code length.
+  for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+    if (code_lengths[symbol] > max_code_length) {
+      max_code_length = code_lengths[symbol];
+    }
+  }
+  if (max_code_length > MAX_ALLOWED_CODE_LENGTH) return 0;
+
+  // Calculate code length histogram.
+  for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+    ++code_length_hist[code_lengths[symbol]];
+  }
+  code_length_hist[0] = 0;
+
+  // Calculate the initial values of 'next_codes' for each code length.
+  // next_codes[code_len] denotes the code to be assigned to the next symbol
+  // of code length 'code_len'.
+  curr_code = 0;
+  next_codes[0] = -1;  // Unused, as code length = 0 implies code doesn't exist.
+  for (code_len = 1; code_len <= max_code_length; ++code_len) {
+    curr_code = (curr_code + code_length_hist[code_len - 1]) << 1;
+    next_codes[code_len] = curr_code;
+  }
+
+  // Get symbols.
+  for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+    if (code_lengths[symbol] > 0) {
+      huff_codes[symbol] = next_codes[code_lengths[symbol]]++;
+    } else {
+      huff_codes[symbol] = NON_EXISTENT_SYMBOL;
+    }
+  }
+  return 1;
+}
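+
+// Worked example (illustrative): for code_lengths = {2, 1, 3, 3} the histogram
+// is {0, 1, 1, 2}, the starting codes per length are next_codes = {-1, 0, 2, 6},
+// and the symbols receive the canonical codes
+//   symbol 0 (length 2): 10
+//   symbol 1 (length 1): 0
+//   symbol 2 (length 3): 110
+//   symbol 3 (length 3): 111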
+
+static const uint8_t kReverse7[128] = {
+  0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120,
+  4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124,
+  2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122,
+  6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126,
+  1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121,
+  5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125,
+  3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123,
+  7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127
+};
+
+static int ReverseBitsShort(int bits, int num_bits) {
+  return kReverse7[bits] >> (7 - num_bits);
+}
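+
+// Example (illustrative): ReverseBitsShort(0x3 /* 011 */, 3) looks up
+// kReverse7[3] == 96 (1100000 in binary) and shifts it right by 7 - 3 = 4,
+// giving 6 (110 in binary), i.e. the low three bits reversed.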
+
+static int TreeAddSymbol(HuffmanTree* const tree,
+                         int symbol, int code, int code_length) {
+  int step = HUFF_LUT_BITS;
+  int base_code;
+  HuffmanTreeNode* node = tree->root_;
+  const HuffmanTreeNode* const max_node = tree->root_ + tree->max_nodes_;
+  assert(symbol == (int16_t)symbol);
+  if (code_length <= HUFF_LUT_BITS) {
+    int i = 1 << (HUFF_LUT_BITS - code_length);
+    base_code = ReverseBitsShort(code, code_length);
+    do {
+      int idx;
+      --i;
+      idx = base_code | (i << code_length);
+      tree->lut_symbol_[idx] = (int16_t)symbol;
+      tree->lut_bits_[idx] = code_length;
+    } while (i > 0);
+  } else {
+    base_code = ReverseBitsShort((code >> (code_length - HUFF_LUT_BITS)),
+                                 HUFF_LUT_BITS);
+  }
+  while (code_length-- > 0) {
+    if (node >= max_node) {
+      return 0;
+    }
+    if (NodeIsEmpty(node)) {
+      if (IsFull(tree)) return 0;    // error: too many symbols.
+      AssignChildren(tree, node);
+    } else if (!HuffmanTreeNodeIsNotLeaf(node)) {
+      return 0;  // leaf is already occupied.
+    }
+    node += node->children_ + ((code >> code_length) & 1);
+    if (--step == 0) {
+      tree->lut_jump_[base_code] = (int16_t)(node - tree->root_);
+    }
+  }
+  if (NodeIsEmpty(node)) {
+    node->children_ = 0;      // turn newly created node into a leaf.
+  } else if (HuffmanTreeNodeIsNotLeaf(node)) {
+    return 0;   // trying to assign a symbol to already used code.
+  }
+  node->symbol_ = symbol;  // Add symbol in this node.
+  return 1;
+}
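+
+// Example (illustrative): adding a symbol with code 10 (binary) of length 2
+// stores it under base_code = ReverseBitsShort(2, 2) == 1 and fills the
+// 1 << (HUFF_LUT_BITS - 2) == 32 lookup entries 1, 5, 9, ..., 125, so any
+// 7-bit window whose two low bits are 01 resolves to this symbol directly.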
+
+int BrotliHuffmanTreeBuildImplicit(HuffmanTree* const tree,
+                                   const uint8_t* const code_lengths,
+                                   int code_lengths_size) {
+  int symbol;
+  int num_symbols = 0;
+  int root_symbol = 0;
+
+  assert(tree != NULL);
+  assert(code_lengths != NULL);
+
+  // Find out number of symbols and the root symbol.
+  for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+    if (code_lengths[symbol] > 0) {
+      // Note: code length = 0 indicates non-existent symbol.
+      ++num_symbols;
+      root_symbol = symbol;
+    }
+  }
+
+  // Initialize the tree. Will fail if num_symbols == 0.
+  if (!TreeInit(tree, num_symbols)) return 0;
+
+  // Build tree.
+  if (num_symbols == 1) {  // Trivial case.
+    const int max_symbol = code_lengths_size;
+    if (root_symbol < 0 || root_symbol >= max_symbol) {
+      BrotliHuffmanTreeRelease(tree);
+      return 0;
+    }
+    return TreeAddSymbol(tree, root_symbol, 0, 0);
+  } else {  // Normal case.
+    int ok = 0;
+
+    // Get Huffman codes from the code lengths.
+    int* const codes =
+        (int*)BrotliSafeMalloc((uint64_t)code_lengths_size, sizeof(*codes));
+    if (codes == NULL) goto End;
+
+    if (!HuffmanCodeLengthsToCodes(code_lengths, code_lengths_size, codes)) {
+      goto End;
+    }
+
+    // Add symbols one-by-one.
+    for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+      if (code_lengths[symbol] > 0) {
+        if (!TreeAddSymbol(tree, symbol, codes[symbol], code_lengths[symbol])) {
+          goto End;
+        }
+      }
+    }
+    ok = 1;
+ End:
+    free(codes);
+    ok = ok && IsFull(tree);
+    if (!ok) BrotliHuffmanTreeRelease(tree);
+    return ok;
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/brotli/dec/huffman.h b/brotli/dec/huffman.h
new file mode 100644
index 0000000..f1a671d
--- /dev/null
+++ b/brotli/dec/huffman.h
@@ -0,0 +1,74 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Utilities for building and looking up Huffman trees.
+
+#ifndef BROTLI_DEC_HUFFMAN_H_
+#define BROTLI_DEC_HUFFMAN_H_
+
+#include <assert.h>
+#include "./types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// A node of a Huffman tree.
+typedef struct {
+  int symbol_;
+  int children_;  // delta offset to both children (contiguous) or 0 if leaf.
+} HuffmanTreeNode;
+
+// Huffman Tree.
+#define HUFF_LUT_BITS 7
+#define HUFF_LUT (1U << HUFF_LUT_BITS)
+typedef struct HuffmanTree HuffmanTree;
+struct HuffmanTree {
+  // Fast lookup for short bit lengths.
+  uint8_t lut_bits_[HUFF_LUT];
+  int16_t lut_symbol_[HUFF_LUT];
+  int16_t lut_jump_[HUFF_LUT];
+  // Complete tree for lookups.
+  HuffmanTreeNode* root_;   // all the nodes, starting at root.
+  int max_nodes_;           // max number of nodes
+  int num_nodes_;           // number of currently occupied nodes
+};
+
+// Returns true if the given node is not a leaf of the Huffman tree.
+static BROTLI_INLINE int HuffmanTreeNodeIsNotLeaf(
+    const HuffmanTreeNode* const node) {
+  return node->children_;
+}
+
+// Go down one level. Most critical function. 'right_child' must be 0 or 1.
+static BROTLI_INLINE const HuffmanTreeNode* HuffmanTreeNextNode(
+    const HuffmanTreeNode* node, int right_child) {
+  return node + node->children_ + right_child;
+}
+
+// Releases the nodes of the Huffman tree.
+// Note: It does NOT free 'tree' itself.
+void BrotliHuffmanTreeRelease(HuffmanTree* const tree);
+
+// Builds Huffman tree assuming code lengths are implicitly in symbol order.
+// Returns false in case of error (invalid tree or memory error).
+int BrotliHuffmanTreeBuildImplicit(HuffmanTree* const tree,
+                                   const uint8_t* const code_lengths,
+                                   int code_lengths_size);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  // BROTLI_DEC_HUFFMAN_H_
diff --git a/brotli/dec/prefix.h b/brotli/dec/prefix.h
new file mode 100644
index 0000000..500bd10
--- /dev/null
+++ b/brotli/dec/prefix.h
@@ -0,0 +1,64 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Lookup tables to map prefix codes to value ranges. This is used during
+// decoding of the block lengths, literal insertion lengths and copy lengths.
+
+#ifndef BROTLI_DEC_PREFIX_H_
+#define BROTLI_DEC_PREFIX_H_
+
+// Represents the range of values belonging to a prefix code:
+// [offset, offset + 2^nbits)
+struct PrefixCodeRange {
+  int offset;
+  int nbits;
+};
+
+static const struct PrefixCodeRange kBlockLengthPrefixCode[] = {
+  {   1,  2}, {    5,  2}, {  9,   2}, {  13,  2},
+  {  17,  3}, {   25,  3}, {  33,  3}, {  41,  3},
+  {  49,  4}, {   65,  4}, {  81,  4}, {  97,  4},
+  { 113,  5}, {  145,  5}, { 177,  5}, { 209,  5},
+  { 241,  6}, {  305,  6}, { 369,  7}, { 497,  8},
+  { 753,  9}, { 1265, 10}, {2289, 11}, {4337, 12},
+  {8433, 13}, {16625, 24}
+};
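+
+// For example (illustrative), block length prefix code 5 covers the range
+// [25, 25 + 2^3) = [25, 33), i.e. an offset of 25 plus a 3-bit extra value.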
+
+static const struct PrefixCodeRange kInsertLengthPrefixCode[] = {
+  {   0,  0}, {   1,  0}, {  2,   0}, {    3,  0},
+  {   4,  0}, {   5,  0}, {  6,   1}, {    8,  1},
+  {  10,  2}, {  14,  2}, { 18,   3}, {   26,  3},
+  {  34,  4}, {  50,  4}, { 66,   5}, {   98,  5},
+  { 130,  6}, { 194,  7}, { 322,  8}, {  578,  9},
+  {1090, 10}, {2114, 12}, {6210, 14}, {22594, 24},
+};
+
+static const struct PrefixCodeRange kCopyLengthPrefixCode[] = {
+  {  2, 0}, {   3,  0}, {   4,  0}, {   5,  0},
+  {  6, 0}, {   7,  0}, {   8,  0}, {   9,  0},
+  { 10, 1}, {  12,  1}, {  14,  2}, {  18,  2},
+  { 22, 3}, {  30,  3}, {  38,  4}, {  54,  4},
+  { 70, 5}, { 102,  5}, { 134,  6}, { 198,  7},
+  {326, 8}, { 582,  9}, {1094, 10}, {2118, 24},
+};
+
+static const int kInsertRangeLut[9] = {
+  0, 0, 8, 8, 0, 16, 8, 16, 16,
+};
+
+static const int kCopyRangeLut[9] = {
+  0, 8, 0, 8, 16, 0, 16, 8, 16,
+};
+
+#endif  // BROTLI_DEC_PREFIX_H_
diff --git a/brotli/dec/safe_malloc.c b/brotli/dec/safe_malloc.c
new file mode 100644
index 0000000..41fa480
--- /dev/null
+++ b/brotli/dec/safe_malloc.c
@@ -0,0 +1,41 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Size-checked memory allocation.
+
+#include <stdlib.h>
+#include "./safe_malloc.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// Returns 0 in case of overflow of nmemb * size.
+static int CheckSizeArgumentsOverflow(uint64_t nmemb, size_t size) {
+  const uint64_t total_size = nmemb * size;
+  if (nmemb == 0) return 1;
+  if ((uint64_t)size > BROTLI_MAX_ALLOCABLE_MEMORY / nmemb) return 0;
+  if (total_size != (size_t)total_size) return 0;
+  return 1;
+}
+
+void* BrotliSafeMalloc(uint64_t nmemb, size_t size) {
+  if (!CheckSizeArgumentsOverflow(nmemb, size)) return NULL;
+  assert(nmemb * size > 0);
+  return malloc((size_t)(nmemb * size));
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/brotli/dec/safe_malloc.h b/brotli/dec/safe_malloc.h
new file mode 100644
index 0000000..5a334fc
--- /dev/null
+++ b/brotli/dec/safe_malloc.h
@@ -0,0 +1,43 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Size-checked memory allocation.
+
+#ifndef BROTLI_UTILS_UTILS_H_
+#define BROTLI_UTILS_UTILS_H_
+
+#include <assert.h>
+
+#include "./types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// This is the maximum memory amount that we will ever try to allocate.
+#define BROTLI_MAX_ALLOCABLE_MEMORY (1ULL << 40)
+
+// Size-checking safe malloc: verifies that the requested size is not too
+// large, or returns NULL. You don't need to call it for constructs like
+// malloc(sizeof(foo)), only when a font-dependent size is involved somewhere
+// (like malloc(decoded_size * sizeof(*something))). That's why this safe
+// malloc() borrows its signature from calloc(), pointing at the dangerous
+// underlying multiplication.
+void* BrotliSafeMalloc(uint64_t nmemb, size_t size);
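+
+// Illustrative usage sketch (names are hypothetical): allocating a
+// data-dependent number of elements.
+//
+//   int* codes = (int*)BrotliSafeMalloc((uint64_t)num_codes, sizeof(*codes));
+//   if (codes == NULL) {
+//     /* num_codes * sizeof(*codes) overflowed, or malloc() failed. */
+//   }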
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* BROTLI_UTILS_UTILS_H_ */
diff --git a/brotli/dec/streams.c b/brotli/dec/streams.c
new file mode 100644
index 0000000..89e030a
--- /dev/null
+++ b/brotli/dec/streams.c
@@ -0,0 +1,116 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Functions for streaming input and output.
+
+#include <string.h>
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+#include "./streams.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+int BrotliMemInputFunction(void* data, uint8_t* buf, size_t count) {
+  BrotliMemInput* input = (BrotliMemInput*)data;
+  if (input->pos > input->length) {
+    return -1;
+  }
+  if (input->pos + count > input->length) {
+    count = input->length - input->pos;
+  }
+  memcpy(buf, input->buffer + input->pos, count);
+  input->pos += count;
+  return (int)count;
+}
+
+BrotliInput BrotliInitMemInput(const uint8_t* buffer, size_t length,
+                               BrotliMemInput* mem_input) {
+  BrotliInput input;
+  mem_input->buffer = buffer;
+  mem_input->length = length;
+  mem_input->pos = 0;
+  input.cb_ = &BrotliMemInputFunction;
+  input.data_ = mem_input;
+  return input;
+}
+
+int BrotliMemOutputFunction(void* data, const uint8_t* buf, size_t count) {
+  BrotliMemOutput* output = (BrotliMemOutput*)data;
+  if (output->pos + count > output->length) {
+    return -1;
+  }
+  memcpy(output->buffer + output->pos, buf, count);
+  output->pos += count;
+  return (int)count;
+}
+
+BrotliOutput BrotliInitMemOutput(uint8_t* buffer, size_t length,
+                                 BrotliMemOutput* mem_output) {
+  BrotliOutput output;
+  mem_output->buffer = buffer;
+  mem_output->length = length;
+  mem_output->pos = 0;
+  output.cb_ = &BrotliMemOutputFunction;
+  output.data_ = mem_output;
+  return output;
+}
+
+int BrotliStdinInputFunction(void* data, uint8_t* buf, size_t count) {
+#ifndef _WIN32
+  return read(STDIN_FILENO, buf, count);
+#else
+  return -1;
+#endif
+}
+
+BrotliInput BrotliStdinInput() {
+  BrotliInput in;
+  in.cb_ = BrotliStdinInputFunction;
+  in.data_ = NULL;
+  return in;
+}
+
+int BrotliStdoutOutputFunction(void* data, const uint8_t* buf, size_t count) {
+#ifndef _WIN32
+  return write(STDOUT_FILENO, buf, count);
+#else
+  return -1;
+#endif
+}
+
+BrotliOutput BrotliStdoutOutput() {
+  BrotliOutput out;
+  out.cb_ = BrotliStdoutOutputFunction;
+  out.data_ = NULL;
+  return out;
+}
+
+int BrotliFileOutputFunction(void* data, const uint8_t* buf, size_t count) {
+  return (int)fwrite(buf, 1, count, (FILE*)data);
+}
+
+BrotliOutput BrotliFileOutput(FILE* f) {
+  BrotliOutput out;
+  out.cb_ = BrotliFileOutputFunction;
+  out.data_ = f;
+  return out;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/brotli/dec/streams.h b/brotli/dec/streams.h
new file mode 100644
index 0000000..b055234
--- /dev/null
+++ b/brotli/dec/streams.h
@@ -0,0 +1,102 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Functions for streaming input and output.
+
+#ifndef BROTLI_DEC_STREAMS_H_
+#define BROTLI_DEC_STREAMS_H_
+
+#include <stdio.h>
+#include "./types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// Function pointer type used to read len bytes into buf. Returns the
+// number of bytes read or -1 on error.
+typedef int (*BrotliInputFunction)(void* data, uint8_t* buf, size_t len);
+
+// Input callback function with associated data.
+typedef struct {
+  BrotliInputFunction cb_;
+  void* data_;
+} BrotliInput;
+
+// Reads len bytes into buf, using the in callback.
+static BROTLI_INLINE int BrotliRead(BrotliInput in, uint8_t* buf, size_t len) {
+  return in.cb_(in.data_, buf, len);
+}
+
+// Function pointer type used to write len bytes from buf. Returns the
+// number of bytes written or -1 on error.
+typedef int (*BrotliOutputFunction)(void* data, const uint8_t* buf, size_t len);
+
+// Output callback function with associated data.
+typedef struct {
+  BrotliOutputFunction cb_;
+  void* data_;
+} BrotliOutput;
+
+// Writes len bytes from buf, using the out callback.
+static BROTLI_INLINE int BrotliWrite(BrotliOutput out,
+                                     const uint8_t* buf, size_t len) {
+  return out.cb_(out.data_, buf, len);
+}
+
+// Memory region with position.
+typedef struct {
+  const uint8_t* buffer;
+  size_t length;
+  size_t pos;
+} BrotliMemInput;
+
+// Input callback where *data is a BrotliMemInput struct.
+int BrotliMemInputFunction(void* data, uint8_t* buf, size_t count);
+
+// Returns an input callback that wraps the given memory region.
+BrotliInput BrotliInitMemInput(const uint8_t* buffer, size_t length,
+                               BrotliMemInput* mem_input);
+
+// Output buffer with position.
+typedef struct {
+  uint8_t* buffer;
+  size_t length;
+  size_t pos;
+} BrotliMemOutput;
+
+// Output callback where *data is a BrotliMemOutput struct.
+int BrotliMemOutputFunction(void* data, const uint8_t* buf, size_t count);
+
+// Returns an output callback that wraps the given memory region.
+BrotliOutput BrotliInitMemOutput(uint8_t* buffer, size_t length,
+                                 BrotliMemOutput* mem_output);
+
+// Input callback that reads from standard input.
+int BrotliStdinInputFunction(void* data, uint8_t* buf, size_t count);
+BrotliInput BrotliStdinInput();
+
+// Output callback that writes to standard output.
+int BrotliStdoutOutputFunction(void* data, const uint8_t* buf, size_t count);
+BrotliOutput BrotliStdoutOutput();
+
+// Output callback that writes to a file.
+int BrotliFileOutputFunction(void* data, const uint8_t* buf, size_t count);
+BrotliOutput BrotliFileOutput(FILE* f);
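+
+// Illustrative usage sketch (file name and variable names are hypothetical):
+// wiring a memory-backed input to a file-backed output. BrotliDecompress() is
+// declared in decode.h.
+//
+//   FILE* f = fopen("decoded.bin", "wb");   /* error handling omitted */
+//   BrotliMemInput mem_in;
+//   BrotliInput in = BrotliInitMemInput(encoded, encoded_size, &mem_in);
+//   BrotliOutput out = BrotliFileOutput(f);
+//   int ok = BrotliDecompress(in, out);
+//   fclose(f);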
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  // BROTLI_DEC_STREAMS_H_
diff --git a/brotli/dec/types.h b/brotli/dec/types.h
new file mode 100644
index 0000000..41696e4
--- /dev/null
+++ b/brotli/dec/types.h
@@ -0,0 +1,41 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Common types
+
+#ifndef BROTLI_DEC_TYPES_H_
+#define BROTLI_DEC_TYPES_H_
+
+#include <stddef.h>  // for size_t
+
+#ifndef _MSC_VER
+#include <inttypes.h>
+#ifdef __STRICT_ANSI__
+#define BROTLI_INLINE
+#else  /* __STRICT_ANSI__ */
+#define BROTLI_INLINE inline
+#endif
+#else
+typedef signed   char int8_t;
+typedef unsigned char uint8_t;
+typedef signed   short int16_t;
+typedef unsigned short uint16_t;
+typedef signed   int int32_t;
+typedef unsigned int uint32_t;
+typedef unsigned long long int uint64_t;
+typedef long long int int64_t;
+#define BROTLI_INLINE __forceinline
+#endif  /* _MSC_VER */
+
+#endif  // BROTLI_DEC_TYPES_H_
diff --git a/brotli/enc/Makefile b/brotli/enc/Makefile
new file mode 100644
index 0000000..c7041dc
--- /dev/null
+++ b/brotli/enc/Makefile
@@ -0,0 +1,11 @@
+#brotli/enc
+
+include ../../shared.mk
+
+OBJS = backward_references.o block_splitter.o encode.o entropy_encode.o histogram.o literal_cost.o prefix.o
+
+all : $(OBJS)
+
+clean :
+	rm -f $(OBJS) $(SO)
+
diff --git a/brotli/enc/README b/brotli/enc/README
new file mode 100644
index 0000000..c988ae7
--- /dev/null
+++ b/brotli/enc/README
@@ -0,0 +1,3 @@
+This directory holds the encoder for the Brotli compression format.
+
+Brotli is proposed for use at the byte-compression level in the WOFF 2.0 format.
diff --git a/brotli/enc/backward_references.cc b/brotli/enc/backward_references.cc
new file mode 100644
index 0000000..71554fe
--- /dev/null
+++ b/brotli/enc/backward_references.cc
@@ -0,0 +1,143 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Function to find backward reference copies.
+
+#include "./backward_references.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "./command.h"
+
+namespace brotli {
+
+void CreateBackwardReferences(size_t num_bytes,
+                              size_t position,
+                              const uint8_t* ringbuffer,
+                              const float* literal_cost,
+                              size_t ringbuffer_mask,
+                              const size_t max_backward_limit,
+                              Hasher* hasher,
+                              std::vector<Command>* commands) {
+  // Length heuristic that seems to help, probably by enabling better selection
+  // of lazy matches of similar lengths.
+  int insert_length = 0;
+  size_t i = position & ringbuffer_mask;
+  const int i_diff = position - i;
+  const size_t i_end = i + num_bytes;
+
+  double average_cost = 0.0;
+  for (int k = position; k < position + num_bytes; ++k) {
+    average_cost += literal_cost[k & ringbuffer_mask];
+  }
+  average_cost /= num_bytes;
+  hasher->set_average_cost(average_cost);
+
+  while (i + 2 < i_end) {
+    size_t best_len = 0;
+    size_t best_len_code = 0;
+    size_t best_dist = 0;
+    double best_score = 0;
+    size_t max_distance = std::min(i + i_diff, max_backward_limit);
+    hasher->set_insert_length(insert_length);
+    bool match_found = hasher->FindLongestMatch(
+        ringbuffer, literal_cost, ringbuffer_mask,
+        i + i_diff, i_end - i, max_distance,
+        &best_len, &best_len_code, &best_dist, &best_score);
+    if (match_found) {
+      // Found a match. Let's look for something even better ahead.
+      int delayed_backward_references_in_row = 0;
+      while (i + 4 < i_end &&
+             delayed_backward_references_in_row < 4) {
+        size_t best_len_2 = 0;
+        size_t best_len_code_2 = 0;
+        size_t best_dist_2 = 0;
+        double best_score_2 = 0;
+        max_distance = std::min(i + i_diff + 1, max_backward_limit);
+        hasher->Store(ringbuffer + i, i + i_diff);
+        match_found = hasher->FindLongestMatch(
+            ringbuffer, literal_cost, ringbuffer_mask,
+            i + i_diff + 1, i_end - i - 1, max_distance,
+            &best_len_2, &best_len_code_2, &best_dist_2, &best_score_2);
+        double cost_diff_lazy = 0;
+        if (best_len >= 4) {
+          cost_diff_lazy +=
+              literal_cost[(i + 4) & ringbuffer_mask] - average_cost;
+        }
+        {
+          const int tail_length = best_len_2 - best_len + 1;
+          for (int k = 0; k < tail_length; ++k) {
+            cost_diff_lazy -=
+                literal_cost[(i + best_len + k) & ringbuffer_mask] -
+                average_cost;
+          }
+        }
+        // If we are not inserting any symbols, inserting one is more
+        // expensive than if we were already inserting symbols anyway.
+        if (insert_length < 1) {
+          cost_diff_lazy += 1.0;
+        }
+        // Add bias to slightly avoid lazy matching.
+        cost_diff_lazy += 2.0 + delayed_backward_references_in_row * 0.2;
+        cost_diff_lazy += 0.04 * literal_cost[i & ringbuffer_mask];
+
+        if (match_found && best_score_2 >= best_score + cost_diff_lazy) {
+          // Ok, let's just write one byte for now and start a match from the
+          // next byte.
+          ++insert_length;
+          ++delayed_backward_references_in_row;
+          best_len = best_len_2;
+          best_len_code = best_len_code_2;
+          best_dist = best_dist_2;
+          best_score = best_score_2;
+          i++;
+        } else {
+          break;
+        }
+      }
+      Command cmd;
+      cmd.insert_length_ = insert_length;
+      cmd.copy_length_ = best_len;
+      cmd.copy_length_code_ = best_len_code;
+      cmd.copy_distance_ = best_dist;
+      commands->push_back(cmd);
+      hasher->set_last_distance(best_dist);
+
+      insert_length = 0;
+      ++i;
+      for (int j = 1; j < best_len; ++j) {
+        if (i + 2 < i_end) {
+          hasher->Store(ringbuffer + i, i + i_diff);
+        }
+        ++i;
+      }
+    } else {
+      ++insert_length;
+      hasher->Store(ringbuffer + i, i + i_diff);
+      ++i;
+    }
+  }
+  insert_length += (i_end - i);
+
+  if (insert_length > 0) {
+    Command cmd;
+    cmd.insert_length_ = insert_length;
+    cmd.copy_length_ = 0;
+    cmd.copy_distance_ = 0;
+    commands->push_back(cmd);
+  }
+}
+
+}  // namespace brotli
diff --git a/brotli/enc/backward_references.h b/brotli/enc/backward_references.h
new file mode 100644
index 0000000..f666ef6
--- /dev/null
+++ b/brotli/enc/backward_references.h
@@ -0,0 +1,39 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Function to find backward reference copies.
+
+#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
+#define BROTLI_ENC_BACKWARD_REFERENCES_H_
+
+#include <stdint.h>
+#include <vector>
+
+#include "./hash.h"
+#include "./command.h"
+
+namespace brotli {
+
+void CreateBackwardReferences(size_t num_bytes,
+                              size_t position,
+                              const uint8_t* ringbuffer,
+                              const float* literal_cost,
+                              size_t ringbuffer_mask,
+                              const size_t max_backward_limit,
+                              Hasher* hasher,
+                              std::vector<Command>* commands);
+
+}  // namespace brotli
+
+#endif  // BROTLI_ENC_BACKWARD_REFERENCES_H_
diff --git a/brotli/enc/bit_cost.h b/brotli/enc/bit_cost.h
new file mode 100644
index 0000000..5d6ef0f
--- /dev/null
+++ b/brotli/enc/bit_cost.h
@@ -0,0 +1,137 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Functions to estimate the bit cost of Huffman trees.
+
+#ifndef BROTLI_ENC_BIT_COST_H_
+#define BROTLI_ENC_BIT_COST_H_
+
+#include <stdint.h>
+
+#include "./entropy_encode.h"
+#include "./fast_log.h"
+
+namespace brotli {
+
+static const int kHuffmanExtraBits[kCodeLengthCodes] = {
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3,
+};
+
+static inline int HuffmanTreeBitCost(const int* counts, const uint8_t* depth) {
+  int nbits = 0;
+  for (int i = 0; i < kCodeLengthCodes; ++i) {
+    nbits += counts[i] * (depth[i] + kHuffmanExtraBits[i]);
+  }
+  return nbits;
+}
+
+static inline int HuffmanTreeBitCost(
+    const Histogram<kCodeLengthCodes>& histogram,
+    const EntropyCode<kCodeLengthCodes>& entropy) {
+  return HuffmanTreeBitCost(&histogram.data_[0], &entropy.depth_[0]);
+}
+
+static inline int HuffmanBitCost(const uint8_t* depth, int length) {
+  int max_depth = 1;
+  int histogram[kCodeLengthCodes] = { 0 };
+  int tail_start = 0;
+  // compute histogram of compacted huffman tree
+  for (int i = 0; i < length;) {
+    const int value = depth[i];
+    if (value > max_depth) {
+      max_depth = value;
+    }
+    int reps = 1;
+    for (int k = i + 1; k < length && depth[k] == value; ++k) {
+      ++reps;
+    }
+    i += reps;
+    if (value == 0) {
+      if (reps < 3) {
+        histogram[0] += reps;
+      } else {
+        reps -= 3;
+        while (reps >= 0) {
+          ++histogram[17];
+          reps >>= 3;
+          --reps;
+        }
+      }
+    } else {
+      tail_start = i;
+      ++histogram[value];
+      --reps;
+      if (reps < 3) {
+        histogram[value] += reps;
+      } else {
+        reps -= 3;
+        while (reps >= 0) {
+          ++histogram[16];
+          reps >>= 2;
+          --reps;
+        }
+      }
+    }
+  }
+
+  // create huffman tree of huffman tree
+  uint8_t cost[kCodeLengthCodes] = { 0 };
+  CreateHuffmanTree(histogram, kCodeLengthCodes, 7, cost);
+  // account for rle extra bits
+  cost[16] += 2;
+  cost[17] += 3;
+
+  int tree_size = 0;
+  int bits = 6 + 3 * max_depth;  // huffman tree of huffman tree cost
+  for (int i = 0; i < kCodeLengthCodes; ++i) {
+    bits += histogram[i] * cost[i];  // huffman tree bit cost
+    tree_size += histogram[i];
+  }
+  return bits;
+}
+
+template<int kSize>
+double PopulationCost(const Histogram<kSize>& histogram) {
+  if (histogram.total_count_ == 0) {
+    return 11;
+  }
+  int count = 0;
+  for (int i = 0; i < kSize && count < 5; ++i) {
+    if (histogram.data_[i] > 0) {
+      ++count;
+    }
+  }
+  if (count == 1) {
+    return 11;
+  }
+  if (count == 2) {
+    return 19 + histogram.total_count_;
+  }
+  uint8_t depth[kSize] = { 0 };
+  CreateHuffmanTree(&histogram.data_[0], kSize, 15, depth);
+  int bits = 0;
+  for (int i = 0; i < kSize; ++i) {
+    bits += histogram.data_[i] * depth[i];
+  }
+  if (count == 3) {
+    bits += 27;
+  } else {
+    bits += HuffmanBitCost(depth, kSize);
+  }
+  return bits;
+}
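+
+// Usage sketch: PopulationCost makes it possible to estimate, without
+// emitting any bits, roughly how many bits a symbol stream would take when
+// coded with a single Huffman code. EstimateEncodedBits is only an
+// illustrative helper name; nothing else in the encoder depends on it.
+template<int kSize>
+double EstimateEncodedBits(const uint8_t* symbols, int num_symbols) {
+  Histogram<kSize> histogram;
+  for (int i = 0; i < num_symbols; ++i) {
+    histogram.Add(symbols[i]);
+  }
+  return PopulationCost(histogram);
+}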
+
+}  // namespace brotli
+
+#endif  // BROTLI_ENC_BIT_COST_H_
diff --git a/brotli/enc/block_splitter.cc b/brotli/enc/block_splitter.cc
new file mode 100644
index 0000000..e3d7363
--- /dev/null
+++ b/brotli/enc/block_splitter.cc
@@ -0,0 +1,410 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Block split point selection utilities.
+
+#include "./block_splitter.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <map>
+
+#include "./cluster.h"
+#include "./command.h"
+#include "./fast_log.h"
+#include "./histogram.h"
+
+namespace brotli {
+
+static const int kMaxLiteralHistograms = 48;
+static const int kMaxCommandHistograms = 50;
+static const double kLiteralBlockSwitchCost = 26;
+static const double kCommandBlockSwitchCost = 13.5;
+static const double kDistanceBlockSwitchCost = 14.6;
+static const int kLiteralStrideLength = 70;
+static const int kCommandStrideLength = 40;
+static const int kSymbolsPerLiteralHistogram = 550;
+static const int kSymbolsPerCommandHistogram = 530;
+static const int kSymbolsPerDistanceHistogram = 550;
+static const int kMinLengthForBlockSplitting = 128;
+static const int kIterMulForRefining = 2;
+static const int kMinItersForRefining = 100;
+
+void CopyLiteralsToByteArray(const std::vector<Command>& cmds,
+                             const uint8_t* data,
+                             std::vector<uint8_t>* literals) {
+  // Count how many we have.
+  size_t total_length = 0;
+  for (int i = 0; i < cmds.size(); ++i) {
+    total_length += cmds[i].insert_length_;
+  }
+  if (total_length == 0) {
+    return;
+  }
+
+  // Allocate.
+  literals->resize(total_length);
+
+  // Loop again, and copy this time.
+  size_t pos = 0;
+  size_t from_pos = 0;
+  for (int i = 0; i < cmds.size() && pos < total_length; ++i) {
+    memcpy(&(*literals)[pos], data + from_pos, cmds[i].insert_length_);
+    pos += cmds[i].insert_length_;
+    from_pos += cmds[i].insert_length_ + cmds[i].copy_length_;
+  }
+}
+
+void CopyCommandsToByteArray(const std::vector<Command>& cmds,
+                             std::vector<uint16_t>* insert_and_copy_codes,
+                             std::vector<uint8_t>* distance_prefixes) {
+  for (int i = 0; i < cmds.size(); ++i) {
+    const Command& cmd = cmds[i];
+    insert_and_copy_codes->push_back(cmd.command_prefix_);
+    if (cmd.copy_length_ > 0 && cmd.distance_prefix_ != 0xffff) {
+      distance_prefixes->push_back(cmd.distance_prefix_);
+    }
+  }
+}
+
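+// Shannon cost, in bits, of coding the union of histograms 'a' and 'b' with
+// a single entropy code: total * log2(total) minus the sum over symbols of
+// count * log2(count).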
+template<int kSize>
+double HistogramAddEval(const Histogram<kSize>& a,
+                        const Histogram<kSize>& b) {
+  int total = a.total_count_ + b.total_count_;
+  double retval = total * FastLog2(total);
+  for (int i = 0; i < kSize; ++i) {
+    int count = a.data_[i] + b.data_[i];
+    retval -= count * FastLog2(count);
+  }
+  return retval;
+}
+
+template<typename HistogramType, typename DataType>
+void InitialEntropyCodes(const DataType* data, size_t length,
+                         int literals_per_histogram,
+                         int max_histograms,
+                         size_t stride,
+                         std::vector<HistogramType>* vec) {
+  int total_histograms = length / literals_per_histogram + 1;
+  if (total_histograms > max_histograms) {
+    total_histograms = max_histograms;
+  }
+  unsigned int seed = 7;
+  int block_length = length / total_histograms;
+  for (int i = 0; i < total_histograms; ++i) {
+    int pos = length * i / total_histograms;
+    if (i != 0) {
+      pos += rand_r(&seed) % block_length;
+    }
+    if (pos + stride >= length) {
+      pos = length - stride - 1;
+    }
+    HistogramType histo;
+    histo.Add(data + pos, stride);
+    vec->push_back(histo);
+  }
+}
+
+template<typename HistogramType>
+int FindClosest(const HistogramType& sample,
+                const std::vector<HistogramType>& vec) {
+  double best_distance = 1e99;
+  int best_ix = 0;
+  for (int i = 0; i < vec.size(); ++i) {
+    double distance = HistogramAddEval(sample, vec[i]);
+    if (distance < best_distance) {
+      best_ix = i;
+      best_distance = distance;
+    }
+  }
+  return best_ix;
+}
+
+template<typename HistogramType, typename DataType>
+void RandomSample(unsigned int* seed,
+                  const DataType* data,
+                  size_t length,
+                  size_t stride,
+                  HistogramType* sample) {
+  size_t pos = rand_r(seed) % (length - stride);
+  sample->Add(data + pos, stride);
+}
+
+template<typename HistogramType, typename DataType>
+void RefineEntropyCodes(const DataType* data, size_t length,
+                        size_t stride,
+                        std::vector<HistogramType>* vec) {
+  const int iters =
+      kIterMulForRefining * length / stride + kMinItersForRefining;
+  unsigned int seed = 7;
+  for (int iter = 0; iter < iters; ++iter) {
+    HistogramType sample;
+    RandomSample(&seed, data, length, stride, &sample);
+    int ix = FindClosest(sample, *vec);
+    (*vec)[ix].AddHistogram(sample);
+  }
+}
+
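+// Approximate cost in bits of one occurrence of a symbol seen 'count' times
+// out of 'total', i.e. -log2(count / total), with a two-bit penalty for
+// symbols the histogram has not seen at all.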
+inline static float BitCost(int total, int count) {
+  return count == 0 ? FastLog2(total) + 2 : FastLog2(total) - FastLog2(count);
+}
+
+template<typename DataType, int kSize>
+void FindBlocks(const DataType* data, const size_t length,
+                const double block_switch_bitcost,
+                const std::vector<Histogram<kSize> > &vec,
+                uint8_t *block_id) {
+  if (vec.size() <= 1) {
+    for (int i = 0; i < length; ++i) {
+      block_id[i] = 0;
+    }
+    return;
+  }
+  int vecsize = vec.size();
+  double* insert_cost = new double[kSize * vecsize];
+  memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * vecsize);
+  for (int i = 0; i < kSize; ++i) {
+    for (int j = 0; j < vecsize; ++j) {
+      insert_cost[i * vecsize + j] =
+          BitCost(vec[j].total_count_, vec[j].data_[i]);
+    }
+  }
+  double *cost = new double[vecsize];
+  memset(cost, 0, sizeof(cost[0]) * vecsize);
+  bool* switch_signal = new bool[length * vecsize];
+  memset(switch_signal, 0, sizeof(switch_signal[0]) * length * vecsize);
+  // After each iteration of this loop, cost[k] will contain the difference
+  // between the minimum cost of arriving at the current byte position using
+  // entropy code k, and the minimum cost of arriving at the current byte
+  // position. This difference is capped at the block switch cost, and if it
+  // reaches block switch cost, it means that when we trace back from the last
+  // position, we need to switch here.
+  for (size_t byte_ix = 0; byte_ix < length; ++byte_ix) {
+    int ix = byte_ix * vecsize;
+    int insert_cost_ix = data[byte_ix] * vecsize;
+    double min_cost = 1e99;
+    for (int k = 0; k < vecsize; ++k) {
+      // We are coding the symbol in data[byte_ix] with entropy code k.
+      cost[k] += insert_cost[insert_cost_ix + k];
+      if (cost[k] < min_cost) {
+        min_cost = cost[k];
+        block_id[byte_ix] = k;
+      }
+    }
+    double block_switch_cost = block_switch_bitcost;
+    // More blocks for the beginning.
+    if (byte_ix < 2000) {
+      block_switch_cost *= 0.77 + 0.07 * byte_ix / 2000;
+    }
+    for (int k = 0; k < vecsize; ++k) {
+      cost[k] -= min_cost;
+      if (cost[k] >= block_switch_cost) {
+        cost[k] = block_switch_cost;
+        switch_signal[ix + k] = true;
+      }
+    }
+  }
+  // Now trace back from the last position and switch at the marked places.
+  int byte_ix = length - 1;
+  int ix = byte_ix * vecsize;
+  int cur_id = block_id[byte_ix];
+  while (byte_ix > 0) {
+    --byte_ix;
+    ix -= vecsize;
+    if (switch_signal[ix + cur_id]) {
+      cur_id = block_id[byte_ix];
+    }
+    block_id[byte_ix] = cur_id;
+  }
+  delete[] insert_cost;
+  delete[] cost;
+  delete[] switch_signal;
+}
+
+int RemapBlockIds(uint8_t* block_ids, const size_t length) {
+  std::map<uint8_t, uint8_t> new_id;
+  int next_id = 0;
+  for (int i = 0; i < length; ++i) {
+    if (new_id.find(block_ids[i]) == new_id.end()) {
+      new_id[block_ids[i]] = next_id;
+      ++next_id;
+    }
+  }
+  for (int i = 0; i < length; ++i) {
+    block_ids[i] = new_id[block_ids[i]];
+  }
+  return next_id;
+}
+
+template<typename HistogramType, typename DataType>
+void BuildBlockHistograms(const DataType* data, const size_t length,
+                          uint8_t* block_ids,
+                          std::vector<HistogramType>* histograms) {
+  int num_types = RemapBlockIds(block_ids, length);
+  histograms->clear();
+  histograms->resize(num_types);
+  for (int i = 0; i < length; ++i) {
+    (*histograms)[block_ids[i]].Add(data[i]);
+  }
+}
+
+template<typename HistogramType, typename DataType>
+void ClusterBlocks(const DataType* data, const size_t length,
+                   uint8_t* block_ids) {
+  std::vector<HistogramType> histograms;
+  std::vector<int> block_index(length);
+  int cur_idx = 0;
+  HistogramType cur_histogram;
+  for (int i = 0; i < length; ++i) {
+    bool block_boundary = (i + 1 == length || block_ids[i] != block_ids[i + 1]);
+    block_index[i] = cur_idx;
+    cur_histogram.Add(data[i]);
+    if (block_boundary) {
+      histograms.push_back(cur_histogram);
+      cur_histogram.Clear();
+      ++cur_idx;
+    }
+  }
+  std::vector<HistogramType> clustered_histograms;
+  std::vector<int> histogram_symbols;
+  // Block ids need to fit in one byte.
+  static const int kMaxNumberOfBlockTypes = 256;
+  ClusterHistograms(histograms, 1, histograms.size(),
+                    kMaxNumberOfBlockTypes,
+                    &clustered_histograms,
+                    &histogram_symbols);
+  for (int i = 0; i < length; ++i) {
+    block_ids[i] = histogram_symbols[block_index[i]];
+  }
+}
+
+void BuildBlockSplit(const std::vector<uint8_t>& block_ids, BlockSplit* split) {
+  int cur_id = block_ids[0];
+  int cur_length = 1;
+  split->num_types_ = -1;
+  for (int i = 1; i < block_ids.size(); ++i) {
+    if (block_ids[i] != cur_id) {
+      split->types_.push_back(cur_id);
+      split->lengths_.push_back(cur_length);
+      split->num_types_ = std::max(split->num_types_, cur_id);
+      cur_id = block_ids[i];
+      cur_length = 0;
+    }
+    ++cur_length;
+  }
+  split->types_.push_back(cur_id);
+  split->lengths_.push_back(cur_length);
+  split->num_types_ = std::max(split->num_types_, cur_id);
+  ++split->num_types_;
+}
+
+template<typename HistogramType, typename DataType>
+void SplitByteVector(const std::vector<DataType>& data,
+                     const int literals_per_histogram,
+                     const int max_histograms,
+                     const int sampling_stride_length,
+                     const double block_switch_cost,
+                     BlockSplit* split) {
+  if (data.empty()) {
+    split->num_types_ = 0;
+    return;
+  } else if (data.size() < kMinLengthForBlockSplitting) {
+    split->num_types_ = 1;
+    split->types_.push_back(0);
+    split->lengths_.push_back(data.size());
+    return;
+  }
+  std::vector<HistogramType> histograms;
+  // Find good entropy codes.
+  InitialEntropyCodes(data.data(), data.size(),
+                      literals_per_histogram,
+                      max_histograms,
+                      sampling_stride_length,
+                      &histograms);
+  RefineEntropyCodes(data.data(), data.size(),
+                     sampling_stride_length,
+                     &histograms);
+  // Find a good path through literals with the good entropy codes.
+  std::vector<uint8_t> block_ids(data.size());
+  for (int i = 0; i < 10; ++i) {
+    FindBlocks(data.data(), data.size(),
+               block_switch_cost,
+               histograms,
+               &block_ids[0]);
+    BuildBlockHistograms(data.data(), data.size(), &block_ids[0], &histograms);
+  }
+  ClusterBlocks<HistogramType>(data.data(), data.size(), &block_ids[0]);
+  BuildBlockSplit(block_ids, split);
+}
+
+void SplitBlock(const std::vector<Command>& cmds,
+                const uint8_t* data,
+                BlockSplit* literal_split,
+                BlockSplit* insert_and_copy_split,
+                BlockSplit* dist_split) {
+  // Create a continuous array of literals.
+  std::vector<uint8_t> literals;
+  CopyLiteralsToByteArray(cmds, data, &literals);
+
+  // Compute prefix codes for commands.
+  std::vector<uint16_t> insert_and_copy_codes;
+  std::vector<uint8_t> distance_prefixes;
+  CopyCommandsToByteArray(cmds,
+                          &insert_and_copy_codes,
+                          &distance_prefixes);
+
+  SplitByteVector<HistogramLiteral>(
+      literals,
+      kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
+      kLiteralStrideLength, kLiteralBlockSwitchCost,
+      literal_split);
+  SplitByteVector<HistogramCommand>(
+      insert_and_copy_codes,
+      kSymbolsPerCommandHistogram, kMaxCommandHistograms,
+      kCommandStrideLength, kCommandBlockSwitchCost,
+      insert_and_copy_split);
+  SplitByteVector<HistogramDistance>(
+      distance_prefixes,
+      kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
+      kCommandStrideLength, kDistanceBlockSwitchCost,
+      dist_split);
+}
+
+void SplitBlockByTotalLength(const std::vector<Command>& all_commands,
+                             int input_size,
+                             int target_length,
+                             std::vector<std::vector<Command> >* blocks) {
+  int num_blocks = input_size / target_length + 1;
+  int length_limit = input_size / num_blocks + 1;
+  int total_length = 0;
+  std::vector<Command> cur_block;
+  for (int i = 0; i < all_commands.size(); ++i) {
+    const Command& cmd = all_commands[i];
+    int cmd_length = cmd.insert_length_ + cmd.copy_length_;
+    if (total_length > length_limit) {
+      blocks->push_back(cur_block);
+      cur_block.clear();
+      total_length = 0;
+    }
+    cur_block.push_back(cmd);
+    total_length += cmd_length;
+  }
+  blocks->push_back(cur_block);
+}
+
+}  // namespace brotli
diff --git a/brotli/enc/block_splitter.h b/brotli/enc/block_splitter.h
new file mode 100644
index 0000000..2a491e3
--- /dev/null
+++ b/brotli/enc/block_splitter.h
@@ -0,0 +1,77 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Block split point selection utilities.
+
+#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
+#define BROTLI_ENC_BLOCK_SPLITTER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <vector>
+#include <utility>
+
+#include "./command.h"
+
+namespace brotli {
+
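+// Result of block splitting: the input is divided into contiguous blocks,
+// where block i contains lengths_[i] symbols coded with block type types_[i];
+// num_types_ is the number of distinct block types and type_codes_ holds the
+// short codes later computed for the type sequence.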
+struct BlockSplit {
+  int num_types_;
+  std::vector<uint8_t> types_;
+  std::vector<int> type_codes_;
+  std::vector<int> lengths_;
+};
+
+struct BlockSplitIterator {
+  explicit BlockSplitIterator(const BlockSplit& split)
+      : split_(split), idx_(0), type_(0), length_(0) {
+    if (!split.lengths_.empty()) {
+      length_ = split.lengths_[0];
+    }
+  }
+
+  void Next() {
+    if (length_ == 0) {
+      ++idx_;
+      type_ = split_.types_[idx_];
+      length_ = split_.lengths_[idx_];
+    }
+    --length_;
+  }
+
+  const BlockSplit& split_;
+  int idx_;
+  int type_;
+  int length_;
+};
+
+void CopyLiteralsToByteArray(const std::vector<Command>& cmds,
+                             const uint8_t* data,
+                             std::vector<uint8_t>* literals);
+
+void SplitBlock(const std::vector<Command>& cmds,
+                const uint8_t* data,
+                BlockSplit* literal_split,
+                BlockSplit* insert_and_copy_split,
+                BlockSplit* dist_split);
+
+void SplitBlockByTotalLength(const std::vector<Command>& all_commands,
+                             int input_size,
+                             int target_length,
+                             std::vector<std::vector<Command> >* blocks);
+
+}  // namespace brotli
+
+#endif  // BROTLI_ENC_BLOCK_SPLITTER_H_
diff --git a/brotli/enc/cluster.h b/brotli/enc/cluster.h
new file mode 100644
index 0000000..855a88d
--- /dev/null
+++ b/brotli/enc/cluster.h
@@ -0,0 +1,288 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Functions for clustering similar histograms together.
+
+#ifndef BROTLI_ENC_CLUSTER_H_
+#define BROTLI_ENC_CLUSTER_H_
+
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <algorithm>
+#include <complex>
+#include <map>
+#include <set>
+#include <utility>
+#include <vector>
+
+#include "./bit_cost.h"
+#include "./entropy_encode.h"
+#include "./fast_log.h"
+#include "./histogram.h"
+
+namespace brotli {
+
+struct HistogramPair {
+  int idx1;
+  int idx2;
+  bool valid;
+  double cost_combo;
+  double cost_diff;
+};
+
+struct HistogramPairComparator {
+  bool operator()(const HistogramPair& p1, const HistogramPair& p2) {
+    if (p1.cost_diff != p2.cost_diff) {
+      return p1.cost_diff > p2.cost_diff;
+    }
+    return abs(p1.idx1 - p1.idx2) > abs(p2.idx1 - p2.idx2);
+  }
+};
+
+// Returns entropy reduction of the context map when we combine two clusters.
+inline double ClusterCostDiff(int size_a, int size_b) {
+  int size_c = size_a + size_b;
+  return size_a * FastLog2(size_a) + size_b * FastLog2(size_b) -
+      size_c * FastLog2(size_c);
+}
+
+// Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
+// it is below a threshold, stores the pair (idx1, idx2) in the *pairs heap.
+template<int kSize>
+void CompareAndPushToHeap(const Histogram<kSize>* out,
+                          const int* cluster_size,
+                          int idx1, int idx2,
+                          std::vector<HistogramPair>* pairs) {
+  if (idx1 == idx2) {
+    return;
+  }
+  if (idx2 < idx1) {
+    int t = idx2;
+    idx2 = idx1;
+    idx1 = t;
+  }
+  bool store_pair = false;
+  HistogramPair p;
+  p.idx1 = idx1;
+  p.idx2 = idx2;
+  p.valid = true;
+  p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
+  p.cost_diff -= out[idx1].bit_cost_;
+  p.cost_diff -= out[idx2].bit_cost_;
+
+  if (out[idx1].total_count_ == 0) {
+    p.cost_combo = out[idx2].bit_cost_;
+    store_pair = true;
+  } else if (out[idx2].total_count_ == 0) {
+    p.cost_combo = out[idx1].bit_cost_;
+    store_pair = true;
+  } else {
+    double threshold = pairs->empty() ? 1e99 :
+        std::max(0.0, (*pairs)[0].cost_diff);
+    Histogram<kSize> combo = out[idx1];
+    combo.AddHistogram(out[idx2]);
+    double cost_combo = PopulationCost(combo);
+    if (cost_combo < threshold - p.cost_diff) {
+      p.cost_combo = cost_combo;
+      store_pair = true;
+    }
+  }
+  if (store_pair) {
+    p.cost_diff += p.cost_combo;
+    pairs->push_back(p);
+    std::push_heap(pairs->begin(), pairs->end(), HistogramPairComparator());
+  }
+}
+
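+// Greedy agglomerative clustering of the histograms referenced by 'symbols':
+// pairs are merged in order of decreasing bit-cost saving for as long as a
+// merge saves bits, and after that the cheapest merges continue until at most
+// max_clusters clusters remain.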
+template<int kSize>
+void HistogramCombine(Histogram<kSize>* out,
+                      int* cluster_size,
+                      int* symbols,
+                      int symbols_size,
+                      int max_clusters) {
+  double cost_diff_threshold = 0.0;
+  int min_cluster_size = 1;
+  std::set<int> all_symbols;
+  std::vector<int> clusters;
+  for (int i = 0; i < symbols_size; ++i) {
+    if (all_symbols.find(symbols[i]) == all_symbols.end()) {
+      all_symbols.insert(symbols[i]);
+      clusters.push_back(symbols[i]);
+    }
+  }
+
+  // We maintain a heap of histogram pairs, ordered by the bit cost reduction.
+  std::vector<HistogramPair> pairs;
+  for (int idx1 = 0; idx1 < clusters.size(); ++idx1) {
+    for (int idx2 = idx1 + 1; idx2 < clusters.size(); ++idx2) {
+      CompareAndPushToHeap(out, cluster_size, clusters[idx1], clusters[idx2],
+                           &pairs);
+    }
+  }
+
+  while (clusters.size() > min_cluster_size) {
+    if (pairs[0].cost_diff >= cost_diff_threshold) {
+      cost_diff_threshold = 1e99;
+      min_cluster_size = max_clusters;
+      continue;
+    }
+    // Take the best pair from the top of heap.
+    int best_idx1 = pairs[0].idx1;
+    int best_idx2 = pairs[0].idx2;
+    out[best_idx1].AddHistogram(out[best_idx2]);
+    out[best_idx1].bit_cost_ = pairs[0].cost_combo;
+    cluster_size[best_idx1] += cluster_size[best_idx2];
+    for (int i = 0; i < symbols_size; ++i) {
+      if (symbols[i] == best_idx2) {
+        symbols[i] = best_idx1;
+      }
+    }
+    for (int i = 0; i + 1 < clusters.size(); ++i) {
+      if (clusters[i] >= best_idx2) {
+        clusters[i] = clusters[i + 1];
+      }
+    }
+    clusters.pop_back();
+    // Invalidate pairs intersecting the just combined best pair.
+    for (int i = 0; i < pairs.size(); ++i) {
+      HistogramPair& p = pairs[i];
+      if (p.idx1 == best_idx1 || p.idx2 == best_idx1 ||
+          p.idx1 == best_idx2 || p.idx2 == best_idx2) {
+        p.valid = false;
+      }
+    }
+    // Pop invalid pairs from the top of the heap.
+    while (!pairs.empty() && !pairs[0].valid) {
+      std::pop_heap(pairs.begin(), pairs.end(), HistogramPairComparator());
+      pairs.pop_back();
+    }
+    // Push new pairs formed with the combined histogram to the heap.
+    for (int i = 0; i < clusters.size(); ++i) {
+      CompareAndPushToHeap(out, cluster_size, best_idx1, clusters[i], &pairs);
+    }
+  }
+}
+
+// -----------------------------------------------------------------------------
+// Histogram refinement
+
+// The number of extra bits needed if the samples of 'histogram' are coded
+// together with the cluster represented by 'candidate'.
+template<int kSize>
+double HistogramBitCostDistance(const Histogram<kSize>& histogram,
+                                const Histogram<kSize>& candidate) {
+  if (histogram.total_count_ == 0) {
+    return 0.0;
+  }
+  Histogram<kSize> tmp = histogram;
+  tmp.AddHistogram(candidate);
+  return PopulationCost(tmp) - candidate.bit_cost_;
+}
+
+// Find the best 'out' histogram for each of the 'in' histograms.
+// Note: we assume that out[]->bit_cost_ is already up-to-date.
+template<int kSize>
+void HistogramRemap(const Histogram<kSize>* in, int in_size,
+                    Histogram<kSize>* out, int* symbols) {
+  std::set<int> all_symbols;
+  for (int i = 0; i < in_size; ++i) {
+    all_symbols.insert(symbols[i]);
+  }
+  for (int i = 0; i < in_size; ++i) {
+    int best_out = i == 0 ? symbols[0] : symbols[i - 1];
+    double best_bits = HistogramBitCostDistance(in[i], out[best_out]);
+    for (std::set<int>::const_iterator k = all_symbols.begin();
+         k != all_symbols.end(); ++k) {
+      const double cur_bits = HistogramBitCostDistance(in[i], out[*k]);
+      if (cur_bits < best_bits) {
+        best_bits = cur_bits;
+        best_out = *k;
+      }
+    }
+    symbols[i] = best_out;
+  }
+
+  // Recompute each out based on raw and symbols.
+  for (std::set<int>::const_iterator k = all_symbols.begin();
+       k != all_symbols.end(); ++k) {
+    out[*k].Clear();
+  }
+  for (int i = 0; i < in_size; ++i) {
+    out[symbols[i]].AddHistogram(in[i]);
+  }
+}
+
+// Reorder histograms in *out so that the new symbols in *symbols come in
+// increasing order.
+template<int kSize>
+void HistogramReindex(std::vector<Histogram<kSize> >* out,
+                      std::vector<int>* symbols) {
+  std::vector<Histogram<kSize> > tmp(*out);
+  std::map<int, int> new_index;
+  int next_index = 0;
+  for (int i = 0; i < symbols->size(); ++i) {
+    if (new_index.find((*symbols)[i]) == new_index.end()) {
+      new_index[(*symbols)[i]] = next_index;
+      (*out)[next_index] = tmp[(*symbols)[i]];
+      ++next_index;
+    }
+  }
+  out->resize(next_index);
+  for (int i = 0; i < symbols->size(); ++i) {
+    (*symbols)[i] = new_index[(*symbols)[i]];
+  }
+}
+
+// Clusters similar histograms in 'in' together, the selected histograms are
+// placed in 'out', and for each index in 'in', *histogram_symbols will
+// indicate which of the 'out' histograms is the best approximation.
+template<int kSize>
+void ClusterHistograms(const std::vector<Histogram<kSize> >& in,
+                       int num_contexts, int num_blocks,
+                       int max_histograms,
+                       std::vector<Histogram<kSize> >* out,
+                       std::vector<int>* histogram_symbols) {
+  const int in_size = num_contexts * num_blocks;
+  std::vector<int> cluster_size(in_size, 1);
+  out->resize(in_size);
+  histogram_symbols->resize(in_size);
+  for (int i = 0; i < in_size; ++i) {
+    (*out)[i] = in[i];
+    (*out)[i].bit_cost_ = PopulationCost(in[i]);
+    (*histogram_symbols)[i] = i;
+  }
+
+  // Collapse similar histograms within a block type.
+  if (num_contexts > 1) {
+    for (int i = 0; i < num_blocks; ++i) {
+      HistogramCombine(&(*out)[0], &cluster_size[0],
+                       &(*histogram_symbols)[i * num_contexts], num_contexts,
+                       max_histograms);
+    }
+  }
+
+  // Collapse similar histograms.
+  HistogramCombine(&(*out)[0], &cluster_size[0],
+                   &(*histogram_symbols)[0], in_size,
+                   max_histograms);
+
+  // Find the optimal map from original histograms to the final ones.
+  HistogramRemap(&in[0], in_size, &(*out)[0], &(*histogram_symbols)[0]);
+
+  // Convert the context map to a canonical form.
+  HistogramReindex(out, histogram_symbols);
+}
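+
+// Usage sketch (HistogramLiteral is the typedef from ./histogram.h; the
+// variable names and the limit of 64 clusters are illustrative only):
+//
+//   std::vector<HistogramLiteral> block_histograms = ...;  // one per block
+//   std::vector<HistogramLiteral> clustered;
+//   std::vector<int> context_map;
+//   ClusterHistograms(block_histograms, 1, block_histograms.size(), 64,
+//                     &clustered, &context_map);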
+
+}  // namespace brotli
+
+#endif  // BROTLI_ENC_CLUSTER_H_
diff --git a/brotli/enc/command.h b/brotli/enc/command.h
new file mode 100644
index 0000000..7a9f481
--- /dev/null
+++ b/brotli/enc/command.h
@@ -0,0 +1,46 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This class models a sequence of literals and a backward reference copy.
+
+#ifndef BROTLI_ENC_COMMAND_H_
+#define BROTLI_ENC_COMMAND_H_
+
+#include <stdint.h>
+
+namespace brotli {
+
+// Command holds a sequence of literals and a backward reference copy.
+class Command {
+ public:
+  Command() : insert_length_(0), copy_length_(0), copy_length_code_(0),
+              copy_distance_(0), distance_code_(0),
+              distance_prefix_(0), command_prefix_(0),
+              distance_extra_bits_(0), distance_extra_bits_value_(0) {}
+
+  uint32_t insert_length_;
+  uint32_t copy_length_;
+  uint32_t copy_length_code_;
+  uint32_t copy_distance_;
+  // Values <= 16 are short codes, values > 16 are distances shifted by 16.
+  uint32_t distance_code_;
+  uint16_t distance_prefix_;
+  uint16_t command_prefix_;
+  int distance_extra_bits_;
+  uint32_t distance_extra_bits_value_;
+};
+
+}  // namespace brotli
+
+#endif  // BROTLI_ENC_COMMAND_H_
diff --git a/brotli/enc/context.h b/brotli/enc/context.h
new file mode 100644
index 0000000..9b015d2
--- /dev/null
+++ b/brotli/enc/context.h
@@ -0,0 +1,185 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Functions to map previous bytes into a context id.
+
+#ifndef BROTLI_ENC_CONTEXT_H_
+#define BROTLI_ENC_CONTEXT_H_
+
+#include <stdint.h>
+
+namespace brotli {
+
+// Second-order context lookup table for UTF8 byte streams.
+//
+// If p1 and p2 are the previous two bytes, we calculate the context as
+//
+//   context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256].
+//
+// If the previous two bytes are ASCII characters (i.e. < 128), this will be
+// equivalent to
+//
+//   context = 4 * context1(p1) + context2(p2),
+//
+// where context1 is based on the previous byte in the following way:
+//
+//   0  : non-ASCII control
+//   1  : \t, \n, \r
+//   2  : space
+//   3  : other punctuation
+//   4  : " '
+//   5  : %
+//   6  : ( < [ {
+//   7  : ) > ] }
+//   8  : , ; :
+//   9  : .
+//   10 : =
+//   11 : number
+//   12 : upper-case vowel
+//   13 : upper-case consonant
+//   14 : lower-case vowel
+//   15 : lower-case consonant
+//
+// and context2 is based on the second last byte:
+//
+//   0 : control, space
+//   1 : punctuation
+//   2 : upper-case letter, number
+//   3 : lower-case letter
+//
+// If the last byte is ASCII, and the second last byte is not (in a valid UTF8
+// stream it will be a continuation byte, value between 128 and 191), the
+// context is the same as if the second last byte was an ASCII control or space.
+//
+// If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
+// be a continuation byte and the context id is 2 or 3 depending on the LSB of
+// the last byte and to a lesser extent on the second last byte if it is ASCII.
+//
+// If the last byte is a UTF8 continuation byte, the second last byte can be:
+//   - continuation byte: the next byte is probably ASCII or lead byte (assuming
+//     4-byte UTF8 characters are rare) and the context id is 0 or 1.
+//   - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
+//   - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
+//
+// The possible value combinations of the previous two bytes, the range of
+// context ids and the type of the next byte is summarized in the table below:
+//
+// |--------\-----------------------------------------------------------------|
+// |         \                         Last byte                              |
+// | Second   \---------------------------------------------------------------|
+// | last byte \    ASCII            |   cont. byte        |   lead byte      |
+// |            \   (0-127)          |   (128-191)         |   (192-)         |
+// |=============|===================|=====================|==================|
+// |  ASCII      | next: ASCII/lead  |  not valid          |  next: cont.     |
+// |  (0-127)    | context: 4 - 63   |                     |  context: 2 - 3  |
+// |-------------|-------------------|---------------------|------------------|
+// |  cont. byte | next: ASCII/lead  |  next: ASCII/lead   |  next: cont.     |
+// |  (128-191)  | context: 4 - 63   |  context: 0 - 1     |  context: 2 - 3  |
+// |-------------|-------------------|---------------------|------------------|
+// |  lead byte  | not valid         |  next: ASCII/lead   |  not valid       |
+// |  (192-207)  |                   |  context: 0 - 1     |                  |
+// |-------------|-------------------|---------------------|------------------|
+// |  lead byte  | not valid         |  next: cont.        |  not valid       |
+// |  (208-)     |                   |  context: 2 - 3     |                  |
+// |-------------|-------------------|---------------------|------------------|
+static const uint8_t kUTF8ContextLookup[512] = {
+  // Last byte.
+  //
+  // ASCII range.
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  4,  4,  0,  0,  4,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
+  44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
+  12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
+  52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
+  12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
+  60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12,  0,
+  // UTF8 continuation byte range.
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+  // UTF8 lead byte range.
+  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+  // Second last byte.
+  //
+  // ASCII range.
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
+  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
+  1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
+  // UTF8 continuation byte range.
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  // UTF8 lead byte range.
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+};
+
+// Context lookup table for small signed integers.
+static const int kSigned3BitContextLookup[] = {
+  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+};
+
+enum ContextType {
+  CONTEXT_LSB6         = 0,
+  CONTEXT_MSB6         = 1,
+  CONTEXT_UTF8         = 2,
+  CONTEXT_SIGNED       = 3
+};
+
+static inline uint8_t Context(uint8_t p1, uint8_t p2, int mode) {
+  switch (mode) {
+    case CONTEXT_LSB6:
+      return p1 & 0x3f;
+    case CONTEXT_MSB6:
+      return p1 >> 2;
+    case CONTEXT_UTF8:
+      return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256];
+    case CONTEXT_SIGNED:
+      return (kSigned3BitContextLookup[p1] << 3) + kSigned3BitContextLookup[p2];
+    default:
+      return 0;
+  }
+}
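+
+// Worked example (illustrative): with p1 = ' ' (0x20) and p2 = 'e' in
+// CONTEXT_UTF8 mode, kUTF8ContextLookup[0x20] is 8 (space) and
+// kUTF8ContextLookup['e' + 256] is 3 (lower-case letter), so
+// Context(' ', 'e', CONTEXT_UTF8) returns 8 | 3 = 11, matching the
+// 4 * context1 + context2 formula described above.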
+
+}  // namespace brotli
+
+#endif  // BROTLI_ENC_CONTEXT_H_
diff --git a/brotli/enc/encode.cc b/brotli/enc/encode.cc
new file mode 100644
index 0000000..7d54dbe
--- /dev/null
+++ b/brotli/enc/encode.cc
@@ -0,0 +1,914 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Implementation of Brotli compressor.
+
+#include "./encode.h"
+
+#include <algorithm>
+#include <limits>
+
+#include "./backward_references.h"
+#include "./bit_cost.h"
+#include "./block_splitter.h"
+#include "./cluster.h"
+#include "./context.h"
+#include "./entropy_encode.h"
+#include "./fast_log.h"
+#include "./hash.h"
+#include "./histogram.h"
+#include "./literal_cost.h"
+#include "./prefix.h"
+#include "./write_bits.h"
+
+namespace brotli {
+
+static const int kWindowBits = 22;
+// To make decoding faster, we allow the decoder to write 16 bytes ahead in
+// its ringbuffer, therefore the encoder has to decrease max distance by this
+// amount.
+static const int kDecoderRingBufferWriteAheadSlack = 16;
+static const int kMaxBackwardDistance =
+    (1 << kWindowBits) - kDecoderRingBufferWriteAheadSlack;
+
+static const int kMetaBlockSizeBits = 21;
+static const int kRingBufferBits = 23;
+static const int kRingBufferMask = (1 << kRingBufferBits) - 1;
+
+template<int kSize>
+double Entropy(const std::vector<Histogram<kSize> >& histograms) {
+  double retval = 0;
+  for (int i = 0; i < histograms.size(); ++i) {
+    retval += histograms[i].EntropyBitCost();
+  }
+  return retval;
+}
+
+template<int kSize>
+double TotalBitCost(const std::vector<Histogram<kSize> >& histograms) {
+  double retval = 0;
+  for (int i = 0; i < histograms.size(); ++i) {
+    retval += PopulationCost(histograms[i]);
+  }
+  return retval;
+}
+
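+// Encodes a value in [0, 255]: zero is a single 0 bit; otherwise a 1 bit is
+// followed by the 3-bit value nbits = Log2Floor(n) and then the low nbits of
+// n - (1 << nbits).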
+void EncodeVarLenUint8(int n, int* storage_ix, uint8_t* storage) {
+  if (n == 0) {
+    WriteBits(1, 0, storage_ix, storage);
+  } else {
+    WriteBits(1, 1, storage_ix, storage);
+    int nbits = Log2Floor(n);
+    WriteBits(3, nbits, storage_ix, storage);
+    if (nbits > 0) {
+      WriteBits(nbits, n - (1 << nbits), storage_ix, storage);
+    }
+  }
+}
+
+void EncodeMetaBlockLength(size_t meta_block_size,
+                           bool is_last,
+                           bool is_uncompressed,
+                           int* storage_ix, uint8_t* storage) {
+  WriteBits(1, is_last, storage_ix, storage);
+  if (is_last) {
+    if (meta_block_size == 0) {
+      WriteBits(1, 1, storage_ix, storage);
+      return;
+    }
+    WriteBits(1, 0, storage_ix, storage);
+  }
+  --meta_block_size;
+  int num_bits = Log2Floor(meta_block_size) + 1;
+  if (num_bits < 16) {
+    num_bits = 16;
+  }
+  WriteBits(2, (num_bits - 13) >> 2, storage_ix, storage);
+  while (num_bits > 0) {
+    WriteBits(4, meta_block_size & 0xf, storage_ix, storage);
+    meta_block_size >>= 4;
+    num_bits -= 4;
+  }
+  if (!is_last) {
+    WriteBits(1, is_uncompressed, storage_ix, storage);
+  }
+}
+
+template<int kSize>
+void EntropyEncode(int val, const EntropyCode<kSize>& code,
+                   int* storage_ix, uint8_t* storage) {
+  if (code.count_ <= 1) {
+    return;
+  }
+  WriteBits(code.depth_[val], code.bits_[val], storage_ix, storage);
+}
+
+void StoreHuffmanTreeOfHuffmanTreeToBitMask(
+    const uint8_t* code_length_bitdepth,
+    int* storage_ix, uint8_t* storage) {
+  static const uint8_t kStorageOrder[kCodeLengthCodes] = {
+    1, 2, 3, 4, 0, 17, 5, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+  };
+  // Throw away trailing zeros:
+  int codes_to_store = kCodeLengthCodes;
+  for (; codes_to_store > 3; --codes_to_store) {
+    if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
+      break;
+    }
+  }
+  WriteBits(4, codes_to_store - 3, storage_ix, storage);
+  const int skip_two_first =
+      code_length_bitdepth[kStorageOrder[0]] == 0 &&
+      code_length_bitdepth[kStorageOrder[1]] == 0;
+  WriteBits(1, skip_two_first, storage_ix, storage);
+
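+  // Depths 0..5 of the code length code are themselves written with a fixed
+  // variable-length code of 2, 4, 3, 2, 2 and 4 bits, respectively.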
+  for (int i = skip_two_first * 2; i < codes_to_store; ++i) {
+    uint8_t len[] = { 2, 4, 3, 2, 2, 4 };
+    uint8_t bits[] = { 0, 5, 1, 3, 2, 13 };
+    int v = code_length_bitdepth[kStorageOrder[i]];
+    WriteBits(len[v], bits[v], storage_ix, storage);
+  }
+}
+
+void StoreHuffmanTreeToBitMask(
+    const uint8_t* huffman_tree,
+    const uint8_t* huffman_tree_extra_bits,
+    const int huffman_tree_size,
+    const EntropyCode<kCodeLengthCodes>& entropy,
+    int* storage_ix, uint8_t* storage) {
+  for (int i = 0; i < huffman_tree_size; ++i) {
+    const int ix = huffman_tree[i];
+    const int extra_bits = huffman_tree_extra_bits[i];
+    EntropyEncode(ix, entropy, storage_ix, storage);
+    switch (ix) {
+      case 16:
+        WriteBits(2, extra_bits, storage_ix, storage);
+        break;
+      case 17:
+        WriteBits(3, extra_bits, storage_ix, storage);
+        break;
+    }
+  }
+}
+
+template<int kSize>
+void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
+                      int* storage_ix, uint8_t* storage) {
+  const uint8_t *depth = &code.depth_[0];
+  int max_bits_counter = alphabet_size - 1;
+  int max_bits = 0;
+  while (max_bits_counter) {
+    max_bits_counter >>= 1;
+    ++max_bits;
+  }
+  if (code.count_ == 0) {   // emit minimal tree for empty cases
+    // bits: small tree marker: 1, count-1: 0, max_bits-sized encoding for 0
+    WriteBits(3 + max_bits, 0x01, storage_ix, storage);
+    return;
+  }
+  if (code.count_ <= 4) {
+    int symbols[4];
+    // Quadratic sort.
+    int k, j;
+    for (k = 0; k < code.count_; ++k) {
+      symbols[k] = code.symbols_[k];
+    }
+    for (k = 0; k < code.count_; ++k) {
+      for (j = k + 1; j < code.count_; ++j) {
+        if (depth[symbols[j]] < depth[symbols[k]]) {
+          int t = symbols[k];
+          symbols[k] = symbols[j];
+          symbols[j] = t;
+        }
+      }
+    }
+    // Small tree marker to encode 1-4 symbols.
+    WriteBits(1, 1, storage_ix, storage);
+    WriteBits(2, code.count_ - 1, storage_ix, storage);
+    for (int i = 0; i < code.count_; ++i) {
+      WriteBits(max_bits, symbols[i], storage_ix, storage);
+    }
+    if (code.count_ == 4) {
+      if (depth[symbols[0]] == 2 &&
+          depth[symbols[1]] == 2 &&
+          depth[symbols[2]] == 2 &&
+          depth[symbols[3]] == 2) {
+        WriteBits(1, 0, storage_ix, storage);
+      } else {
+        WriteBits(1, 1, storage_ix, storage);
+      }
+    }
+    return;
+  }
+  WriteBits(1, 0, storage_ix, storage);
+
+  uint8_t huffman_tree[kSize];
+  uint8_t huffman_tree_extra_bits[kSize];
+  int huffman_tree_size = 0;
+  WriteHuffmanTree(depth,
+                   alphabet_size,
+                   &huffman_tree[0],
+                   &huffman_tree_extra_bits[0],
+                   &huffman_tree_size);
+  Histogram<kCodeLengthCodes> huffman_tree_histogram;
+  memset(huffman_tree_histogram.data_, 0, sizeof(huffman_tree_histogram.data_));
+  for (int i = 0; i < huffman_tree_size; ++i) {
+    huffman_tree_histogram.Add(huffman_tree[i]);
+  }
+  EntropyCode<kCodeLengthCodes> huffman_tree_entropy;
+  BuildEntropyCode(huffman_tree_histogram, 5, kCodeLengthCodes,
+                   &huffman_tree_entropy);
+  Histogram<kCodeLengthCodes> trimmed_histogram = huffman_tree_histogram;
+  uint8_t* last_code = &huffman_tree[huffman_tree_size - 1];
+  while (*last_code == 0 || *last_code >= 17) {
+    trimmed_histogram.Remove(*last_code--);
+  }
+  int trimmed_size = trimmed_histogram.total_count_;
+  bool write_length = false;
+  if (trimmed_size >= 4 && trimmed_size <= 195 &&
+      trimmed_size < huffman_tree_size) {
+    EntropyCode<kCodeLengthCodes> trimmed_entropy;
+    BuildEntropyCode(trimmed_histogram, 5, kCodeLengthCodes, &trimmed_entropy);
+    int huffman_bit_cost = HuffmanTreeBitCost(huffman_tree_histogram,
+                                              huffman_tree_entropy);
+    int trimmed_bit_cost = HuffmanTreeBitCost(trimmed_histogram,
+                                              trimmed_entropy);
+    trimmed_bit_cost += (trimmed_size < 68 ? 7 : 8);
+    if (trimmed_bit_cost < huffman_bit_cost) {
+      write_length = true;
+      huffman_tree_size = trimmed_size;
+      huffman_tree_entropy = trimmed_entropy;
+    }
+  }
+
+  StoreHuffmanTreeOfHuffmanTreeToBitMask(
+      &huffman_tree_entropy.depth_[0], storage_ix, storage);
+  WriteBits(1, write_length, storage_ix, storage);
+  if (write_length) {
+    WriteBits(1, huffman_tree_size >= 68, storage_ix, storage);
+    if (huffman_tree_size < 68) {
+      WriteBits(6, huffman_tree_size - 4, storage_ix, storage);
+    } else {
+      WriteBits(7, huffman_tree_size - 68, storage_ix, storage);
+    }
+  }
+  StoreHuffmanTreeToBitMask(&huffman_tree[0], &huffman_tree_extra_bits[0],
+                            huffman_tree_size, huffman_tree_entropy,
+                            storage_ix, storage);
+}
+
+template<int kSize>
+void StoreHuffmanCodes(const std::vector<EntropyCode<kSize> >& codes,
+                       int alphabet_size,
+                       int* storage_ix, uint8_t* storage) {
+  for (int i = 0; i < codes.size(); ++i) {
+    StoreHuffmanCode(codes[i], alphabet_size, storage_ix, storage);
+  }
+}
+
+void EncodeCommand(const Command& cmd,
+                   const EntropyCodeCommand& entropy,
+                   int* storage_ix, uint8_t* storage) {
+  int code = cmd.command_prefix_;
+  EntropyEncode(code, entropy, storage_ix, storage);
+  if (code >= 128) {
+    code -= 128;
+  }
+  int insert_extra_bits = InsertLengthExtraBits(code);
+  uint64_t insert_extra_bits_val =
+      cmd.insert_length_ - InsertLengthOffset(code);
+  int copy_extra_bits = CopyLengthExtraBits(code);
+  uint64_t copy_extra_bits_val = cmd.copy_length_code_ - CopyLengthOffset(code);
+  if (insert_extra_bits > 0) {
+    WriteBits(insert_extra_bits, insert_extra_bits_val, storage_ix, storage);
+  }
+  if (copy_extra_bits > 0) {
+    WriteBits(copy_extra_bits, copy_extra_bits_val, storage_ix, storage);
+  }
+}
+
+void EncodeCopyDistance(const Command& cmd, const EntropyCodeDistance& entropy,
+                        int* storage_ix, uint8_t* storage) {
+  int code = cmd.distance_prefix_;
+  int extra_bits = cmd.distance_extra_bits_;
+  uint64_t extra_bits_val = cmd.distance_extra_bits_value_;
+  EntropyEncode(code, entropy, storage_ix, storage);
+  if (extra_bits > 0) {
+    WriteBits(extra_bits, extra_bits_val, storage_ix, storage);
+  }
+}
+
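+// Replaces copy distances that match one of the last four distances (possibly
+// offset by -3..+3) with the short distance codes 1..16; all other distances
+// are stored in distance_code_ as the distance plus 16.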
+void ComputeDistanceShortCodes(std::vector<Command>* cmds,
+                               int* dist_ringbuffer,
+                               size_t* ringbuffer_idx) {
+  static const int kIndexOffset[16] = {
+    3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2
+  };
+  static const int kValueOffset[16] = {
+    0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
+  };
+  for (int i = 0; i < cmds->size(); ++i) {
+    int cur_dist = (*cmds)[i].copy_distance_;
+    if (cur_dist == 0) break;
+    int dist_code = cur_dist + 16;
+    for (int k = 0; k < 16; ++k) {
+      // Only accept more popular choices.
+      if (cur_dist < 11 && ((k >= 2 && k < 4) || k >= 6)) {
+        // Typically unpopular ranges, don't replace a short distance
+        // with them.
+        continue;
+      }
+      int comp = (dist_ringbuffer[(*ringbuffer_idx + kIndexOffset[k]) & 3] +
+                  kValueOffset[k]);
+      if (cur_dist == comp) {
+        dist_code = k + 1;
+        break;
+      }
+    }
+    if (dist_code > 1) {
+      dist_ringbuffer[*ringbuffer_idx & 3] = cur_dist;
+      ++(*ringbuffer_idx);
+    }
+    (*cmds)[i].distance_code_ = dist_code;
+  }
+}
+
+void ComputeCommandPrefixes(std::vector<Command>* cmds,
+                            int num_direct_distance_codes,
+                            int distance_postfix_bits) {
+  for (int i = 0; i < cmds->size(); ++i) {
+    Command* cmd = &(*cmds)[i];
+    cmd->command_prefix_ = CommandPrefix(cmd->insert_length_,
+                                         cmd->copy_length_code_);
+    if (cmd->copy_length_code_ > 0) {
+      PrefixEncodeCopyDistance(cmd->distance_code_,
+                               num_direct_distance_codes,
+                               distance_postfix_bits,
+                               &cmd->distance_prefix_,
+                               &cmd->distance_extra_bits_,
+                               &cmd->distance_extra_bits_value_);
+    }
+    if (cmd->command_prefix_ < 128 && cmd->distance_prefix_ == 0) {
+      cmd->distance_prefix_ = 0xffff;
+    } else {
+      cmd->command_prefix_ += 128;
+    }
+  }
+}
+
+int IndexOf(const std::vector<int>& v, int value) {
+  for (int i = 0; i < v.size(); ++i) {
+    if (v[i] == value) return i;
+  }
+  return -1;
+}
+
+void MoveToFront(std::vector<int>* v, int index) {
+  int value = (*v)[index];
+  for (int i = index; i > 0; --i) {
+    (*v)[i] = (*v)[i - 1];
+  }
+  (*v)[0] = value;
+}
+
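+// Move-to-front transform of v. For example, {1, 1, 0, 2, 2} becomes
+// {1, 0, 1, 2, 0}: repeated values turn into zeros, which RunLengthCodeZeros
+// below can then compact.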
+std::vector<int> MoveToFrontTransform(const std::vector<int>& v) {
+  if (v.empty()) return v;
+  std::vector<int> mtf(*std::max_element(v.begin(), v.end()) + 1);
+  for (int i = 0; i < mtf.size(); ++i) mtf[i] = i;
+  std::vector<int> result(v.size());
+  for (int i = 0; i < v.size(); ++i) {
+    int index = IndexOf(mtf, v[i]);
+    result[i] = index;
+    MoveToFront(&mtf, index);
+  }
+  return result;
+}
+
+// Finds runs of zeros in v_in and replaces them with a prefix code of the run
+// length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are
+// shifted by *max_length_prefix. Will not create prefix codes bigger than the
+// initial value of *max_run_length_prefix. The prefix code of run length L is
+// simply Log2Floor(L) and the number of extra bits is the same as the prefix
+// code.
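+// For example, a run of 6 zeros is coded as the prefix Log2Floor(6) = 2 with
+// extra-bits value 6 - (1 << 2) = 2, provided *max_run_length_prefix is at
+// least 2.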
+void RunLengthCodeZeros(const std::vector<int>& v_in,
+                        int* max_run_length_prefix,
+                        std::vector<int>* v_out,
+                        std::vector<int>* extra_bits) {
+  int max_reps = 0;
+  for (int i = 0; i < v_in.size();) {
+    for (; i < v_in.size() && v_in[i] != 0; ++i) ;
+    int reps = 0;
+    for (; i < v_in.size() && v_in[i] == 0; ++i) {
+      ++reps;
+    }
+    max_reps = std::max(reps, max_reps);
+  }
+  int max_prefix = max_reps > 0 ? Log2Floor(max_reps) : 0;
+  *max_run_length_prefix = std::min(max_prefix, *max_run_length_prefix);
+  for (int i = 0; i < v_in.size();) {
+    if (v_in[i] != 0) {
+      v_out->push_back(v_in[i] + *max_run_length_prefix);
+      extra_bits->push_back(0);
+      ++i;
+    } else {
+      int reps = 1;
+      for (uint32_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
+        ++reps;
+      }
+      i += reps;
+      while (reps) {
+        if (reps < (2 << *max_run_length_prefix)) {
+          int run_length_prefix = Log2Floor(reps);
+          v_out->push_back(run_length_prefix);
+          extra_bits->push_back(reps - (1 << run_length_prefix));
+          break;
+        } else {
+          v_out->push_back(*max_run_length_prefix);
+          extra_bits->push_back((1 << *max_run_length_prefix) - 1);
+          reps -= (2 << *max_run_length_prefix) - 1;
+        }
+      }
+    }
+  }
+}
+
+// Returns a maximum zero-run-length-prefix value such that run-length coding
+// zeros in v with this maximum prefix value and then encoding the resulting
+// histogram and entropy-coding v produces the least amount of bits.
+int BestMaxZeroRunLengthPrefix(const std::vector<int>& v) {
+  int min_cost = std::numeric_limits<int>::max();
+  int best_max_prefix = 0;
+  for (int max_prefix = 0; max_prefix <= 16; ++max_prefix) {
+    std::vector<int> rle_symbols;
+    std::vector<int> extra_bits;
+    int max_run_length_prefix = max_prefix;
+    RunLengthCodeZeros(v, &max_run_length_prefix, &rle_symbols, &extra_bits);
+    if (max_run_length_prefix < max_prefix) break;
+    HistogramLiteral histogram;
+    for (int i = 0; i < rle_symbols.size(); ++i) {
+      histogram.Add(rle_symbols[i]);
+    }
+    int bit_cost = PopulationCost(histogram);
+    if (max_prefix > 0) {
+      bit_cost += 4;
+    }
+    for (int i = 1; i <= max_prefix; ++i) {
+      bit_cost += histogram.data_[i] * i;  // extra bits
+    }
+    if (bit_cost < min_cost) {
+      min_cost = bit_cost;
+      best_max_prefix = max_prefix;
+    }
+  }
+  return best_max_prefix;
+}
+
+void EncodeContextMap(const std::vector<int>& context_map,
+                      int num_clusters,
+                      int* storage_ix, uint8_t* storage) {
+  EncodeVarLenUint8(num_clusters - 1, storage_ix, storage);
+
+  if (num_clusters == 1) {
+    return;
+  }
+
+  std::vector<int> transformed_symbols = MoveToFrontTransform(context_map);
+  std::vector<int> rle_symbols;
+  std::vector<int> extra_bits;
+  int max_run_length_prefix = BestMaxZeroRunLengthPrefix(transformed_symbols);
+  RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
+                     &rle_symbols, &extra_bits);
+  HistogramContextMap symbol_histogram;
+  for (int i = 0; i < rle_symbols.size(); ++i) {
+    symbol_histogram.Add(rle_symbols[i]);
+  }
+  EntropyCodeContextMap symbol_code;
+  BuildEntropyCode(symbol_histogram, 15, num_clusters + max_run_length_prefix,
+                   &symbol_code);
+  bool use_rle = max_run_length_prefix > 0;
+  WriteBits(1, use_rle, storage_ix, storage);
+  if (use_rle) {
+    WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
+  }
+  StoreHuffmanCode(symbol_code, num_clusters + max_run_length_prefix,
+                   storage_ix, storage);
+  for (int i = 0; i < rle_symbols.size(); ++i) {
+    EntropyEncode(rle_symbols[i], symbol_code, storage_ix, storage);
+    if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
+      WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
+    }
+  }
+  WriteBits(1, 1, storage_ix, storage);  // use move-to-front
+}
+
+template<int kSize>
+void BuildEntropyCodes(const std::vector<Histogram<kSize> >& histograms,
+                       int alphabet_size,
+                       std::vector<EntropyCode<kSize> >* entropy_codes) {
+  entropy_codes->resize(histograms.size());
+  for (int i = 0; i < histograms.size(); ++i) {
+    BuildEntropyCode(histograms[i], 15, alphabet_size, &(*entropy_codes)[i]);
+  }
+}
+
+struct BlockSplitCode {
+  EntropyCodeBlockType block_type_code;
+  EntropyCodeBlockLength block_len_code;
+};
+
+void EncodeBlockLength(const EntropyCodeBlockLength& entropy,
+                       int length,
+                       int* storage_ix, uint8_t* storage) {
+  int len_code = BlockLengthPrefix(length);
+  int extra_bits = BlockLengthExtraBits(len_code);
+  int extra_bits_value = length - BlockLengthOffset(len_code);
+  EntropyEncode(len_code, entropy, storage_ix, storage);
+
+  if (extra_bits > 0) {
+    WriteBits(extra_bits, extra_bits_value, storage_ix, storage);
+  }
+}
+
+void ComputeBlockTypeShortCodes(BlockSplit* split) {
+  if (split->num_types_ <= 1) {
+    split->num_types_ = 1;
+    return;
+  }
+  int ringbuffer[2] = { 0, 1 };
+  size_t index = 0;
+  for (int i = 0; i < split->types_.size(); ++i) {
+    int type = split->types_[i];
+    int type_code;
+    if (type == ringbuffer[index & 1]) {
+      type_code = 0;
+    } else if (type == ringbuffer[(index - 1) & 1] + 1) {
+      type_code = 1;
+    } else {
+      type_code = type + 2;
+    }
+    ringbuffer[index & 1] = type;
+    ++index;
+    split->type_codes_.push_back(type_code);
+  }
+}
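+
+// Illustrative example (not part of the original sources): with the ring
+// buffer initialized to {0, 1}, type code 0 means the type equals the entry
+// written two blocks earlier, type code 1 means it equals the previous
+// block type plus one, and any other type t is sent as t + 2. The type
+// sequence 0, 1, 0, 1, 2 therefore yields the type codes 0, 0, 0, 0, 1.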
+
+void BuildAndEncodeBlockSplitCode(const BlockSplit& split,
+                                  BlockSplitCode* code,
+                                  int* storage_ix, uint8_t* storage) {
+  EncodeVarLenUint8(split.num_types_ - 1, storage_ix, storage);
+  if (split.num_types_ == 1) {
+    return;
+  }
+
+  HistogramBlockType type_histo;
+  for (int i = 0; i < split.type_codes_.size(); ++i) {
+    type_histo.Add(split.type_codes_[i]);
+  }
+  BuildEntropyCode(type_histo, 15, split.num_types_ + 2,
+                   &code->block_type_code);
+  HistogramBlockLength length_histo;
+  for (int i = 0; i < split.lengths_.size(); ++i) {
+    length_histo.Add(BlockLengthPrefix(split.lengths_[i]));
+  }
+  BuildEntropyCode(length_histo, 15, kNumBlockLenPrefixes,
+                   &code->block_len_code);
+  StoreHuffmanCode(code->block_type_code, split.num_types_ + 2,
+                   storage_ix, storage);
+  StoreHuffmanCode(code->block_len_code, kNumBlockLenPrefixes,
+                   storage_ix, storage);
+  EncodeBlockLength(code->block_len_code, split.lengths_[0],
+                    storage_ix, storage);
+}
+
+void MoveAndEncode(const BlockSplitCode& code,
+                   BlockSplitIterator* it,
+                   int* storage_ix, uint8_t* storage) {
+  if (it->length_ == 0) {
+    ++it->idx_;
+    it->type_ = it->split_.types_[it->idx_];
+    it->length_ = it->split_.lengths_[it->idx_];
+    int type_code = it->split_.type_codes_[it->idx_];
+    EntropyEncode(type_code, code.block_type_code, storage_ix, storage);
+    EncodeBlockLength(code.block_len_code, it->length_, storage_ix, storage);
+  }
+  --it->length_;
+}
+
+struct EncodingParams {
+  int num_direct_distance_codes;
+  int distance_postfix_bits;
+  int literal_context_mode;
+};
+
+struct MetaBlock {
+  std::vector<Command> cmds;
+  EncodingParams params;
+  BlockSplit literal_split;
+  BlockSplit command_split;
+  BlockSplit distance_split;
+  std::vector<int> literal_context_modes;
+  std::vector<int> literal_context_map;
+  std::vector<int> distance_context_map;
+  std::vector<HistogramLiteral> literal_histograms;
+  std::vector<HistogramCommand> command_histograms;
+  std::vector<HistogramDistance> distance_histograms;
+};
+
+void BuildMetaBlock(const EncodingParams& params,
+                    const std::vector<Command>& cmds,
+                    const uint8_t* ringbuffer,
+                    const size_t pos,
+                    const size_t mask,
+                    MetaBlock* mb) {
+  mb->cmds = cmds;
+  mb->params = params;
+  if (cmds.empty()) {
+    return;
+  }
+  ComputeCommandPrefixes(&mb->cmds,
+                         mb->params.num_direct_distance_codes,
+                         mb->params.distance_postfix_bits);
+  SplitBlock(mb->cmds,
+             &ringbuffer[pos & mask],
+             &mb->literal_split,
+             &mb->command_split,
+             &mb->distance_split);
+  ComputeBlockTypeShortCodes(&mb->literal_split);
+  ComputeBlockTypeShortCodes(&mb->command_split);
+  ComputeBlockTypeShortCodes(&mb->distance_split);
+
+  mb->literal_context_modes.resize(mb->literal_split.num_types_,
+                                   mb->params.literal_context_mode);
+
+
+  int num_literal_contexts =
+      mb->literal_split.num_types_ << kLiteralContextBits;
+  int num_distance_contexts =
+      mb->distance_split.num_types_ << kDistanceContextBits;
+  std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
+  mb->command_histograms.resize(mb->command_split.num_types_);
+  std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
+  BuildHistograms(mb->cmds,
+                  mb->literal_split,
+                  mb->command_split,
+                  mb->distance_split,
+                  ringbuffer,
+                  pos,
+                  mask,
+                  mb->literal_context_modes,
+                  &literal_histograms,
+                  &mb->command_histograms,
+                  &distance_histograms);
+
+  // Histogram ids need to fit in one byte.
+  static const int kMaxNumberOfHistograms = 256;
+
+  mb->literal_histograms = literal_histograms;
+  ClusterHistograms(literal_histograms,
+                    1 << kLiteralContextBits,
+                    mb->literal_split.num_types_,
+                    kMaxNumberOfHistograms,
+                    &mb->literal_histograms,
+                    &mb->literal_context_map);
+
+  mb->distance_histograms = distance_histograms;
+  ClusterHistograms(distance_histograms,
+                    1 << kDistanceContextBits,
+                    mb->distance_split.num_types_,
+                    kMaxNumberOfHistograms,
+                    &mb->distance_histograms,
+                    &mb->distance_context_map);
+}
+
+size_t MetaBlockLength(const std::vector<Command>& cmds) {
+  size_t length = 0;
+  for (int i = 0; i < cmds.size(); ++i) {
+    const Command& cmd = cmds[i];
+    length += cmd.insert_length_ + cmd.copy_length_;
+  }
+  return length;
+}
+
+void StoreMetaBlock(const MetaBlock& mb,
+                    const bool is_last,
+                    const uint8_t* ringbuffer,
+                    const size_t mask,
+                    size_t* pos,
+                    int* storage_ix, uint8_t* storage) {
+  size_t length = MetaBlockLength(mb.cmds);
+  const size_t end_pos = *pos + length;
+  EncodeMetaBlockLength(length,
+                        is_last,
+                        false,
+                        storage_ix, storage);
+  if (length == 0) {
+    return;
+  }
+  BlockSplitCode literal_split_code;
+  BlockSplitCode command_split_code;
+  BlockSplitCode distance_split_code;
+  BuildAndEncodeBlockSplitCode(mb.literal_split, &literal_split_code,
+                               storage_ix, storage);
+  BuildAndEncodeBlockSplitCode(mb.command_split, &command_split_code,
+                               storage_ix, storage);
+  BuildAndEncodeBlockSplitCode(mb.distance_split, &distance_split_code,
+                               storage_ix, storage);
+  WriteBits(2, mb.params.distance_postfix_bits, storage_ix, storage);
+  WriteBits(4,
+            mb.params.num_direct_distance_codes >>
+            mb.params.distance_postfix_bits, storage_ix, storage);
+  int num_distance_codes =
+      kNumDistanceShortCodes + mb.params.num_direct_distance_codes +
+      (48 << mb.params.distance_postfix_bits);
+  for (int i = 0; i < mb.literal_split.num_types_; ++i) {
+    WriteBits(2, mb.literal_context_modes[i], storage_ix, storage);
+  }
+  EncodeContextMap(mb.literal_context_map, mb.literal_histograms.size(),
+                   storage_ix, storage);
+  EncodeContextMap(mb.distance_context_map, mb.distance_histograms.size(),
+                   storage_ix, storage);
+  std::vector<EntropyCodeLiteral> literal_codes;
+  std::vector<EntropyCodeCommand> command_codes;
+  std::vector<EntropyCodeDistance> distance_codes;
+  BuildEntropyCodes(mb.literal_histograms, 256, &literal_codes);
+  BuildEntropyCodes(mb.command_histograms, kNumCommandPrefixes,
+                    &command_codes);
+  BuildEntropyCodes(mb.distance_histograms, num_distance_codes,
+                    &distance_codes);
+  StoreHuffmanCodes(literal_codes, 256, storage_ix, storage);
+  StoreHuffmanCodes(command_codes, kNumCommandPrefixes, storage_ix, storage);
+  StoreHuffmanCodes(distance_codes, num_distance_codes, storage_ix, storage);
+  BlockSplitIterator literal_it(mb.literal_split);
+  BlockSplitIterator command_it(mb.command_split);
+  BlockSplitIterator distance_it(mb.distance_split);
+  for (int i = 0; i < mb.cmds.size(); ++i) {
+    const Command& cmd = mb.cmds[i];
+    MoveAndEncode(command_split_code, &command_it, storage_ix, storage);
+    EncodeCommand(cmd, command_codes[command_it.type_], storage_ix, storage);
+    for (int j = 0; j < cmd.insert_length_; ++j) {
+      MoveAndEncode(literal_split_code, &literal_it, storage_ix, storage);
+      int histogram_idx = literal_it.type_;
+      uint8_t prev_byte = *pos > 0 ? ringbuffer[(*pos - 1) & mask] : 0;
+      uint8_t prev_byte2 = *pos > 1 ? ringbuffer[(*pos - 2) & mask] : 0;
+      int context = ((literal_it.type_ << kLiteralContextBits) +
+                     Context(prev_byte, prev_byte2,
+                             mb.literal_context_modes[literal_it.type_]));
+      histogram_idx = mb.literal_context_map[context];
+      EntropyEncode(ringbuffer[*pos & mask],
+                    literal_codes[histogram_idx], storage_ix, storage);
+      ++(*pos);
+    }
+    if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) {
+      MoveAndEncode(distance_split_code, &distance_it, storage_ix, storage);
+      int context = (distance_it.type_ << 2) +
+          ((cmd.copy_length_code_ > 4) ? 3 : cmd.copy_length_code_ - 2);
+      int histogram_index = mb.distance_context_map[context];
+      size_t max_distance = std::min(*pos, (size_t)kMaxBackwardDistance);
+      EncodeCopyDistance(cmd, distance_codes[histogram_index],
+                         storage_ix, storage);
+    }
+    *pos += cmd.copy_length_;
+  }
+}
+
+BrotliCompressor::BrotliCompressor()
+    : window_bits_(kWindowBits),
+      hasher_(new Hasher),
+      dist_ringbuffer_idx_(0),
+      input_pos_(0),
+      ringbuffer_(kRingBufferBits, kMetaBlockSizeBits),
+      literal_cost_(1 << kRingBufferBits),
+      storage_ix_(0),
+      storage_(new uint8_t[2 << kMetaBlockSizeBits]) {
+  dist_ringbuffer_[0] = 16;
+  dist_ringbuffer_[1] = 15;
+  dist_ringbuffer_[2] = 11;
+  dist_ringbuffer_[3] = 4;
+  storage_[0] = 0;
+}
+
+BrotliCompressor::~BrotliCompressor() {
+  delete hasher_;
+  delete[] storage_;
+}
+
+void BrotliCompressor::WriteStreamHeader() {
+  // Encode window size.
+  if (window_bits_ == 16) {
+    WriteBits(1, 0, &storage_ix_, storage_);
+  } else {
+    WriteBits(1, 1, &storage_ix_, storage_);
+    WriteBits(3, window_bits_ - 17, &storage_ix_, storage_);
+  }
+}
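+
+// Illustrative note (not part of the original sources): the stream header
+// is a single 0 bit for a 16-bit window, or a 1 bit followed by the 3-bit
+// value window_bits_ - 17. For example, window_bits_ == 22 is written as a
+// 1 bit followed by the 3-bit value 5.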
+
+void BrotliCompressor::WriteMetaBlock(const size_t input_size,
+                                      const uint8_t* input_buffer,
+                                      const bool is_last,
+                                      size_t* encoded_size,
+                                      uint8_t* encoded_buffer) {
+  std::vector<Command> commands;
+  if (input_size > 0) {
+    ringbuffer_.Write(input_buffer, input_size);
+    EstimateBitCostsForLiterals(input_pos_, input_size,
+                                kRingBufferMask, ringbuffer_.start(),
+                                &literal_cost_[0]);
+    CreateBackwardReferences(input_size, input_pos_,
+                             ringbuffer_.start(),
+                             &literal_cost_[0],
+                             kRingBufferMask, kMaxBackwardDistance,
+                             hasher_,
+                             &commands);
+    ComputeDistanceShortCodes(&commands, dist_ringbuffer_,
+                              &dist_ringbuffer_idx_);
+  }
+  EncodingParams params;
+  params.num_direct_distance_codes = 12;
+  params.distance_postfix_bits = 1;
+  params.literal_context_mode = CONTEXT_SIGNED;
+  const int storage_ix0 = storage_ix_;
+  MetaBlock mb;
+  BuildMetaBlock(params, commands, ringbuffer_.start(), input_pos_,
+                 kRingBufferMask, &mb);
+  StoreMetaBlock(mb, is_last, ringbuffer_.start(), kRingBufferMask,
+                 &input_pos_, &storage_ix_, storage_);
+  size_t output_size = is_last ? ((storage_ix_ + 7) >> 3) : (storage_ix_ >> 3);
+  if (input_size + 4 < output_size) {
+    storage_ix_ = storage_ix0;
+    storage_[storage_ix_ >> 3] &= (1 << (storage_ix_ & 7)) - 1;
+    EncodeMetaBlockLength(input_size, false, true, &storage_ix_, storage_);
+    size_t hdr_size = (storage_ix_ + 7) >> 3;
+    memcpy(encoded_buffer, storage_, hdr_size);
+    memcpy(encoded_buffer + hdr_size, input_buffer, input_size);
+    *encoded_size = hdr_size + input_size;
+    if (is_last) {
+      encoded_buffer[*encoded_size] = 0x3;  // ISLAST, ISEMPTY
+      ++(*encoded_size);
+    }
+    storage_ix_ = 0;
+    storage_[0] = 0;
+  } else {
+    memcpy(encoded_buffer, storage_, output_size);
+    *encoded_size = output_size;
+    if (is_last) {
+      storage_ix_ = 0;
+      storage_[0] = 0;
+    } else {
+      storage_ix_ -= output_size << 3;
+      storage_[storage_ix_ >> 3] = storage_[output_size];
+    }
+  }
+}
+
+void BrotliCompressor::FinishStream(
+    size_t* encoded_size, uint8_t* encoded_buffer) {
+  WriteMetaBlock(0, NULL, true, encoded_size, encoded_buffer);
+}
+
+
+int BrotliCompressBuffer(size_t input_size,
+                         const uint8_t* input_buffer,
+                         size_t* encoded_size,
+                         uint8_t* encoded_buffer) {
+  if (input_size == 0) {
+    encoded_buffer[0] = 6;
+    *encoded_size = 1;
+    return 1;
+  }
+
+  BrotliCompressor compressor;
+  compressor.WriteStreamHeader();
+
+  const int max_block_size = 1 << kMetaBlockSizeBits;
+  size_t max_output_size = *encoded_size;
+  const uint8_t* input_end = input_buffer + input_size;
+  *encoded_size = 0;
+
+  while (input_buffer < input_end) {
+    int block_size = max_block_size;
+    bool is_last = false;
+    if (block_size >= input_end - input_buffer) {
+      block_size = input_end - input_buffer;
+      is_last = true;
+    }
+    size_t output_size = max_output_size;
+    compressor.WriteMetaBlock(block_size, input_buffer, is_last,
+                              &output_size, &encoded_buffer[*encoded_size]);
+    input_buffer += block_size;
+    *encoded_size += output_size;
+    max_output_size -= output_size;
+  }
+
+  return 1;
+}
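+
+// Minimal calling sketch (hypothetical names, not part of the original
+// sources): the caller passes the capacity of the output buffer in
+// *encoded_size and receives the compressed length back in the same
+// variable.
+//
+//   std::vector<uint8_t> output(2 * input_size + 1024);  // rough guess
+//   size_t encoded_size = output.size();
+//   if (brotli::BrotliCompressBuffer(input_size, input, &encoded_size,
+//                                    &output[0])) {
+//     // output[0 .. encoded_size) now holds the compressed stream.
+//   }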
+
+}  // namespace brotli
diff --git a/brotli/enc/encode.h b/brotli/enc/encode.h
new file mode 100644
index 0000000..0494b83
--- /dev/null
+++ b/brotli/enc/encode.h
@@ -0,0 +1,75 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// API for Brotli compression
+
+#ifndef BROTLI_ENC_ENCODE_H_
+#define BROTLI_ENC_ENCODE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string>
+#include <vector>
+#include "./hash.h"
+#include "./ringbuffer.h"
+
+namespace brotli {
+
+class BrotliCompressor {
+ public:
+  BrotliCompressor();
+  ~BrotliCompressor();
+
+  // Writes the stream header into the internal output buffer.
+  void WriteStreamHeader();
+
+  // Encodes the data in input_buffer as a meta-block, writes it to
+  // encoded_buffer, and sets *encoded_size to the number of bytes written.
+  void WriteMetaBlock(const size_t input_size,
+                      const uint8_t* input_buffer,
+                      const bool is_last,
+                      size_t* encoded_size,
+                      uint8_t* encoded_buffer);
+
+  // Writes a zero-length meta-block with the end-of-input bit set to the
+  // internal output buffer, copies the output buffer to encoded_buffer, and
+  // sets *encoded_size to the number of bytes written.
+  void FinishStream(size_t* encoded_size, uint8_t* encoded_buffer);
+
+
+ private:
+  int window_bits_;
+  Hasher* hasher_;
+  int dist_ringbuffer_[4];
+  size_t dist_ringbuffer_idx_;
+  size_t input_pos_;
+  RingBuffer ringbuffer_;
+  std::vector<float> literal_cost_;
+  int storage_ix_;
+  uint8_t* storage_;
+};
+
+// Compresses the data in input_buffer into encoded_buffer, and sets
+// *encoded_size to the compressed length.
+// Returns 0 if there was an error and 1 otherwise.
+int BrotliCompressBuffer(size_t input_size,
+                         const uint8_t* input_buffer,
+                         size_t* encoded_size,
+                         uint8_t* encoded_buffer);
+
+
+}  // namespace brotli
+
+#endif  // BROTLI_ENC_ENCODE_H_
diff --git a/brotli/enc/entropy_encode.cc b/brotli/enc/entropy_encode.cc
new file mode 100644
index 0000000..ad9f0f5
--- /dev/null
+++ b/brotli/enc/entropy_encode.cc
@@ -0,0 +1,403 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Entropy encoding (Huffman) utilities.
+
+#include "./entropy_encode.h"
+
+#include <stdint.h>
+#include <algorithm>
+#include <limits>
+#include <vector>
+#include <cstdlib>
+
+#include "./histogram.h"
+
+namespace brotli {
+
+namespace {
+
+struct HuffmanTree {
+  HuffmanTree();
+  HuffmanTree(int count, int16_t left, int16_t right)
+      : total_count_(count),
+        index_left_(left),
+        index_right_or_value_(right) {
+  }
+  int total_count_;
+  int16_t index_left_;
+  int16_t index_right_or_value_;
+};
+
+HuffmanTree::HuffmanTree() {}
+
+// Sort the root nodes, least popular first.
+bool SortHuffmanTree(const HuffmanTree &v0, const HuffmanTree &v1) {
+  if (v0.total_count_ == v1.total_count_) {
+    return v0.index_right_or_value_ > v1.index_right_or_value_;
+  }
+  return v0.total_count_ < v1.total_count_;
+}
+
+void SetDepth(const HuffmanTree &p,
+              HuffmanTree *pool,
+              uint8_t *depth,
+              int level) {
+  if (p.index_left_ >= 0) {
+    ++level;
+    SetDepth(pool[p.index_left_], pool, depth, level);
+    SetDepth(pool[p.index_right_or_value_], pool, depth, level);
+  } else {
+    depth[p.index_right_or_value_] = level;
+  }
+}
+
+}  // namespace
+
+// This function will create a Huffman tree.
+//
+// The catch here is that the tree cannot be arbitrarily deep.
+// Brotli specifies a maximum depth of 15 bits for "code trees"
+// and 7 bits for "code length code trees."
+//
+// count_limit is the minimum value that every population count is treated
+// as having; it is doubled until the resulting tree satisfies the maximum
+// depth requirement.
+//
+// This algorithm does not perform well for very long data blocks,
+// especially when population counts are larger than 2**tree_limit, but
+// we do not plan to use it with extremely long blocks.
+//
+// See http://en.wikipedia.org/wiki/Huffman_coding
+void CreateHuffmanTree(const int *data,
+                       const int length,
+                       const int tree_limit,
+                       uint8_t *depth) {
+  // For block sizes below 64 kB, we never need to do a second iteration
+  // of this loop. Probably all of our block sizes will be smaller than
+  // that, so this loop is mostly of academic interest. If we actually
+  // would need this, we would be better off with the Katajainen algorithm.
+  for (int count_limit = 1; ; count_limit *= 2) {
+    std::vector<HuffmanTree> tree;
+    tree.reserve(2 * length + 1);
+
+    for (int i = 0; i < length; ++i) {
+      if (data[i]) {
+        const int count = std::max(data[i], count_limit);
+        tree.push_back(HuffmanTree(count, -1, i));
+      }
+    }
+
+    const int n = tree.size();
+    if (n == 1) {
+      depth[tree[0].index_right_or_value_] = 1;      // Only one element.
+      break;
+    }
+
+    std::sort(tree.begin(), tree.end(), SortHuffmanTree);
+
+    // The nodes are:
+    // [0, n): the sorted leaf nodes that we start with.
+    // [n]: we add a sentinel here.
+    // [n + 1, 2n): new parent nodes are added here, starting from
+    //              (n+1). These are naturally in ascending order.
+    // [2n]: we add a sentinel at the end as well.
+    // There will be (2n+1) elements at the end.
+    const HuffmanTree sentinel(std::numeric_limits<int>::max(), -1, -1);
+    tree.push_back(sentinel);
+    tree.push_back(sentinel);
+
+    int i = 0;      // Points to the next leaf node.
+    int j = n + 1;  // Points to the next non-leaf node.
+    for (int k = n - 1; k > 0; --k) {
+      int left, right;
+      if (tree[i].total_count_ <= tree[j].total_count_) {
+        left = i;
+        ++i;
+      } else {
+        left = j;
+        ++j;
+      }
+      if (tree[i].total_count_ <= tree[j].total_count_) {
+        right = i;
+        ++i;
+      } else {
+        right = j;
+        ++j;
+      }
+
+      // The sentinel node becomes the parent node.
+      int j_end = tree.size() - 1;
+      tree[j_end].total_count_ =
+          tree[left].total_count_ + tree[right].total_count_;
+      tree[j_end].index_left_ = left;
+      tree[j_end].index_right_or_value_ = right;
+
+      // Add back the last sentinel node.
+      tree.push_back(sentinel);
+    }
+    SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
+
+    // We need to pack the Huffman tree in tree_limit bits.
+    // If this was not successful, add fake entities to the lowest values
+    // and retry.
+    if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
+      break;
+    }
+  }
+}
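+
+// Worked example of the depth limiting above (illustrative, not part of
+// the original sources): for the population counts {1, 1, 1, 8} and
+// tree_limit == 2, the first pass produces a code with depths 1, 2, 3 and
+// 3, which exceeds the limit. count_limit == 2 gives the same depths, but
+// with count_limit == 4 the counts are treated as {4, 4, 4, 8}, every
+// symbol receives depth 2, and the loop stops.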
+
+void Reverse(uint8_t* v, int start, int end) {
+  --end;
+  while (start < end) {
+    int tmp = v[start];
+    v[start] = v[end];
+    v[end] = tmp;
+    ++start;
+    --end;
+  }
+}
+
+void WriteHuffmanTreeRepetitions(
+    const int previous_value,
+    const int value,
+    int repetitions,
+    uint8_t* tree,
+    uint8_t* extra_bits,
+    int* tree_size) {
+  if (previous_value != value) {
+    tree[*tree_size] = value;
+    extra_bits[*tree_size] = 0;
+    ++(*tree_size);
+    --repetitions;
+  }
+  if (repetitions < 3) {
+    for (int i = 0; i < repetitions; ++i) {
+      tree[*tree_size] = value;
+      extra_bits[*tree_size] = 0;
+      ++(*tree_size);
+    }
+  } else {
+    repetitions -= 3;
+    int start = *tree_size;
+    while (repetitions >= 0) {
+      tree[*tree_size] = 16;
+      extra_bits[*tree_size] = repetitions & 0x3;
+      ++(*tree_size);
+      repetitions >>= 2;
+      --repetitions;
+    }
+    Reverse(tree, start, *tree_size);
+    Reverse(extra_bits, start, *tree_size);
+  }
+}
+
+void WriteHuffmanTreeRepetitionsZeros(
+    int repetitions,
+    uint8_t* tree,
+    uint8_t* extra_bits,
+    int* tree_size) {
+  if (repetitions < 3) {
+    for (int i = 0; i < repetitions; ++i) {
+      tree[*tree_size] = 0;
+      extra_bits[*tree_size] = 0;
+      ++(*tree_size);
+    }
+  } else {
+    repetitions -= 3;
+    int start = *tree_size;
+    while (repetitions >= 0) {
+      tree[*tree_size] = 17;
+      extra_bits[*tree_size] = repetitions & 0x7;
+      ++(*tree_size);
+      repetitions >>= 3;
+      --repetitions;
+    }
+    Reverse(tree, start, *tree_size);
+    Reverse(extra_bits, start, *tree_size);
+  }
+}
+
+
+// Heuristics for selecting the stride ranges to collapse.
+int ValuesShouldBeCollapsedToStrideAverage(int a, int b) {
+  return abs(a - b) < 4;
+}
+
+int OptimizeHuffmanCountsForRle(int length, int* counts) {
+  int stride;
+  int limit;
+  int sum;
+  uint8_t* good_for_rle;
+  // Let's make the Huffman code more compatible with rle encoding.
+  int i;
+  for (; length >= 0; --length) {
+    if (length == 0) {
+      return 1;  // All zeros.
+    }
+    if (counts[length - 1] != 0) {
+      // Now counts[0..length - 1] does not have trailing zeros.
+      break;
+    }
+  }
+  // 2) Let's mark all population counts that already can be encoded
+  // with an rle code.
+  good_for_rle = (uint8_t*)calloc(length, 1);
+  if (good_for_rle == NULL) {
+    return 0;
+  }
+  {
+    // Let's not spoil any of the existing good rle codes.
+    // Mark any sequence of 0's of length at least 5 as good_for_rle.
+    // Mark any sequence of non-0's of length at least 7 as good_for_rle.
+    int symbol = counts[0];
+    int stride = 0;
+    for (i = 0; i < length + 1; ++i) {
+      if (i == length || counts[i] != symbol) {
+        if ((symbol == 0 && stride >= 5) ||
+            (symbol != 0 && stride >= 7)) {
+          int k;
+          for (k = 0; k < stride; ++k) {
+            good_for_rle[i - k - 1] = 1;
+          }
+        }
+        stride = 1;
+        if (i != length) {
+          symbol = counts[i];
+        }
+      } else {
+        ++stride;
+      }
+    }
+  }
+  // 3) Let's replace those population counts that lead to more rle codes.
+  stride = 0;
+  limit = (counts[0] + counts[1] + counts[2]) / 3 + 1;
+  sum = 0;
+  for (i = 0; i < length + 1; ++i) {
+    if (i == length || good_for_rle[i] ||
+        (i != 0 && good_for_rle[i - 1]) ||
+        !ValuesShouldBeCollapsedToStrideAverage(counts[i], limit)) {
+      if (stride >= 4 || (stride >= 3 && sum == 0)) {
+        int k;
+        // The stride must end, collapse what we have, if we have enough (4).
+        int count = (sum + stride / 2) / stride;
+        if (count < 1) {
+          count = 1;
+        }
+        if (sum == 0) {
+          // Don't make an all zeros stride to be upgraded to ones.
+          count = 0;
+        }
+        for (k = 0; k < stride; ++k) {
+          // We don't want to change value at counts[i],
+          // that is already belonging to the next stride. Thus - 1.
+          counts[i - k - 1] = count;
+        }
+      }
+      stride = 0;
+      sum = 0;
+      if (i < length - 2) {
+        // All interesting strides have a count of at least 4,
+        // at least when non-zeros.
+        limit = (counts[i] + counts[i + 1] + counts[i + 2]) / 3 + 1;
+      } else if (i < length) {
+        limit = counts[i];
+      } else {
+        limit = 0;
+      }
+    }
+    ++stride;
+    if (i != length) {
+      sum += counts[i];
+      if (stride >= 4) {
+        limit = (sum + stride / 2) / stride;
+      }
+    }
+  }
+  free(good_for_rle);
+  return 1;
+}
+
+
+void WriteHuffmanTree(const uint8_t* depth, const int length,
+                      uint8_t* tree,
+                      uint8_t* extra_bits_data,
+                      int* huffman_tree_size) {
+  int previous_value = 8;
+  for (uint32_t i = 0; i < length;) {
+    const int value = depth[i];
+    int reps = 1;
+    for (uint32_t k = i + 1; k < length && depth[k] == value; ++k) {
+      ++reps;
+    }
+    if (value == 0) {
+      WriteHuffmanTreeRepetitionsZeros(reps, tree, extra_bits_data,
+                                       huffman_tree_size);
+    } else {
+      WriteHuffmanTreeRepetitions(previous_value, value, reps, tree,
+                                  extra_bits_data, huffman_tree_size);
+      previous_value = value;
+    }
+    i += reps;
+  }
+}
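+
+// Illustrative examples of the run coding above (not part of the original
+// sources): a run of 10 zero depths is written as the single code length
+// code 17 with 3 extra bits of value 7 (3 + 7 = 10). A non-zero depth
+// repeated 7 times after a different previous value is written as the
+// depth itself followed by code 16 with 2 extra bits of value 3, i.e. a
+// repeat of 3 + 3 = 6 further copies.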
+
+namespace {
+
+uint16_t ReverseBits(int num_bits, uint16_t bits) {
+  static const size_t kLut[16] = {  // Pre-reversed 4-bit values.
+    0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
+    0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
+  };
+  size_t retval = kLut[bits & 0xf];
+  for (int i = 4; i < num_bits; i += 4) {
+    retval <<= 4;
+    bits >>= 4;
+    retval |= kLut[bits & 0xf];
+  }
+  retval >>= (-num_bits & 0x3);
+  return retval;
+}
+
+}  // namespace
+
+void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits) {
+  // In Brotli, all bit depths are in the range [1..15].
+  // A bit depth of 0 means that the symbol does not exist.
+  const int kMaxBits = 16;  // 0..15 are values for bits
+  uint16_t bl_count[kMaxBits] = { 0 };
+  {
+    for (int i = 0; i < len; ++i) {
+      ++bl_count[depth[i]];
+    }
+    bl_count[0] = 0;
+  }
+  uint16_t next_code[kMaxBits];
+  next_code[0] = 0;
+  {
+    int code = 0;
+    for (int bits = 1; bits < kMaxBits; ++bits) {
+      code = (code + bl_count[bits - 1]) << 1;
+      next_code[bits] = code;
+    }
+  }
+  for (int i = 0; i < len; ++i) {
+    if (depth[i]) {
+      bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
+    }
+  }
+}
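+
+// Worked example (illustrative, not part of the original sources): for the
+// bit depths {1, 2, 3, 3} the canonical codes are 0, 10, 110 and 111 (most
+// significant bit first); after ReverseBits they are stored as the values
+// {0, 1, 3, 7}, ready to be written least significant bit first.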
+
+}  // namespace brotli
diff --git a/brotli/enc/entropy_encode.h b/brotli/enc/entropy_encode.h
new file mode 100644
index 0000000..89c3e1a
--- /dev/null
+++ b/brotli/enc/entropy_encode.h
@@ -0,0 +1,116 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Entropy encoding (Huffman) utilities.
+
+#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
+#define BROTLI_ENC_ENTROPY_ENCODE_H_
+
+#include <stdint.h>
+#include <string.h>
+#include "./histogram.h"
+#include "./prefix.h"
+
+namespace brotli {
+
+// This function will create a Huffman tree.
+//
+// The (data,length) contains the population counts.
+// The tree_limit is the maximum bit depth of the Huffman codes.
+//
+// The depth contains the tree, i.e., how many bits are used for
+// the symbol.
+//
+// See http://en.wikipedia.org/wiki/Huffman_coding
+void CreateHuffmanTree(const int *data,
+                       const int length,
+                       const int tree_limit,
+                       uint8_t *depth);
+
+// Changes the population counts so that the subsequent Huffman tree
+// compression, especially its RLE part, is more likely to compress this
+// data more efficiently.
+//
+// length contains the size of the histogram.
+// counts contains the population counts.
+int OptimizeHuffmanCountsForRle(int length, int* counts);
+
+
+// Writes a Huffman tree, given as bit depths, into the bitstream
+// representation of a Huffman tree. The generated representation is to be
+// compressed once more using a Huffman code.
+void WriteHuffmanTree(const uint8_t* depth, const int length,
+                      uint8_t* tree,
+                      uint8_t* extra_bits_data,
+                      int* huffman_tree_size);
+
+// Get the actual bit values for a tree of bit depths.
+void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits);
+
+template<int kSize>
+struct EntropyCode {
+  // How many bits for symbol.
+  uint8_t depth_[kSize];
+  // Actual bits used to represent the symbol.
+  uint16_t bits_[kSize];
+  // How many non-zero depth.
+  int count_;
+  // First four symbols with non-zero depth.
+  int symbols_[4];
+};
+
+template<int kSize>
+void BuildEntropyCode(const Histogram<kSize>& histogram,
+                      const int tree_limit,
+                      const int alphabet_size,
+                      EntropyCode<kSize>* code) {
+  memset(code->depth_, 0, sizeof(code->depth_));
+  memset(code->bits_, 0, sizeof(code->bits_));
+  memset(code->symbols_, 0, sizeof(code->symbols_));
+  code->count_ = 0;
+  if (histogram.total_count_ == 0) return;
+  for (int i = 0; i < kSize; ++i) {
+    if (histogram.data_[i] > 0) {
+      if (code->count_ < 4) code->symbols_[code->count_] = i;
+      ++code->count_;
+    }
+  }
+  if (code->count_ >= 64) {
+    int counts[kSize];
+    memcpy(counts, &histogram.data_[0], sizeof(counts[0]) * kSize);
+    OptimizeHuffmanCountsForRle(alphabet_size, counts);
+    CreateHuffmanTree(counts, alphabet_size, tree_limit, &code->depth_[0]);
+  } else {
+    CreateHuffmanTree(&histogram.data_[0], alphabet_size, tree_limit,
+                      &code->depth_[0]);
+  }
+  ConvertBitDepthsToSymbols(&code->depth_[0], alphabet_size, &code->bits_[0]);
+}
+
+static const int kCodeLengthCodes = 18;
+
+// Literal entropy code.
+typedef EntropyCode<256> EntropyCodeLiteral;
+// Prefix entropy codes.
+typedef EntropyCode<kNumCommandPrefixes> EntropyCodeCommand;
+typedef EntropyCode<kNumDistancePrefixes> EntropyCodeDistance;
+typedef EntropyCode<kNumBlockLenPrefixes> EntropyCodeBlockLength;
+// Context map entropy code, 256 Huffman tree indexes + 16 run length codes.
+typedef EntropyCode<272> EntropyCodeContextMap;
+// Block type entropy code, 256 block types + 2 special symbols.
+typedef EntropyCode<258> EntropyCodeBlockType;
+
+}  // namespace brotli
+
+#endif  // BROTLI_ENC_ENTROPY_ENCODE_H_
diff --git a/brotli/enc/fast_log.h b/brotli/enc/fast_log.h
new file mode 100644
index 0000000..0b09ea6
--- /dev/null
+++ b/brotli/enc/fast_log.h
@@ -0,0 +1,161 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Utilities for fast computation of logarithms.
+
+#ifndef BROTLI_ENC_FAST_LOG_H_
+#define BROTLI_ENC_FAST_LOG_H_
+
+#include <assert.h>
+#include <math.h>
+#include <stdint.h>
+
+namespace brotli {
+
+// Return floor(log2(n)) for positive integer n.  Returns -1 iff n == 0.
+inline int Log2Floor(uint32_t n) {
+#if defined(__clang__) ||                       \
+  (defined(__GNUC__) &&                                         \
+   ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4))
+  return n == 0 ? -1 : 31 ^ __builtin_clz(n);
+#else
+  if (n == 0)
+    return -1;
+  int log = 0;
+  uint32_t value = n;
+  for (int i = 4; i >= 0; --i) {
+    int shift = (1 << i);
+    uint32_t x = value >> shift;
+    if (x != 0) {
+      value = x;
+      log += shift;
+    }
+  }
+  assert(value == 1);
+  return log;
+#endif
+}
+
+// Return ceiling(log2(n)) for positive integer n.  Returns -1 iff n == 0.
+inline int Log2Ceiling(uint32_t n) {
+  int floor = Log2Floor(n);
+  if (n == (n &~ (n - 1)))              // zero or a power of two
+    return floor;
+  else
+    return floor + 1;
+}
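+
+// For example (illustrative, not part of the original sources):
+// Log2Floor(1) == 0, Log2Floor(5) == 2 and Log2Ceiling(5) == 3, while both
+// functions return -1 for an input of 0.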
+
+// A lookup table for small values of log2(int) to be used in entropy
+// computation.
+//
+// ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]])
+static const float kLog2Table[] = {
+  0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
+  1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
+  2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
+  3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
+  3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
+  3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
+  4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
+  4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
+  4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
+  4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
+  4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
+  5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
+  5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
+  5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
+  5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
+  5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
+  5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
+  5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
+  5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
+  5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
+  5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
+  5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
+  6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
+  6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
+  6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
+  6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
+  6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
+  6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
+  6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
+  6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
+  6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
+  6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
+  6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
+  6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
+  6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
+  6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
+  6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
+  6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
+  6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
+  6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
+  6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
+  6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
+  6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
+  7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
+  7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
+  7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
+  7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
+  7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
+  7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
+  7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
+  7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
+  7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
+  7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
+  7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
+  7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
+  7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
+  7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
+  7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
+  7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
+  7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
+  7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
+  7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
+  7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
+  7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
+  7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
+  7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
+  7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
+  7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
+  7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
+  7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
+  7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
+  7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
+  7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
+  7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
+  7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
+  7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
+  7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
+  7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
+  7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
+  7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
+  7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
+  7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
+  7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
+  7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
+  7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
+  7.9943534368588578f
+};
+
+// Faster logarithm for small integers, with the property of log2(0) == 0.
+static inline double FastLog2(int v) {
+  if (v < (int)(sizeof(kLog2Table) / sizeof(kLog2Table[0]))) {
+    return kLog2Table[v];
+  }
+  return log2(v);
+}
+
+}  // namespace brotli
+
+#endif  // BROTLI_ENC_FAST_LOG_H_
diff --git a/brotli/enc/find_match_length.h b/brotli/enc/find_match_length.h
new file mode 100644
index 0000000..0994ac2
--- /dev/null
+++ b/brotli/enc/find_match_length.h
@@ -0,0 +1,85 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Function to find maximal matching prefixes of strings.
+
+#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
+#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
+
+#include <stdint.h>
+
+#include "./port.h"
+
+namespace brotli {
+
+// Separate implementation for x86_64, for speed.
+#if defined(__GNUC__) && defined(ARCH_K8)
+
+static inline int FindMatchLengthWithLimit(const uint8_t* s1,
+                                           const uint8_t* s2,
+                                           size_t limit) {
+  int matched = 0;
+  size_t limit2 = (limit >> 3) + 1;  // + 1 is for pre-decrement in while
+  while (PREDICT_TRUE(--limit2)) {
+    if (PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64(s2) ==
+                      BROTLI_UNALIGNED_LOAD64(s1 + matched))) {
+      s2 += 8;
+      matched += 8;
+    } else {
+      uint64_t x =
+          BROTLI_UNALIGNED_LOAD64(s2) ^ BROTLI_UNALIGNED_LOAD64(s1 + matched);
+      int matching_bits =  __builtin_ctzll(x);
+      matched += matching_bits >> 3;
+      return matched;
+    }
+  }
+  limit = (limit & 7) + 1;  // + 1 is for pre-decrement in while
+  while (--limit) {
+    if (PREDICT_TRUE(s1[matched] == *s2)) {
+      ++s2;
+      ++matched;
+    } else {
+      return matched;
+    }
+  }
+  return matched;
+}
+#else
+static inline int FindMatchLengthWithLimit(const uint8_t* s1,
+                                           const uint8_t* s2,
+                                           size_t limit) {
+  int matched = 0;
+  const uint8_t* s2_limit = s2 + limit;
+  const uint8_t* s2_ptr = s2;
+  // Find out how long the match is. We loop over the data 32 bits at a
+  // time until we find a 32-bit block that doesn't match, and then compare
+  // the remaining bytes one by one to get the total length of the match.
+  while (s2_ptr <= s2_limit - 4 &&
+         BROTLI_UNALIGNED_LOAD32(s2_ptr) ==
+         BROTLI_UNALIGNED_LOAD32(s1 + matched)) {
+    s2_ptr += 4;
+    matched += 4;
+  }
+  while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) {
+    ++s2_ptr;
+    ++matched;
+  }
+  return matched;
+}
+#endif
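+
+// Illustrative note (not part of the original sources): in the x86_64
+// version above, __builtin_ctzll(x) counts the trailing zero bits of the
+// XOR of the two 8-byte loads; on a little-endian target the index of the
+// first differing byte is therefore (__builtin_ctzll(x) >> 3), which is
+// exactly the number of additional matching bytes. For example, if the
+// loads differ only in their last byte, ctzll returns at least 56 and 7
+// more bytes are added to the match length.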
+
+}  // namespace brotli
+
+#endif  // BROTLI_ENC_FIND_MATCH_LENGTH_H_
diff --git a/brotli/enc/hash.h b/brotli/enc/hash.h
new file mode 100644
index 0000000..b45d71c
--- /dev/null
+++ b/brotli/enc/hash.h
@@ -0,0 +1,363 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// A (forgetful) hash table of the data seen by the compressor, used to
+// help create backward references to previous data.
+
+#ifndef BROTLI_ENC_HASH_H_
+#define BROTLI_ENC_HASH_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/types.h>
+#include <algorithm>
+#include <cstdlib>
+
+#include "./fast_log.h"
+#include "./find_match_length.h"
+#include "./port.h"
+
+namespace brotli {
+
+// kHashMul32 multiplier has these properties:
+// * The multiplier must be odd. Otherwise we may lose the highest bit.
+// * No long streaks of 1s or 0s.
+// * There is no effort to ensure that it is a prime, the oddity is enough
+//   for this use.
+// * The number has been tuned heuristically against compression benchmarks.
+static const uint32_t kHashMul32 = 0x1e35a7bd;
+
+inline uint32_t Hash3Bytes(const uint8_t *data, const int bits) {
+  uint32_t h = (BROTLI_UNALIGNED_LOAD32(data) & 0xffffff) * kHashMul32;
+  // The higher bits contain more mixture from the multiplication,
+  // so we take our results from there.
+  return h >> (32 - bits);
+}
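+
+// Illustrative note (not part of the original sources): this is plain
+// multiplicative hashing. With kBucketBits == 13 (the Hasher typedef at
+// the end of this file), the bucket index is the top 13 bits of the 32-bit
+// product of the low three bytes at data and kHashMul32.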
+
+// Usually we choose the longest backward reference. This function allows
+// exceptions to that rule.
+//
+// If we choose a backward reference that is further away, it will
+// usually be coded with more bits. We approximate this by assuming
+// log2(distance). If the distance can be expressed in terms of the
+// last four distances, we use some heuristic constants to estimate
+// the bits cost. For the first up to four literals we use the bit
+// cost of the literals from the literal cost model, after that we
+// use the average bit cost of the cost model.
+//
+// This function is used to sometimes discard a longer backward reference
+// when it is not much longer and the bit cost for encoding it is more
+// than the saved literals.
+inline double BackwardReferenceScore(double average_cost,
+                                     double start_cost4,
+                                     double start_cost3,
+                                     double start_cost2,
+                                     int copy_length,
+                                     int backward_reference_offset,
+                                     int last_distance1,
+                                     int last_distance2,
+                                     int last_distance3,
+                                     int last_distance4) {
+  double retval = 0;
+  switch (copy_length) {
+    case 2: retval = start_cost2; break;
+    case 3: retval = start_cost3; break;
+    default: retval = start_cost4 + (copy_length - 4) * average_cost; break;
+  }
+  int diff_last1 = abs(backward_reference_offset - last_distance1);
+  int diff_last2 = abs(backward_reference_offset - last_distance2);
+  if (diff_last1 == 0) {
+    retval += 0.6;
+  } else if (diff_last1 < 4) {
+    retval -= 0.9 + 0.03 * diff_last1;
+  } else if (diff_last2 < 4) {
+    retval -= 0.95 + 0.1 * diff_last2;
+  } else if (backward_reference_offset == last_distance3) {
+    retval -= 1.17;
+  } else if (backward_reference_offset == last_distance4) {
+    retval -= 1.27;
+  } else {
+    retval -= 1.20 * Log2Floor(backward_reference_offset);
+  }
+  return retval;
+}
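+
+// Worked example (illustrative, not part of the original sources): a copy
+// of length 6 at exactly last_distance1 scores
+// start_cost4 + 2 * average_cost + 0.6, while the same copy at a fresh
+// distance d unrelated to the last four distances scores
+// start_cost4 + 2 * average_cost - 1.20 * Log2Floor(d), so distant copies
+// must be longer to be preferred.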
+
+// A (forgetful) hash table of the data seen by the compressor, used to
+// help create backward references to previous data.
+//
+// This is a hash map of fixed size (kBucketSize) to a ring buffer of
+// fixed size (kBlockSize). The ring buffer contains the last kBlockSize
+// index positions of the given hash key in the compressed data.
+template <int kBucketBits, int kBlockBits>
+class HashLongestMatch {
+ public:
+  HashLongestMatch()
+      : last_distance1_(4),
+        last_distance2_(11),
+        last_distance3_(15),
+        last_distance4_(16),
+        insert_length_(0),
+        average_cost_(5.4) {
+    Reset();
+  }
+  void Reset() {
+    std::fill(&num_[0], &num_[sizeof(num_) / sizeof(num_[0])], 0);
+  }
+
+  // Look at 3 bytes at data.
+  // Compute a hash from these, and store the value of ix at that position.
+  inline void Store(const uint8_t *data, const int ix) {
+    const uint32_t key = Hash3Bytes(data, kBucketBits);
+    const int minor_ix = num_[key] & kBlockMask;
+    buckets_[key][minor_ix] = ix;
+    ++num_[key];
+  }
+
+  // Store hashes for a range of data.
+  void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
+    for (int p = 0; p < len; ++p) {
+      Store(&data[p & mask], startix + p);
+    }
+  }
+
+  // Finds the longest backward match of &data[cur_ix], up to max_length
+  // bytes.
+  //
+  // Does not look for matches longer than max_length.
+  // Does not look for matches further away than max_backward.
+  // Writes the best found match length into best_len_out.
+  // Writes the backward distance of the best match into best_distance_out.
+  // Writes the score of the best match into best_score_out.
+  bool FindLongestMatch(const uint8_t * __restrict data,
+                        const float * __restrict literal_cost,
+                        const size_t ring_buffer_mask,
+                        const uint32_t cur_ix,
+                        uint32_t max_length,
+                        const uint32_t max_backward,
+                        size_t * __restrict best_len_out,
+                        size_t * __restrict best_len_code_out,
+                        size_t * __restrict best_distance_out,
+                        double * __restrict best_score_out) {
+    const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+    const double start_cost4 = literal_cost == NULL ? 20 :
+        literal_cost[cur_ix_masked] +
+        literal_cost[(cur_ix + 1) & ring_buffer_mask] +
+        literal_cost[(cur_ix + 2) & ring_buffer_mask] +
+        literal_cost[(cur_ix + 3) & ring_buffer_mask];
+    const double start_cost3 = literal_cost == NULL ? 15 :
+        literal_cost[cur_ix_masked] +
+        literal_cost[(cur_ix + 1) & ring_buffer_mask] +
+        literal_cost[(cur_ix + 2) & ring_buffer_mask] + 0.3;
+    double start_cost2 = literal_cost == NULL ? 10 :
+        literal_cost[cur_ix_masked] +
+        literal_cost[(cur_ix + 1) & ring_buffer_mask] + 1.2;
+    bool match_found = false;
+    // Don't accept a short copy from far away.
+    double best_score = 8.25;
+    if (insert_length_ < 4) {
+      double cost_diff[4] = { 0.20, 0.09, 0.05, 0.03 };
+      best_score += cost_diff[insert_length_];
+    }
+    size_t best_len = *best_len_out;
+    *best_len_out = 0;
+    size_t best_ix = 1;
+    // Try last distance first.
+    for (int i = 0; i < 16; ++i) {
+      size_t prev_ix = cur_ix;
+      switch(i) {
+        case 0: prev_ix -= last_distance1_; break;
+        case 1: prev_ix -= last_distance2_; break;
+        case 2: prev_ix -= last_distance3_; break;
+        case 3: prev_ix -= last_distance4_; break;
+
+        case 4: prev_ix -= last_distance1_ - 1; break;
+        case 5: prev_ix -= last_distance1_ + 1; break;
+        case 6: prev_ix -= last_distance1_ - 2; break;
+        case 7: prev_ix -= last_distance1_ + 2; break;
+        case 8: prev_ix -= last_distance1_ - 3; break;
+        case 9: prev_ix -= last_distance1_ + 3; break;
+
+        case 10: prev_ix -= last_distance2_ - 1; break;
+        case 11: prev_ix -= last_distance2_ + 1; break;
+        case 12: prev_ix -= last_distance2_ - 2; break;
+        case 13: prev_ix -= last_distance2_ + 2; break;
+        case 14: prev_ix -= last_distance2_ - 3; break;
+        case 15: prev_ix -= last_distance2_ + 3; break;
+      }
+      if (prev_ix >= cur_ix) {
+        continue;
+      }
+      const size_t backward = cur_ix - prev_ix;
+      if (PREDICT_FALSE(backward > max_backward)) {
+        continue;
+      }
+      prev_ix &= ring_buffer_mask;
+      if (data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+        continue;
+      }
+      const size_t len =
+          FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
+                                   max_length);
+      if (len >= 3 || (len == 2 && i < 2)) {
+        // Comparing for >= 2 does not change the semantics, but just avoids
+        // a few unnecessary binary logarithms in the backward reference
+        // score, since we are not interested in such short matches.
+        const double score = BackwardReferenceScore(average_cost_,
+                                                    start_cost4,
+                                                    start_cost3,
+                                                    start_cost2,
+                                                    len, backward,
+                                                    last_distance1_,
+                                                    last_distance2_,
+                                                    last_distance3_,
+                                                    last_distance4_);
+        if (best_score < score) {
+          best_score = score;
+          best_len = len;
+          best_ix = backward;
+          *best_len_out = best_len;
+          *best_len_code_out = best_len;
+          *best_distance_out = best_ix;
+          *best_score_out = best_score;
+          match_found = true;
+        }
+      }
+    }
+    const uint32_t key = Hash3Bytes(&data[cur_ix_masked], kBucketBits);
+    const int * __restrict const bucket = &buckets_[key][0];
+    const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
+    int stop = int(cur_ix) - 64;
+    if (stop < 0) { stop = 0; }
+
+    start_cost2 -= 1.0;
+    for (int i = cur_ix - 1; i > stop; --i) {
+      size_t prev_ix = i;
+      const size_t backward = cur_ix - prev_ix;
+      if (PREDICT_FALSE(backward > max_backward)) {
+        break;
+      }
+      prev_ix &= ring_buffer_mask;
+      if (data[cur_ix_masked] != data[prev_ix] ||
+          data[cur_ix_masked + 1] != data[prev_ix + 1]) {
+        continue;
+      }
+      int len = 2;
+      const double score = start_cost2 - 1.70 * Log2Floor(backward);
+
+      if (best_score < score) {
+        best_score = score;
+        best_len = len;
+        best_ix = backward;
+        *best_len_out = best_len;
+        *best_len_code_out = best_len;
+        *best_distance_out = best_ix;
+        match_found = true;
+      }
+    }
+    for (int i = num_[key] - 1; i >= down; --i) {
+      int prev_ix = bucket[i & kBlockMask];
+      if (prev_ix < 0) {
+        continue;
+      } else {
+        const size_t backward = cur_ix - prev_ix;
+        if (PREDICT_FALSE(backward > max_backward)) {
+          break;
+        }
+        prev_ix &= ring_buffer_mask;
+        if (data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+          continue;
+        }
+        const size_t len =
+            FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
+                                     max_length);
+        if (len >= 3) {
+          // Comparing for >= 3 does not change the semantics, but just
+          // avoids a few unnecessary binary logarithms in the backward
+          // reference score, since we are not interested in such short
+          // matches.
+          const double score = BackwardReferenceScore(average_cost_,
+                                                      start_cost4,
+                                                      start_cost3,
+                                                      start_cost2,
+                                                      len, backward,
+                                                      last_distance1_,
+                                                      last_distance2_,
+                                                      last_distance3_,
+                                                      last_distance4_);
+          if (best_score < score) {
+            best_score = score;
+            best_len = len;
+            best_ix = backward;
+            *best_len_out = best_len;
+            *best_len_code_out = best_len;
+            *best_distance_out = best_ix;
+            *best_score_out = best_score;
+            match_found = true;
+          }
+        }
+      }
+    }
+    return match_found;
+  }
+
+  void set_last_distance(int v) {
+    if (last_distance1_ != v) {
+      last_distance4_ = last_distance3_;
+      last_distance3_ = last_distance2_;
+      last_distance2_ = last_distance1_;
+      last_distance1_ = v;
+    }
+  }
+
+  int last_distance() const { return last_distance1_; }
+
+  void set_insert_length(int v) { insert_length_ = v; }
+
+  void set_average_cost(double v) { average_cost_ = v; }
+
+ private:
+  // Number of hash buckets.
+  static const uint32_t kBucketSize = 1 << kBucketBits;
+
+  // Only the kBlockSize newest backward references are kept;
+  // older ones are forgotten.
+  static const uint32_t kBlockSize = 1 << kBlockBits;
+
+  // Mask for accessing entries in a block (in a ringbuffer manner).
+  static const uint32_t kBlockMask = (1 << kBlockBits) - 1;
+
+  // Number of entries in a particular bucket.
+  uint16_t num_[kBucketSize];
+
+  // Buckets containing kBlockSize of backward references.
+  int buckets_[kBucketSize][kBlockSize];
+
+  int last_distance1_;
+  int last_distance2_;
+  int last_distance3_;
+  int last_distance4_;
+
+  // Cost adjustment for how many literals we are planning to insert
+  // anyway.
+  int insert_length_;
+
+  double average_cost_;
+};
+
+typedef HashLongestMatch<13, 11> Hasher;
+
+}  // namespace brotli
+
+#endif  // BROTLI_ENC_HASH_H_
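The score computed above trades match length against backward distance: a longer copy replaces more entropy-coded literals, while a larger distance costs roughly Log2Floor(backward) bits (weighted by 1.70 in the len == 2 path). The following standalone sketch illustrates that tradeoff with a deliberately simplified cost model; it is not the BackwardReferenceScore used by the encoder, and avg_literal_cost is a made-up stand-in for the per-literal cost estimate.

  #include <cmath>
  #include <cstdio>

  // Simplified, illustrative score: bits saved by replacing `len` literals,
  // each costing avg_literal_cost bits, minus an approximate cost of coding
  // the backward distance.
  static double SimplifiedScore(int len, int backward, double avg_literal_cost) {
    return len * avg_literal_cost -
           1.70 * std::log2(static_cast<double>(backward));
  }

  int main() {
    // A short, nearby match can score better than a longer, far-away one.
    std::printf("len=4, dist=8:     %.2f\n", SimplifiedScore(4, 8, 4.0));
    std::printf("len=6, dist=65536: %.2f\n", SimplifiedScore(6, 65536, 4.0));
    return 0;
  }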
diff --git a/brotli/enc/histogram.cc b/brotli/enc/histogram.cc
new file mode 100644
index 0000000..910b987
--- /dev/null
+++ b/brotli/enc/histogram.cc
@@ -0,0 +1,94 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Build per-context histograms of literals, commands and distance codes.
+
+#include "./histogram.h"
+
+#include <stdint.h>
+#include <cmath>
+
+#include "./block_splitter.h"
+#include "./command.h"
+#include "./context.h"
+#include "./prefix.h"
+
+namespace brotli {
+
+void BuildHistograms(
+    const std::vector<Command>& cmds,
+    const BlockSplit& literal_split,
+    const BlockSplit& insert_and_copy_split,
+    const BlockSplit& dist_split,
+    const uint8_t* ringbuffer,
+    size_t pos,
+    size_t mask,
+    const std::vector<int>& context_modes,
+    std::vector<HistogramLiteral>* literal_histograms,
+    std::vector<HistogramCommand>* insert_and_copy_histograms,
+    std::vector<HistogramDistance>* copy_dist_histograms) {
+  BlockSplitIterator literal_it(literal_split);
+  BlockSplitIterator insert_and_copy_it(insert_and_copy_split);
+  BlockSplitIterator dist_it(dist_split);
+  for (int i = 0; i < cmds.size(); ++i) {
+    const Command &cmd = cmds[i];
+    insert_and_copy_it.Next();
+    (*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
+        cmd.command_prefix_);
+    for (int j = 0; j < cmd.insert_length_; ++j) {
+      literal_it.Next();
+      uint8_t prev_byte = pos > 0 ? ringbuffer[(pos - 1) & mask] : 0;
+      uint8_t prev_byte2 = pos > 1 ? ringbuffer[(pos - 2) & mask] : 0;
+      int context = (literal_it.type_ << kLiteralContextBits) +
+          Context(prev_byte, prev_byte2, context_modes[literal_it.type_]);
+      (*literal_histograms)[context].Add(ringbuffer[pos & mask]);
+      ++pos;
+    }
+    pos += cmd.copy_length_;
+    if (cmd.copy_length_ > 0 && cmd.distance_prefix_ != 0xffff) {
+      dist_it.Next();
+      int context = (dist_it.type_ << kDistanceContextBits) +
+          ((cmd.copy_length_code_ > 4) ? 3 : cmd.copy_length_code_ - 2);
+      (*copy_dist_histograms)[context].Add(cmd.distance_prefix_);
+    }
+  }
+}
+
+void BuildLiteralHistogramsForBlockType(
+    const std::vector<Command>& cmds,
+    const BlockSplit& literal_split,
+    const uint8_t* ringbuffer,
+    size_t pos,
+    size_t mask,
+    int block_type,
+    int context_mode,
+    std::vector<HistogramLiteral>* histograms) {
+  BlockSplitIterator literal_it(literal_split);
+  for (int i = 0; i < cmds.size(); ++i) {
+    const Command &cmd = cmds[i];
+    for (int j = 0; j < cmd.insert_length_; ++j) {
+      literal_it.Next();
+      if (literal_it.type_ == block_type) {
+        uint8_t prev_byte = pos > 0 ? ringbuffer[(pos - 1) & mask] : 0;
+        uint8_t prev_byte2 = pos > 1 ? ringbuffer[(pos - 2) & mask] : 0;
+        int context = Context(prev_byte, prev_byte2, context_mode);
+        (*histograms)[context].Add(ringbuffer[pos & mask]);
+      }
+      ++pos;
+    }
+    pos += cmd.copy_length_;
+  }
+}
+
+}  // namespace brotli
diff --git a/brotli/enc/histogram.h b/brotli/enc/histogram.h
new file mode 100644
index 0000000..45726f5
--- /dev/null
+++ b/brotli/enc/histogram.h
@@ -0,0 +1,114 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Models the histograms of literals, commands and distance codes.
+
+#ifndef BROTLI_ENC_HISTOGRAM_H_
+#define BROTLI_ENC_HISTOGRAM_H_
+
+#include <stdint.h>
+#include <string.h>
+#include <vector>
+#include <utility>
+#include "./command.h"
+#include "./fast_log.h"
+#include "./prefix.h"
+
+namespace brotli {
+
+class BlockSplit;
+
+// A simple container for histograms of data in blocks.
+template<int kDataSize>
+struct Histogram {
+  Histogram() {
+    Clear();
+  }
+  void Clear() {
+    memset(data_, 0, sizeof(data_));
+    total_count_ = 0;
+  }
+  void Add(int val) {
+    ++data_[val];
+    ++total_count_;
+  }
+  void Remove(int val) {
+    --data_[val];
+    --total_count_;
+  }
+  template<typename DataType>
+  void Add(const DataType *p, size_t n) {
+    total_count_ += n;
+    n += 1;
+    while(--n) ++data_[*p++];
+  }
+  void AddHistogram(const Histogram& v) {
+    total_count_ += v.total_count_;
+    for (int i = 0; i < kDataSize; ++i) {
+      data_[i] += v.data_[i];
+    }
+  }
+  double EntropyBitCost() const {
+    double retval = total_count_ * FastLog2(total_count_);
+    for (int i = 0; i < kDataSize; ++i) {
+      retval -= data_[i] * FastLog2(data_[i]);
+    }
+    return retval;
+  }
+
+  int data_[kDataSize];
+  int total_count_;
+  double bit_cost_;
+};
+
+// Literal histogram.
+typedef Histogram<256> HistogramLiteral;
+// Prefix histograms.
+typedef Histogram<kNumCommandPrefixes> HistogramCommand;
+typedef Histogram<kNumDistancePrefixes> HistogramDistance;
+typedef Histogram<kNumBlockLenPrefixes> HistogramBlockLength;
+// Context map histogram, 256 Huffman tree indexes + 16 run length codes.
+typedef Histogram<272> HistogramContextMap;
+// Block type histogram, 256 block types + 2 special symbols.
+typedef Histogram<258> HistogramBlockType;
+
+static const int kLiteralContextBits = 6;
+static const int kDistanceContextBits = 2;
+
+void BuildHistograms(
+    const std::vector<Command>& cmds,
+    const BlockSplit& literal_split,
+    const BlockSplit& insert_and_copy_split,
+    const BlockSplit& dist_split,
+    const uint8_t* ringbuffer,
+    size_t pos,
+    size_t mask,
+    const std::vector<int>& context_modes,
+    std::vector<HistogramLiteral>* literal_histograms,
+    std::vector<HistogramCommand>* insert_and_copy_histograms,
+    std::vector<HistogramDistance>* copy_dist_histograms);
+
+void BuildLiteralHistogramsForBlockType(
+    const std::vector<Command>& cmds,
+    const BlockSplit& literal_split,
+    const uint8_t* ringbuffer,
+    size_t pos,
+    size_t mask,
+    int block_type,
+    int context_mode,
+    std::vector<HistogramLiteral>* histograms);
+
+}  // namespace brotli
+
+#endif  // BROTLI_ENC_HISTOGRAM_H_
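EntropyBitCost() above evaluates total * log2(total) - sum_i(count_i * log2(count_i)), which is the total count times the Shannon entropy of the empirical symbol distribution, i.e. an estimate of the entropy-coded size of the block in bits. A small self-contained check of that identity, using std::log2 instead of the FastLog2 approximation the encoder uses:

  #include <cmath>
  #include <cstdio>

  // Entropy-coded size estimate of a histogram, in bits:
  //   total * log2(total) - sum_i count_i * log2(count_i)
  static double EntropyBits(const int* counts, int n) {
    double total = 0.0;
    for (int i = 0; i < n; ++i) total += counts[i];
    double bits = total * std::log2(total);
    for (int i = 0; i < n; ++i) {
      if (counts[i] > 0) {
        bits -= counts[i] * std::log2(static_cast<double>(counts[i]));
      }
    }
    return bits;
  }

  int main() {
    const int counts[4] = {8, 4, 2, 2};  // 16 symbols, entropy 1.75 bits/symbol
    std::printf("estimated size: %.2f bits\n", EntropyBits(counts, 4));  // 28.00
    return 0;
  }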
diff --git a/brotli/enc/literal_cost.cc b/brotli/enc/literal_cost.cc
new file mode 100644
index 0000000..2a388d7
--- /dev/null
+++ b/brotli/enc/literal_cost.cc
@@ -0,0 +1,62 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Literal cost model to allow backward reference replacement to be efficient.
+
+#include "./literal_cost.h"
+
+#include <math.h>
+#include <stdint.h>
+#include <algorithm>
+
+namespace brotli {
+
+void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
+                                 const uint8_t *data, float *cost) {
+  int histogram[256] = { 0 };
+  int window_half = 2000;
+  int in_window = std::min(static_cast<size_t>(window_half), len);
+
+  // Bootstrap histogram.
+  for (int i = 0; i < in_window; ++i) {
+    ++histogram[data[(pos + i) & mask]];
+  }
+
+  // Compute bit costs with sliding window.
+  for (int i = 0; i < len; ++i) {
+    if (i - window_half >= 0) {
+      // Remove a byte in the past.
+      --histogram[data[(pos + i - window_half) & mask]];
+      --in_window;
+    }
+    if (i + window_half < len) {
+      // Add a byte in the future.
+      ++histogram[data[(pos + i + window_half) & mask]];
+      ++in_window;
+    }
+    int masked_pos = (pos + i) & mask;
+    int histo = histogram[data[masked_pos]];
+    if (histo == 0) {
+      histo = 1;
+    }
+    cost[masked_pos] = log2(static_cast<double>(in_window) / histo);
+    cost[masked_pos] += 0.03;
+    if (cost[masked_pos] < 1.0) {
+      cost[masked_pos] *= 0.5;
+      cost[masked_pos] += 0.5;
+    }
+  }
+}
+
+}  // namespace brotli
diff --git a/brotli/enc/literal_cost.h b/brotli/enc/literal_cost.h
new file mode 100644
index 0000000..fd7f325
--- /dev/null
+++ b/brotli/enc/literal_cost.h
@@ -0,0 +1,33 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Literal cost model to allow backward reference replacement to be efficient.
+
+#ifndef BROTLI_ENC_LITERAL_COST_H_
+#define BROTLI_ENC_LITERAL_COST_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace brotli {
+
+// Estimates how many bits the literals in the interval [pos, pos + len) in the
+// ringbuffer (data, mask) will take entropy coded and writes these estimates
+// to the ringbuffer (cost, mask).
+void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
+                                 const uint8_t *data, float *cost);
+
+}  // namespace brotli
+
+#endif  // BROTLI_ENC_LITERAL_COST_H_
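A minimal usage sketch for EstimateBitCostsForLiterals, not taken from the encoder's actual call site: both the data and cost arrays are addressed through the same mask, so for a small linear buffer whose size is a power of two the mask can simply be size - 1. The sample input below is purely illustrative.

  #include <stdint.h>
  #include <cstdio>
  #include <string>
  #include <vector>

  #include "./literal_cost.h"

  int main() {
    // 1024 bytes of mostly 'a', with one rare word in the middle.
    std::string text(1024, 'a');
    text.replace(512, 5, "hello");
    std::vector<float> cost(text.size());

    brotli::EstimateBitCostsForLiterals(
        0, text.size(), text.size() - 1,
        reinterpret_cast<const uint8_t*>(text.data()), &cost[0]);

    // Rare bytes get a higher estimated bit cost than the dominant 'a'.
    std::printf("cost of 'a' at 0:   %f\n", cost[0]);
    std::printf("cost of 'h' at 512: %f\n", cost[512]);
    return 0;
  }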
diff --git a/brotli/enc/port.h b/brotli/enc/port.h
new file mode 100644
index 0000000..36a365e
--- /dev/null
+++ b/brotli/enc/port.h
@@ -0,0 +1,138 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Macros for endianness, branch prediction and unaligned loads and stores.
+
+#ifndef BROTLI_ENC_PORT_H_
+#define BROTLI_ENC_PORT_H_
+
+#include <stdint.h>   // uint32_t, uint64_t used below
+#include <string.h>   // memcpy, used by the fallback unaligned load/store helpers
+
+#if defined OS_LINUX || defined OS_CYGWIN
+#include <endian.h>
+#elif defined OS_FREEBSD
+#include <machine/endian.h>
+#elif defined OS_MACOSX
+#include <machine/endian.h>
+/* Let's try and follow the Linux convention */
+#define __BYTE_ORDER  BYTE_ORDER
+#define __LITTLE_ENDIAN LITTLE_ENDIAN
+#define __BIG_ENDIAN BIG_ENDIAN
+#endif
+
+// Define the macros IS_LITTLE_ENDIAN or IS_BIG_ENDIAN using the endian
+// definitions from endian.h above, if endian.h was included.
+#ifdef __BYTE_ORDER
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define IS_LITTLE_ENDIAN
+#endif
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define IS_BIG_ENDIAN
+#endif
+
+#else
+
+#if defined(__LITTLE_ENDIAN__)
+#define IS_LITTLE_ENDIAN
+#elif defined(__BIG_ENDIAN__)
+#define IS_BIG_ENDIAN
+#endif
+#endif  // __BYTE_ORDER
+
+#if defined(COMPILER_GCC3)
+#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
+#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
+#else
+#define PREDICT_FALSE(x) x
+#define PREDICT_TRUE(x) x
+#endif
+
+// Portable handling of unaligned loads, stores, and copies.
+// On some platforms, like ARM, the copy functions can be more efficient
+// than a load and a store.
+
+#if defined(ARCH_PIII) || defined(ARCH_ATHLON) || \
+  defined(ARCH_K8) || defined(_ARCH_PPC)
+
+// x86 and x86-64 can perform unaligned loads/stores directly;
+// modern PowerPC hardware can also do unaligned integer loads and stores;
+// but note: the FPU still sends unaligned loads and stores to a trap handler!
+
+#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
+#define BROTLI_UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64_t *>(_p))
+
+#define BROTLI_UNALIGNED_STORE32(_p, _val) \
+  (*reinterpret_cast<uint32_t *>(_p) = (_val))
+#define BROTLI_UNALIGNED_STORE64(_p, _val) \
+  (*reinterpret_cast<uint64_t *>(_p) = (_val))
+
+#elif defined(__arm__) && \
+  !defined(__ARM_ARCH_5__) && \
+  !defined(__ARM_ARCH_5T__) && \
+  !defined(__ARM_ARCH_5TE__) && \
+  !defined(__ARM_ARCH_5TEJ__) && \
+  !defined(__ARM_ARCH_6__) && \
+  !defined(__ARM_ARCH_6J__) && \
+  !defined(__ARM_ARCH_6K__) && \
+  !defined(__ARM_ARCH_6Z__) && \
+  !defined(__ARM_ARCH_6ZK__) && \
+  !defined(__ARM_ARCH_6T2__)
+
+// ARMv7 and newer support native unaligned accesses, but only of 16-bit
+// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
+// do an unaligned read and rotate the words around a bit, or do the reads very
+// slowly (trip through kernel mode).
+
+#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
+#define BROTLI_UNALIGNED_STORE32(_p, _val) \
+  (*reinterpret_cast<uint32_t *>(_p) = (_val))
+
+inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
+  uint64_t t;
+  memcpy(&t, p, sizeof t);
+  return t;
+}
+
+inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
+  memcpy(p, &v, sizeof v);
+}
+
+#else
+
+// These functions are provided for architectures that don't support
+// unaligned loads and stores.
+
+inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
+  uint32_t t;
+  memcpy(&t, p, sizeof t);
+  return t;
+}
+
+inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
+  uint64_t t;
+  memcpy(&t, p, sizeof t);
+  return t;
+}
+
+inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
+  memcpy(p, &v, sizeof v);
+}
+
+inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
+  memcpy(p, &v, sizeof v);
+}
+
+#endif
+
+#endif  // BROTLI_ENC_PORT_H_
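On targets without native unaligned access, the helpers above fall back to memcpy, which avoids undefined behaviour from dereferencing a misaligned pointer and is usually lowered to a single load or store anyway. A small standalone sketch of the same idiom; the printed value assumes a little-endian host.

  #include <stdint.h>
  #include <cstdio>
  #include <cstring>

  // Portable unaligned 32-bit load via memcpy.
  static uint32_t LoadU32(const void* p) {
    uint32_t v;
    memcpy(&v, p, sizeof v);
    return v;
  }

  int main() {
    uint8_t buf[8] = {0, 0x78, 0x56, 0x34, 0x12, 0, 0, 0};
    // &buf[1] is generally not 4-byte aligned; the memcpy-based load is fine.
    std::printf("0x%08x\n", LoadU32(&buf[1]));  // 0x12345678 on little-endian
    return 0;
  }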
diff --git a/brotli/enc/prefix.cc b/brotli/enc/prefix.cc
new file mode 100644
index 0000000..3e43501
--- /dev/null
+++ b/brotli/enc/prefix.cc
@@ -0,0 +1,166 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Functions for encoding integers into prefix codes, the number of extra
+// bits needed, and the actual values of those extra bits.
+
+#include "./prefix.h"
+
+#include "./fast_log.h"
+
+namespace brotli {
+
+// Represents the range of values belonging to a prefix code:
+// [offset, offset + 2^nbits)
+struct PrefixCodeRange {
+  int offset;
+  int nbits;
+};
+
+static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
+  {   1,  2}, {    5,  2}, {  9,   2}, {  13,  2},
+  {  17,  3}, {   25,  3}, {  33,  3}, {  41,  3},
+  {  49,  4}, {   65,  4}, {  81,  4}, {  97,  4},
+  { 113,  5}, {  145,  5}, { 177,  5}, { 209,  5},
+  { 241,  6}, {  305,  6}, { 369,  7}, { 497,  8},
+  { 753,  9}, { 1265, 10}, {2289, 11}, {4337, 12},
+  {8433, 13}, {16625, 24}
+};
+
+static const PrefixCodeRange kInsertLengthPrefixCode[kNumInsertLenPrefixes] = {
+  {   0,  0}, {   1,  0}, {  2,   0}, {    3,  0},
+  {   4,  0}, {   5,  0}, {  6,   1}, {    8,  1},
+  {  10,  2}, {  14,  2}, { 18,   3}, {   26,  3},
+  {  34,  4}, {  50,  4}, { 66,   5}, {   98,  5},
+  { 130,  6}, { 194,  7}, { 322,  8}, {  578,  9},
+  {1090, 10}, {2114, 12}, {6210, 14}, {22594, 24},
+};
+
+static const PrefixCodeRange kCopyLengthPrefixCode[kNumCopyLenPrefixes] = {
+  {  2, 0}, {   3,  0}, {   4,  0}, {   5,  0},
+  {  6, 0}, {   7,  0}, {   8,  0}, {   9,  0},
+  { 10, 1}, {  12,  1}, {  14,  2}, {  18,  2},
+  { 22, 3}, {  30,  3}, {  38,  4}, {  54,  4},
+  { 70, 5}, { 102,  5}, { 134,  6}, { 198,  7},
+  {326, 8}, { 582,  9}, {1094, 10}, {2118, 24},
+};
+
+static const int kInsertAndCopyRangeLut[9] = {
+  0, 1, 4, 2, 3, 6, 5, 7, 8,
+};
+
+static const int kInsertRangeLut[9] = {
+  0, 0, 1, 1, 0, 2, 1, 2, 2,
+};
+
+static const int kCopyRangeLut[9] = {
+  0, 1, 0, 1, 2, 0, 2, 1, 2,
+};
+
+int InsertLengthPrefix(int length) {
+  for (int i = 0; i < kNumInsertLenPrefixes; ++i) {
+    const PrefixCodeRange& range = kInsertLengthPrefixCode[i];
+    if (length >= range.offset && length < range.offset + (1 << range.nbits)) {
+      return i;
+    }
+  }
+  return -1;
+}
+
+int CopyLengthPrefix(int length) {
+  for (int i = 0; i < kNumCopyLenPrefixes; ++i) {
+    const PrefixCodeRange& range = kCopyLengthPrefixCode[i];
+    if (length >= range.offset && length < range.offset + (1 << range.nbits)) {
+      return i;
+    }
+  }
+  return -1;
+}
+
+int CommandPrefix(int insert_length, int copy_length) {
+  if (copy_length == 0) {
+    copy_length = 3;
+  }
+  int insert_prefix = InsertLengthPrefix(insert_length);
+  int copy_prefix = CopyLengthPrefix(copy_length);
+  int range_idx = 3 * (insert_prefix >> 3) + (copy_prefix >> 3);
+  return ((kInsertAndCopyRangeLut[range_idx] << 6) +
+          ((insert_prefix & 7) << 3) + (copy_prefix & 7));
+}
+
+int InsertLengthExtraBits(int code) {
+  int insert_code = (kInsertRangeLut[code >> 6] << 3) + ((code >> 3) & 7);
+  return kInsertLengthPrefixCode[insert_code].nbits;
+}
+
+int InsertLengthOffset(int code) {
+  int insert_code = (kInsertRangeLut[code >> 6] << 3) + ((code >> 3) & 7);
+  return kInsertLengthPrefixCode[insert_code].offset;
+}
+
+int CopyLengthExtraBits(int code) {
+  int copy_code = (kCopyRangeLut[code >> 6] << 3) + (code & 7);
+  return kCopyLengthPrefixCode[copy_code].nbits;
+}
+
+int CopyLengthOffset(int code) {
+  int copy_code = (kCopyRangeLut[code >> 6] << 3) + (code & 7);
+  return kCopyLengthPrefixCode[copy_code].offset;
+}
+
+void PrefixEncodeCopyDistance(int distance_code,
+                              int num_direct_codes,
+                              int postfix_bits,
+                              uint16_t* code,
+                              int* nbits,
+                              uint32_t* extra_bits) {
+  distance_code -= 1;
+  if (distance_code < kNumDistanceShortCodes + num_direct_codes) {
+    *code = distance_code;
+    *nbits = 0;
+    *extra_bits = 0;
+    return;
+  }
+  distance_code -= kNumDistanceShortCodes + num_direct_codes;
+  distance_code += (1 << (postfix_bits + 2));
+  int bucket = Log2Floor(distance_code) - 1;
+  int postfix_mask = (1 << postfix_bits) - 1;
+  int postfix = distance_code & postfix_mask;
+  int prefix = (distance_code >> bucket) & 1;
+  int offset = (2 + prefix) << bucket;
+  *nbits = bucket - postfix_bits;
+  *code = kNumDistanceShortCodes + num_direct_codes +
+      ((2 * (*nbits - 1) + prefix) << postfix_bits) + postfix;
+  *extra_bits = (distance_code - offset) >> postfix_bits;
+}
+
+int BlockLengthPrefix(int length) {
+  for (int i = 0; i < kNumBlockLenPrefixes; ++i) {
+    const PrefixCodeRange& range = kBlockLengthPrefixCode[i];
+    if (length >= range.offset && length < range.offset + (1 << range.nbits)) {
+      return i;
+    }
+  }
+  return -1;
+}
+
+int BlockLengthExtraBits(int length_code) {
+  return kBlockLengthPrefixCode[length_code].nbits;
+}
+
+int BlockLengthOffset(int length_code) {
+  return kBlockLengthPrefixCode[length_code].offset;
+}
+
+}  // namespace brotli
diff --git a/brotli/enc/prefix.h b/brotli/enc/prefix.h
new file mode 100644
index 0000000..47974f8
--- /dev/null
+++ b/brotli/enc/prefix.h
@@ -0,0 +1,51 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Functions for encoding integers into prefix codes, the number of extra
+// bits needed, and the actual values of those extra bits.
+
+#ifndef BROTLI_ENC_PREFIX_H_
+#define BROTLI_ENC_PREFIX_H_
+
+#include <stdint.h>
+
+namespace brotli {
+
+static const int kNumInsertLenPrefixes = 24;
+static const int kNumCopyLenPrefixes = 24;
+static const int kNumCommandPrefixes = 704;
+static const int kNumBlockLenPrefixes = 26;
+static const int kNumDistanceShortCodes = 16;
+static const int kNumDistancePrefixes = 520;
+
+int CommandPrefix(int insert_length, int copy_length);
+int InsertLengthExtraBits(int prefix);
+int InsertLengthOffset(int prefix);
+int CopyLengthExtraBits(int prefix);
+int CopyLengthOffset(int prefix);
+
+void PrefixEncodeCopyDistance(int distance_code,
+                              int num_direct_codes,
+                              int shift_bits,
+                              uint16_t* prefix,
+                              int* nbits,
+                              uint32_t* extra_bits);
+
+int BlockLengthPrefix(int length);
+int BlockLengthExtraBits(int prefix);
+int BlockLengthOffset(int prefix);
+
+}  // namespace brotli
+
+#endif  // BROTLI_ENC_PREFIX_H_
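Each prefix code above covers the range [offset, offset + 2^nbits): the table index is the symbol that gets entropy coded, and the exact value is reconstructed from offset plus nbits of extra bits. A hedged round-trip sketch using the block-length helpers declared above (it needs to be linked against prefix.cc):

  #include <cstdio>

  #include "./prefix.h"

  int main() {
    const int length = 1000;
    const int code = brotli::BlockLengthPrefix(length);          // table index
    const int nbits = brotli::BlockLengthExtraBits(code);        // extra bits to emit
    const int extra = length - brotli::BlockLengthOffset(code);  // their value

    const int decoded = brotli::BlockLengthOffset(code) + extra;
    std::printf("length=%d -> code=%d, %d extra bits (value %d), decoded=%d\n",
                length, code, nbits, extra, decoded);
    return 0;
  }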
diff --git a/brotli/enc/ringbuffer.h b/brotli/enc/ringbuffer.h
new file mode 100644
index 0000000..d88f2ca
--- /dev/null
+++ b/brotli/enc/ringbuffer.h
@@ -0,0 +1,89 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Sliding window over the input data.
+
+#ifndef BROTLI_ENC_RINGBUFFER_H_
+#define BROTLI_ENC_RINGBUFFER_H_
+
+#include <stdint.h>    // uint8_t
+#include <string.h>    // memcpy
+#include <algorithm>   // std::min
+
+// A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
+// data in a circular manner: writing a byte writes it to
+// `position() % (1 << window_bits)'. For convenience, the RingBuffer array
+// contains another copy of the first `1 << tail_bits' bytes:
+// buffer_[i] == buffer_[i + (1 << window_bits)] if i < (1 << tail_bits).
+class RingBuffer {
+ public:
+  RingBuffer(int window_bits, int tail_bits)
+      : window_bits_(window_bits), tail_bits_(tail_bits), pos_(0) {
+    static const int kSlackForThreeByteHashingEverywhere = 2;
+    const int buflen = (1 << window_bits_) + (1 << tail_bits_);
+    buffer_ = new uint8_t[buflen + kSlackForThreeByteHashingEverywhere];
+    for (int i = 0; i < kSlackForThreeByteHashingEverywhere; ++i) {
+      buffer_[buflen + i] = 0;
+    }
+  }
+  ~RingBuffer() {
+    delete [] buffer_;
+  }
+
+  // Push bytes into the ring buffer.
+  void Write(const uint8_t *bytes, size_t n) {
+    const size_t masked_pos = pos_ & ((1 << window_bits_) - 1);
+    // The length of the writes is limited so that we do not need to worry
+    // about a write wrapping around the ring buffer more than once.
+    WriteTail(bytes, n);
+    if (masked_pos + n <= (1 << window_bits_)) {
+      // A single write fits.
+      memcpy(&buffer_[masked_pos], bytes, n);
+    } else {
+      // Split into two writes.
+      // Copy into the end of the buffer, including the tail buffer.
+      memcpy(&buffer_[masked_pos], bytes,
+             std::min(n,
+                      ((1 << window_bits_) + (1 << tail_bits_)) - masked_pos));
+      // Copy into the beginning of the buffer.
+      memcpy(&buffer_[0], bytes + ((1 << window_bits_) - masked_pos),
+             n - ((1 << window_bits_) - masked_pos));
+    }
+    pos_ += n;
+  }
+
+  // Logical cursor position in the ring buffer.
+  size_t position() const { return pos_; }
+
+  uint8_t *start() { return &buffer_[0]; }
+  const uint8_t *start() const { return &buffer_[0]; }
+
+ private:
+  void WriteTail(const uint8_t *bytes, size_t n) {
+    const size_t masked_pos = pos_ & ((1 << window_bits_) - 1);
+    if (masked_pos < (1 << tail_bits_)) {
+      // Just fill the tail buffer with the beginning data.
+      const size_t p = (1 << window_bits_) + masked_pos;
+      memcpy(&buffer_[p], bytes, std::min(n, (1 << tail_bits_) - masked_pos));
+    }
+  }
+
+  // Size of the ringbuffer is (1 << window_bits) + (1 << tail_bits).
+  const int window_bits_;
+  const int tail_bits_;
+
+  // Position to write in the ring buffer.
+  size_t pos_;
+  // The actual ring buffer containing the data and the copy of the beginning
+  // as a tail.
+  uint8_t *buffer_;
+};
+
+#endif  // BROTLI_ENC_RINGBUFFER_H_
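A brief usage sketch of RingBuffer, illustrating the mirrored tail described above; the small window and tail sizes are hypothetical and chosen only to make the mirroring easy to see.

  #include <stdint.h>
  #include <string.h>
  #include <algorithm>
  #include <cstdio>

  #include "./ringbuffer.h"

  int main() {
    // 16-byte window (window_bits = 4) with a 4-byte mirrored tail (tail_bits = 2).
    RingBuffer rb(4, 2);

    const char text[] = "abcdefghij";
    rb.Write(reinterpret_cast<const uint8_t*>(text), 10);

    // The write started at the beginning of the window, so its first
    // (1 << tail_bits) bytes were also copied past the end of the window:
    // buffer_[i] == buffer_[i + 16] for i < 4.
    const uint8_t* buf = rb.start();
    std::printf("%c == %c, position = %zu\n", buf[0], buf[16], rb.position());
    return 0;
  }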
diff --git a/brotli/enc/write_bits.h b/brotli/enc/write_bits.h
new file mode 100644
index 0000000..cf6f53e
--- /dev/null
+++ b/brotli/enc/write_bits.h
@@ -0,0 +1,95 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Write bits into a byte array.
+
+#ifndef BROTLI_ENC_WRITE_BITS_H_
+#define BROTLI_ENC_WRITE_BITS_H_
+
+#include <assert.h>
+#if defined(OS_MACOSX)
+  #include <machine/endian.h>
+#else
+  #include <endian.h>
+#endif
+#include <stdint.h>
+#include <stdio.h>
+
+#include "./port.h"
+
+namespace brotli {
+
+//#define BIT_WRITER_DEBUG
+
+// This function writes bits into bytes in increasing addresses, and within
+// a byte least-significant-bit first.
+//
+// The function can write up to 56 bits in one go.
+// Example: let's assume that 3 bits (Rs below) have been written already:
+//
+// BYTE-0     BYTE+1       BYTE+2
+//
+// 0000 0RRR    0000 0000    0000 0000
+//
+// Now, we could write up to 5 more bits into the most significant bits of
+// BYTE-0 by just shifting them by 3 and OR'ing them into BYTE-0.
+//
+// For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
+// and locate the rest in BYTE+1, BYTE+2, etc.
+inline void WriteBits(int n_bits,
+                      uint64_t bits,
+                      int * __restrict pos,
+                      uint8_t * __restrict array) {
+#ifdef BIT_WRITER_DEBUG
+  printf("WriteBits  %2d  0x%016llx  %10d\n", n_bits, bits, *pos);
+#endif
+#ifdef IS_LITTLE_ENDIAN
+  // This branch of the code can write up to 56 bits at a time,
+  // 7 bits are lost by being perhaps already in *p and at least
+  // 1 bit is needed to initialize the bit-stream ahead (i.e. if 7
+  // bits are in *p and we write 57 bits, then the next write will
+  // access a byte that was never initialized).
+  uint8_t *p = &array[*pos >> 3];
+  uint64_t v = *p;
+  v |= bits << (*pos & 7);
+  BROTLI_UNALIGNED_STORE64(p, v);  // Set some bits.
+  *pos += n_bits;
+#else
+  // An implicit & 0xff is assumed for uint8_t arithmetic.
+  uint8_t *array_pos = &array[*pos >> 3];
+  const int bits_reserved_in_first_byte = (*pos & 7);
+  bits <<= bits_reserved_in_first_byte;
+  *array_pos++ |= bits;
+  for (int bits_left_to_write = n_bits - 8 + bits_reserved_in_first_byte;
+       bits_left_to_write >= 1;
+       bits_left_to_write -= 8) {
+    bits >>= 8;
+    *array_pos++ = bits;
+  }
+  *array_pos = 0;
+  *pos += n_bits;
+#endif
+}
+
+inline void WriteBitsPrepareStorage(int pos, uint8_t *array) {
+#ifdef BIT_WRITER_DEBUG
+  printf("WriteBitsPrepareStorage            %10d\n", pos);
+#endif
+  assert((pos & 7) == 0);
+  array[pos >> 3] = 0;
+}
+
+}  // namespace brotli
+
+#endif  // BROTLI_ENC_WRITE_BITS_H_
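A minimal, hedged sketch of driving the bit writer: the next byte is zeroed with WriteBitsPrepareStorage, then successive WriteBits calls append values least-significant-bit first. The storage size below is illustrative; it just leaves room for the 8-byte stores the little-endian fast path may perform.

  #include <stdint.h>
  #include <cstdio>

  #include "./write_bits.h"

  int main() {
    uint8_t storage[16] = {0};
    int pos = 0;  // bit position

    brotli::WriteBitsPrepareStorage(pos, storage);
    brotli::WriteBits(3, 0x5, &pos, storage);   // low 3 bits of byte 0 become 101
    brotli::WriteBits(8, 0xff, &pos, storage);  // next 8 bits

    // storage[0] is now 0xfd: 0x5 in the low three bits, then five 1-bits.
    std::printf("byte0=0x%02x byte1=0x%02x pos=%d\n",
                storage[0], storage[1], pos);
    return 0;
  }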
diff --git a/build.xml b/build.xml
new file mode 100644
index 0000000..c1ff87d
--- /dev/null
+++ b/build.xml
@@ -0,0 +1,32 @@
+<project name="compression" default="jar">
+  <target name="clean">
+    <delete dir="build/classes" />
+    <delete dir="build/jar" />
+  </target>
+
+  <target name="compile">
+    <mkdir dir="build/classes" />
+    <javac srcdir="src" destdir="build/classes" includeantruntime="false" debug="true">
+      <compilerarg value="-Xlint" />
+      <classpath>
+        <fileset dir="lib" includes="*.jar" />
+      </classpath>
+    </javac>
+  </target>
+
+  <target name="jar" depends="compile">
+    <mkdir dir="build/jar" />
+    <jar destfile="build/jar/compression.jar" basedir="build/classes">
+      <zipfileset src="lib/eotconverter.jar" />
+      <zipfileset src="lib/guava-11.0.1.jar" />
+      <zipfileset src="lib/sfntly.jar" />
+      <zipfileset src="lib/woffconverter.jar" />
+      <zipfileset src="lib/lzma.jar" />
+      <manifest>
+        <attribute name="Main-Class" value="com.google.typography.font.compression.SimpleRunner" />
+      </manifest>
+    </jar>
+  </target>
+
+</project>
+
diff --git a/docs/WOFFUltraCondensedfileformat.pdf b/docs/WOFFUltraCondensedfileformat.pdf
new file mode 100644
index 0000000..24c1e04
--- /dev/null
+++ b/docs/WOFFUltraCondensedfileformat.pdf
Binary files differ
diff --git a/ots-lzma.patch b/ots-lzma.patch
new file mode 100644
index 0000000..0cf22a5
--- /dev/null
+++ b/ots-lzma.patch
@@ -0,0 +1,5500 @@
+Index: test/lzma.cc
+===================================================================
+--- test/lzma.cc	(revision 0)
++++ test/lzma.cc	(revision 0)
+@@ -0,0 +1,139 @@
++// Copyright (c) 2012 The Chromium Authors. All rights reserved.
++// Use of this source code is governed by a BSD-style license that can be
++// found in the LICENSE file.
++
++#include <arpa/inet.h>
++#include <fcntl.h>
++#include <sys/stat.h>
++#include <unistd.h>
++
++#include <cstdio>
++#include <cstring>  // memcpy, strncmp
++#include <string>
++#include <vector>
++
++#include "opentype-sanitiser.h"
++#include "ots-memory-stream.h"
++
++#include "third_party/lzma_sdk/LzmaLib.h"
++
++namespace {
++
++static const size_t kCompressedLengthFieldSize = 4;
++
++int Usage(const char *argv0) {
++  std::fprintf(stderr, "Usage: %s (compress|decompress) filename\n", argv0);
++  return 1;
++}
++
++bool ReadFile(const char *file_name, std::vector<uint8_t>* data) {
++  const int fd = open(file_name, O_RDONLY);
++  if (fd < 0) {
++    return false;
++  }
++
++  struct stat st;
++  fstat(fd, &st);
++
++  data->resize(st.st_size);
++  if (read(fd, &(*data)[0], st.st_size) != st.st_size) {
++    close(fd);
++    return false;
++  }
++  close(fd);
++  return true;
++}
++
++bool Compress(std::vector<uint8_t>* input, std::vector<uint8_t>* output) {
++  size_t props_size = LZMA_PROPS_SIZE;
++  size_t out_len = input->size() * 2;
++  output->resize(out_len + props_size + kCompressedLengthFieldSize);
++
++  uint8_t* output_start = &(*output)[kCompressedLengthFieldSize];
++
++  int result = LzmaCompress(output_start + LZMA_PROPS_SIZE, &out_len,
++                            &(*input)[0], input->size(),
++                            output_start, &props_size,
++                            -1, 0, -1, -1, -1, -1, 1);
++  if (props_size != LZMA_PROPS_SIZE || result != SZ_OK)
++    return false;
++
++  output->resize(props_size + out_len + kCompressedLengthFieldSize);
++  // Store the uncompressed length at the beginning of the buffer.
++  uint32_t uncompressed_length = htonl(input->size());
++  memcpy(&(*output)[0], &uncompressed_length, kCompressedLengthFieldSize);
++  return true;
++}
++
++bool Decompress(std::vector<uint8_t>* input, std::vector<uint8_t>* output) {
++  if (input->size() < kCompressedLengthFieldSize + LZMA_PROPS_SIZE)
++    return false;
++
++  // Assume the uncompressed length is stored at the beginning of the buffer
++  // in network byte order.
++  uint32_t uncompressed_length = 0;
++  memcpy(&uncompressed_length, &(*input)[0], kCompressedLengthFieldSize);
++  uncompressed_length = ntohl(uncompressed_length);
++
++  output->resize(uncompressed_length);
++  uint8_t* input_start = &(*input)[kCompressedLengthFieldSize];
++  size_t in_len = input->size() - LZMA_PROPS_SIZE;
++  size_t out_len = output->size();
++  int result = LzmaUncompress(&(*output)[0], &out_len,
++                              input_start + LZMA_PROPS_SIZE,
++                              &in_len, input_start, LZMA_PROPS_SIZE);
++
++  return result == SZ_OK;
++}
++
++bool DumpResult(std::vector<uint8_t>* result, const std::string* file_name) {
++  int fd = open(file_name->c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0600);
++  if (fd < 0) {
++    perror("opening output file");
++    return false;
++  }
++  if (write(fd, &(*result)[0], result->size()) < 0) {
++    perror("writing output file");
++    close(fd);
++    return false;
++  }
++  close(fd);
++  return true;
++}
++
++} // namespace
++
++int main(int argc, char** argv) {
++  if (argc != 3) return Usage(argv[0]);
++
++  std::vector<uint8_t> in_data;
++  if (!ReadFile(argv[2], &in_data)) {
++    std::fprintf(stderr, "Failed to read file!\n");
++    return 1;
++  }
++
++  std::vector<uint8_t> out_data;
++  std::string file_name;
++  if (std::strncmp("compress", argv[1], 8) == 0) {
++    if (!Compress(&in_data, &out_data)) {
++      std::fprintf(stderr, "Failed to compress file.\n");
++      return 1;
++    }
++    file_name = "compressed.dat";
++  } else if (std::strncmp("decompress", argv[1], 10) == 0) {
++    if (!Decompress(&in_data, &out_data)) {
++      std::fprintf(stderr, "Failed to decompress file.\n");
++      return 1;
++    }
++    file_name = "decompressed.dat";
++  } else {
++    std::fprintf(
++        stderr,
++        "The second argument must be either 'compress' or 'decompress'.");
++    return 1;
++  }
++
++  if (!DumpResult(&out_data, &file_name)) {
++    std::fprintf(stderr, "Failed to write the result.\n");
++    return 1;
++  }
++}
+
+Property changes on: test/lzma.cc
+___________________________________________________________________
+Added: svn:eol-style
+   + LF
+
+Index: ots-common.gypi
+===================================================================
+--- ots-common.gypi	(revision 83)
++++ ots-common.gypi	(working copy)
+@@ -68,6 +68,7 @@
+     ],
+     'ots_include_dirs': [
+       'include',
++      '.',
+     ],
+   },
+ }
+Index: ots-standalone.gyp
+===================================================================
+--- ots-standalone.gyp	(revision 83)
++++ ots-standalone.gyp	(working copy)
+@@ -96,6 +96,12 @@
+           '<@(ots_include_dirs)',
+         ],
+       },
++      'dependencies': [
++        'third_party/lzma_sdk/lzma_sdk.gyp:lzma_sdk',
++      ],
++      'export_dependent_settings': [
++        'third_party/lzma_sdk/lzma_sdk.gyp:lzma_sdk',
++      ],
+     },
+     {
+       'target_name': 'idempotent',
+@@ -117,5 +123,15 @@
+         }],
+       ],
+     },
++    {
++      'target_name': 'lzma',
++      'type': 'executable',
++      'sources': [
++        'test/lzma.cc',
++      ],
++      'dependencies': [
++        'ots',
++      ],
++    },
+   ],
+ }
+Index: third_party/lzma_sdk/LzmaEnc.h
+===================================================================
+--- third_party/lzma_sdk/LzmaEnc.h	(revision 0)
++++ third_party/lzma_sdk/LzmaEnc.h	(revision 0)
+@@ -0,0 +1,80 @@
++/*  LzmaEnc.h -- LZMA Encoder
++2009-02-07 : Igor Pavlov : Public domain */
++
++#ifndef __LZMA_ENC_H
++#define __LZMA_ENC_H
++
++#include "Types.h"
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++#define LZMA_PROPS_SIZE 5
++
++typedef struct _CLzmaEncProps
++{
++  int level;       /*  0 <= level <= 9 */
++  UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
++                      (1 << 12) <= dictSize <= (1 << 30) for 64-bit version
++                       default = (1 << 24) */
++  int lc;          /* 0 <= lc <= 8, default = 3 */
++  int lp;          /* 0 <= lp <= 4, default = 0 */
++  int pb;          /* 0 <= pb <= 4, default = 2 */
++  int algo;        /* 0 - fast, 1 - normal, default = 1 */
++  int fb;          /* 5 <= fb <= 273, default = 32 */
++  int btMode;      /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
++  int numHashBytes; /* 2, 3 or 4, default = 4 */
++  UInt32 mc;        /* 1 <= mc <= (1 << 30), default = 32 */
++  unsigned writeEndMark;  /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
++  int numThreads;  /* 1 or 2, default = 2 */
++} CLzmaEncProps;
++
++void LzmaEncProps_Init(CLzmaEncProps *p);
++void LzmaEncProps_Normalize(CLzmaEncProps *p);
++UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2);
++
++
++/* ---------- CLzmaEncHandle Interface ---------- */
++
++/* LzmaEnc_* functions can return the following exit codes:
++Returns:
++  SZ_OK           - OK
++  SZ_ERROR_MEM    - Memory allocation error
++  SZ_ERROR_PARAM  - Incorrect parameter in props
++  SZ_ERROR_WRITE  - Write callback error.
++  SZ_ERROR_PROGRESS - some break from progress callback
++  SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
++*/
++
++typedef void * CLzmaEncHandle;
++
++CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc);
++void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig);
++SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props);
++SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
++SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream,
++    ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
++SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
++    int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
++
++/* ---------- One Call Interface ---------- */
++
++/* LzmaEncode
++Return code:
++  SZ_OK               - OK
++  SZ_ERROR_MEM        - Memory allocation error
++  SZ_ERROR_PARAM      - Incorrect parameter
++  SZ_ERROR_OUTPUT_EOF - output buffer overflow
++  SZ_ERROR_THREAD     - errors in multithreading functions (only for Mt version)
++*/
++
++SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
++    const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
++    ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif
+
+Property changes on: third_party/lzma_sdk/LzmaEnc.h
+___________________________________________________________________
+Added: svn:eol-style
+   + LF
+
+Index: third_party/lzma_sdk/LzHash.h
+===================================================================
+--- third_party/lzma_sdk/LzHash.h	(revision 0)
++++ third_party/lzma_sdk/LzHash.h	(revision 0)
+@@ -0,0 +1,54 @@
++/* LzHash.h -- HASH functions for LZ algorithms
++2009-02-07 : Igor Pavlov : Public domain */
++
++#ifndef __LZ_HASH_H
++#define __LZ_HASH_H
++
++#define kHash2Size (1 << 10)
++#define kHash3Size (1 << 16)
++#define kHash4Size (1 << 20)
++
++#define kFix3HashSize (kHash2Size)
++#define kFix4HashSize (kHash2Size + kHash3Size)
++#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
++
++#define HASH2_CALC hashValue = cur[0] | ((UInt32)cur[1] << 8);
++
++#define HASH3_CALC { \
++  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
++  hash2Value = temp & (kHash2Size - 1); \
++  hashValue = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
++
++#define HASH4_CALC { \
++  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
++  hash2Value = temp & (kHash2Size - 1); \
++  hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \
++  hashValue = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & p->hashMask; }
++
++#define HASH5_CALC { \
++  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
++  hash2Value = temp & (kHash2Size - 1); \
++  hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \
++  hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)); \
++  hashValue = (hash4Value ^ (p->crc[cur[4]] << 3)) & p->hashMask; \
++  hash4Value &= (kHash4Size - 1); }
++
++/* #define HASH_ZIP_CALC hashValue = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */
++#define HASH_ZIP_CALC hashValue = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
++
++
++#define MT_HASH2_CALC \
++  hash2Value = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
++
++#define MT_HASH3_CALC { \
++  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
++  hash2Value = temp & (kHash2Size - 1); \
++  hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
++
++#define MT_HASH4_CALC { \
++  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
++  hash2Value = temp & (kHash2Size - 1); \
++  hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \
++  hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); }
++
++#endif
+
+Property changes on: third_party/lzma_sdk/LzHash.h
+___________________________________________________________________
+Added: svn:executable
+   + *
+Added: svn:eol-style
+   + LF
+
+Index: third_party/lzma_sdk/Alloc.h
+===================================================================
+--- third_party/lzma_sdk/Alloc.h	(revision 0)
++++ third_party/lzma_sdk/Alloc.h	(revision 0)
+@@ -0,0 +1,38 @@
++/* Alloc.h -- Memory allocation functions
++2009-02-07 : Igor Pavlov : Public domain */
++
++#ifndef __COMMON_ALLOC_H
++#define __COMMON_ALLOC_H
++
++#include <stddef.h>
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++void *MyAlloc(size_t size);
++void MyFree(void *address);
++
++#ifdef _WIN32
++
++void SetLargePageSize();
++
++void *MidAlloc(size_t size);
++void MidFree(void *address);
++void *BigAlloc(size_t size);
++void BigFree(void *address);
++
++#else
++
++#define MidAlloc(size) MyAlloc(size)
++#define MidFree(address) MyFree(address)
++#define BigAlloc(size) MyAlloc(size)
++#define BigFree(address) MyFree(address)
++
++#endif
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif
+
+Property changes on: third_party/lzma_sdk/Alloc.h
+___________________________________________________________________
+Added: svn:eol-style
+   + LF
+
+Index: third_party/lzma_sdk/LzmaLib.h
+===================================================================
+--- third_party/lzma_sdk/LzmaLib.h	(revision 0)
++++ third_party/lzma_sdk/LzmaLib.h	(revision 0)
+@@ -0,0 +1,135 @@
++/* LzmaLib.h -- LZMA library interface
++2009-04-07 : Igor Pavlov : Public domain */
++
++#ifndef __LZMA_LIB_H
++#define __LZMA_LIB_H
++
++#include "Types.h"
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++#define MY_STDAPI int MY_STD_CALL
++
++#define LZMA_PROPS_SIZE 5
++
++/*
++RAM requirements for LZMA:
++  for compression:   (dictSize * 11.5 + 6 MB) + state_size
++  for decompression: dictSize + state_size
++    state_size = (4 + (1.5 << (lc + lp))) KB
++    by default (lc=3, lp=0), state_size = 16 KB.
++
++LZMA properties (5 bytes) format
++    Offset Size  Description
++      0     1    lc, lp and pb in encoded form.
++      1     4    dictSize (little endian).
++*/
++
++/*
++LzmaCompress
++------------
++
++outPropsSize -
++     In:  the pointer to the size of outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.
++     Out: the pointer to the size of written properties in outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.
++
++  LZMA Encoder will use default values for any parameter, if it is
++  -1  for any from: level, lc, lp, pb, fb, numThreads
++   0  for dictSize
++  
++level - compression level: 0 <= level <= 9;
++
++  level dictSize algo  fb
++    0:    16 KB   0    32
++    1:    64 KB   0    32
++    2:   256 KB   0    32
++    3:     1 MB   0    32
++    4:     4 MB   0    32
++    5:    16 MB   1    32
++    6:    32 MB   1    32
++    7+:   64 MB   1    64
++ 
++  The default value for "level" is 5.
++
++  algo = 0 means fast method
++  algo = 1 means normal method
++
++dictSize - The dictionary size in bytes. The maximum value is
++        128 MB = (1 << 27) bytes for 32-bit version
++          1 GB = (1 << 30) bytes for 64-bit version
++     The default value is 16 MB = (1 << 24) bytes.
++     It's recommended to use the dictionary that is larger than 4 KB and
++     that can be calculated as (1 << N) or (3 << N) sizes.
++
++lc - The number of literal context bits (high bits of previous literal).
++     It can be in the range from 0 to 8. The default value is 3.
++     Sometimes lc=4 gives the gain for big files.
++
++lp - The number of literal pos bits (low bits of current position for literals).
++     It can be in the range from 0 to 4. The default value is 0.
++     The lp switch is intended for periodical data when the period is equal to 2^lp.
++     For example, for 32-bit (4 bytes) periodical data you can use lp=2. Often it's
++     better to set lc=0, if you change lp switch.
++
++pb - The number of pos bits (low bits of current position).
++     It can be in the range from 0 to 4. The default value is 2.
++     The pb switch is intended for periodical data when the period is equal 2^pb.
++
++fb - Word size (the number of fast bytes).
++     It can be in the range from 5 to 273. The default value is 32.
++     Usually, a big number gives a little bit better compression ratio and
++     slower compression process.
++
++numThreads - The number of threads. 1 or 2. The default value is 2.
++     Fast mode (algo = 0) can use only 1 thread.
++
++Out:
++  destLen  - processed output size
++Returns:
++  SZ_OK               - OK
++  SZ_ERROR_MEM        - Memory allocation error
++  SZ_ERROR_PARAM      - Incorrect parameter
++  SZ_ERROR_OUTPUT_EOF - output buffer overflow
++  SZ_ERROR_THREAD     - errors in multithreading functions (only for Mt version)
++*/
++
++MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
++  unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */
++  int level,      /* 0 <= level <= 9, default = 5 */
++  unsigned dictSize,  /* default = (1 << 24) */
++  int lc,        /* 0 <= lc <= 8, default = 3  */
++  int lp,        /* 0 <= lp <= 4, default = 0  */
++  int pb,        /* 0 <= pb <= 4, default = 2  */
++  int fb,        /* 5 <= fb <= 273, default = 32 */
++  int numThreads /* 1 or 2, default = 2 */
++  );
++
++/*
++LzmaUncompress
++--------------
++In:
++  dest     - output data
++  destLen  - output data size
++  src      - input data
++  srcLen   - input data size
++Out:
++  destLen  - processed output size
++  srcLen   - processed input size
++Returns:
++  SZ_OK                - OK
++  SZ_ERROR_DATA        - Data error
++  SZ_ERROR_MEM         - Memory allocation error
++  SZ_ERROR_UNSUPPORTED - Unsupported properties
++  SZ_ERROR_INPUT_EOF   - it needs more bytes in input buffer (src)
++*/
++
++MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen,
++  const unsigned char *props, size_t propsSize);
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif
+
+Property changes on: third_party/lzma_sdk/LzmaLib.h
+___________________________________________________________________
+Added: svn:eol-style
+   + LF
+
+Index: third_party/lzma_sdk/LICENSE
+===================================================================
+--- third_party/lzma_sdk/LICENSE	(revision 0)
++++ third_party/lzma_sdk/LICENSE	(revision 0)
+@@ -0,0 +1 @@
++LZMA SDK is placed in the public domain.
+Index: third_party/lzma_sdk/Types.h
+===================================================================
+--- third_party/lzma_sdk/Types.h	(revision 0)
++++ third_party/lzma_sdk/Types.h	(revision 0)
+@@ -0,0 +1,254 @@
++/* Types.h -- Basic types
++2010-10-09 : Igor Pavlov : Public domain */
++
++#ifndef __7Z_TYPES_H
++#define __7Z_TYPES_H
++
++#include <stddef.h>
++
++#ifdef _WIN32
++#include <windows.h>
++#endif
++
++#ifndef EXTERN_C_BEGIN
++#ifdef __cplusplus
++#define EXTERN_C_BEGIN extern "C" {
++#define EXTERN_C_END }
++#else
++#define EXTERN_C_BEGIN
++#define EXTERN_C_END
++#endif
++#endif
++
++EXTERN_C_BEGIN
++
++#define SZ_OK 0
++
++#define SZ_ERROR_DATA 1
++#define SZ_ERROR_MEM 2
++#define SZ_ERROR_CRC 3
++#define SZ_ERROR_UNSUPPORTED 4
++#define SZ_ERROR_PARAM 5
++#define SZ_ERROR_INPUT_EOF 6
++#define SZ_ERROR_OUTPUT_EOF 7
++#define SZ_ERROR_READ 8
++#define SZ_ERROR_WRITE 9
++#define SZ_ERROR_PROGRESS 10
++#define SZ_ERROR_FAIL 11
++#define SZ_ERROR_THREAD 12
++
++#define SZ_ERROR_ARCHIVE 16
++#define SZ_ERROR_NO_ARCHIVE 17
++
++typedef int SRes;
++
++#ifdef _WIN32
++typedef DWORD WRes;
++#else
++typedef int WRes;
++#endif
++
++#ifndef RINOK
++#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
++#endif
++
++typedef unsigned char Byte;
++typedef short Int16;
++typedef unsigned short UInt16;
++
++#ifdef _LZMA_UINT32_IS_ULONG
++typedef long Int32;
++typedef unsigned long UInt32;
++#else
++typedef int Int32;
++typedef unsigned int UInt32;
++#endif
++
++#ifdef _SZ_NO_INT_64
++
++/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
++   NOTES: Some code will work incorrectly in that case! */
++
++typedef long Int64;
++typedef unsigned long UInt64;
++
++#else
++
++#if defined(_MSC_VER) || defined(__BORLANDC__)
++typedef __int64 Int64;
++typedef unsigned __int64 UInt64;
++#define UINT64_CONST(n) n
++#else
++typedef long long int Int64;
++typedef unsigned long long int UInt64;
++#define UINT64_CONST(n) n ## ULL
++#endif
++
++#endif
++
++#ifdef _LZMA_NO_SYSTEM_SIZE_T
++typedef UInt32 SizeT;
++#else
++typedef size_t SizeT;
++#endif
++
++typedef int Bool;
++#define True 1
++#define False 0
++
++
++#ifdef _WIN32
++#define MY_STD_CALL __stdcall
++#else
++#define MY_STD_CALL
++#endif
++
++#ifdef _MSC_VER
++
++#if _MSC_VER >= 1300
++#define MY_NO_INLINE __declspec(noinline)
++#else
++#define MY_NO_INLINE
++#endif
++
++#define MY_CDECL __cdecl
++#define MY_FAST_CALL __fastcall
++
++#else
++
++#define MY_CDECL
++#define MY_FAST_CALL
++
++#endif
++
++
++/* The following interfaces use first parameter as pointer to structure */
++
++typedef struct
++{
++  Byte (*Read)(void *p); /* reads one byte, returns 0 in case of EOF or error */
++} IByteIn;
++
++typedef struct
++{
++  void (*Write)(void *p, Byte b);
++} IByteOut;
++
++typedef struct
++{
++  SRes (*Read)(void *p, void *buf, size_t *size);
++    /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
++       (output(*size) < input(*size)) is allowed */
++} ISeqInStream;
++
++/* it can return SZ_ERROR_INPUT_EOF */
++SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size);
++SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType);
++SRes SeqInStream_ReadByte(ISeqInStream *stream, Byte *buf);
++
++typedef struct
++{
++  size_t (*Write)(void *p, const void *buf, size_t size);
++    /* Returns: result - the number of actually written bytes.
++       (result < size) means error */
++} ISeqOutStream;
++
++typedef enum
++{
++  SZ_SEEK_SET = 0,
++  SZ_SEEK_CUR = 1,
++  SZ_SEEK_END = 2
++} ESzSeek;
++
++typedef struct
++{
++  SRes (*Read)(void *p, void *buf, size_t *size);  /* same as ISeqInStream::Read */
++  SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin);
++} ISeekInStream;
++
++typedef struct
++{
++  SRes (*Look)(void *p, const void **buf, size_t *size);
++    /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
++       (output(*size) > input(*size)) is not allowed
++       (output(*size) < input(*size)) is allowed */
++  SRes (*Skip)(void *p, size_t offset);
++    /* offset must be <= output(*size) of Look */
++
++  SRes (*Read)(void *p, void *buf, size_t *size);
++    /* reads directly (without buffer). It's same as ISeqInStream::Read */
++  SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin);
++} ILookInStream;
++
++SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size);
++SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset);
++
++/* reads via ILookInStream::Read */
++SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType);
++SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size);
++
++#define LookToRead_BUF_SIZE (1 << 14)
++
++typedef struct
++{
++  ILookInStream s;
++  ISeekInStream *realStream;
++  size_t pos;
++  size_t size;
++  Byte buf[LookToRead_BUF_SIZE];
++} CLookToRead;
++
++void LookToRead_CreateVTable(CLookToRead *p, int lookahead);
++void LookToRead_Init(CLookToRead *p);
++
++typedef struct
++{
++  ISeqInStream s;
++  ILookInStream *realStream;
++} CSecToLook;
++
++void SecToLook_CreateVTable(CSecToLook *p);
++
++typedef struct
++{
++  ISeqInStream s;
++  ILookInStream *realStream;
++} CSecToRead;
++
++void SecToRead_CreateVTable(CSecToRead *p);
++
++typedef struct
++{
++  SRes (*Progress)(void *p, UInt64 inSize, UInt64 outSize);
++    /* Returns: result. (result != SZ_OK) means break.
++       Value (UInt64)(Int64)-1 for size means unknown value. */
++} ICompressProgress;
++
++typedef struct
++{
++  void *(*Alloc)(void *p, size_t size);
++  void (*Free)(void *p, void *address); /* address can be 0 */
++} ISzAlloc;
++
++#define IAlloc_Alloc(p, size) (p)->Alloc((p), size)
++#define IAlloc_Free(p, a) (p)->Free((p), a)
++
++#ifdef _WIN32
++
++#define CHAR_PATH_SEPARATOR '\\'
++#define WCHAR_PATH_SEPARATOR L'\\'
++#define STRING_PATH_SEPARATOR "\\"
++#define WSTRING_PATH_SEPARATOR L"\\"
++
++#else
++
++#define CHAR_PATH_SEPARATOR '/'
++#define WCHAR_PATH_SEPARATOR L'/'
++#define STRING_PATH_SEPARATOR "/"
++#define WSTRING_PATH_SEPARATOR L"/"
++
++#endif
++
++EXTERN_C_END
++
++#endif
+
+Property changes on: third_party/lzma_sdk/Types.h
+___________________________________________________________________
+Added: svn:eol-style
+   + LF
+
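The ISzAlloc vtable defined in Types.h above is the hook through which every allocation in the decoder is routed, via the IAlloc_Alloc / IAlloc_Free macros. As an illustrative sketch only (not part of the imported sources; DemoAlloc, DemoFree and g_DemoAlloc are made-up names for this note), a caller can back it with plain malloc/free:

/* Minimal malloc/free-backed ISzAlloc -- illustrative sketch, not part of this patch. */
#include <stdlib.h>
#include "Types.h"

static void *DemoAlloc(void *p, size_t size) { (void)p; return malloc(size); }
static void DemoFree(void *p, void *address) { (void)p; free(address); } /* address can be 0 */

static ISzAlloc g_DemoAlloc = { DemoAlloc, DemoFree };

/* Usage: Byte *buf = (Byte *)IAlloc_Alloc(&g_DemoAlloc, 1 << 16);
          IAlloc_Free(&g_DemoAlloc, buf); */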
+Index: third_party/lzma_sdk/LzmaDec.c
+===================================================================
+--- third_party/lzma_sdk/LzmaDec.c	(revision 0)
++++ third_party/lzma_sdk/LzmaDec.c	(revision 0)
+@@ -0,0 +1,999 @@
++/* LzmaDec.c -- LZMA Decoder
++2009-09-20 : Igor Pavlov : Public domain */
++
++#include "LzmaDec.h"
++
++#include <string.h>
++
++#define kNumTopBits 24
++#define kTopValue ((UInt32)1 << kNumTopBits)
++
++#define kNumBitModelTotalBits 11
++#define kBitModelTotal (1 << kNumBitModelTotalBits)
++#define kNumMoveBits 5
++
++#define RC_INIT_SIZE 5
++
++#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
++
++#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound)
++#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
++#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
++#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
++  { UPDATE_0(p); i = (i + i); A0; } else \
++  { UPDATE_1(p); i = (i + i) + 1; A1; }
++#define GET_BIT(p, i) GET_BIT2(p, i, ; , ;)
++
++#define TREE_GET_BIT(probs, i) { GET_BIT((probs + i), i); }
++#define TREE_DECODE(probs, limit, i) \
++  { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
++
++/* #define _LZMA_SIZE_OPT */
++
++#ifdef _LZMA_SIZE_OPT
++#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
++#else
++#define TREE_6_DECODE(probs, i) \
++  { i = 1; \
++  TREE_GET_BIT(probs, i); \
++  TREE_GET_BIT(probs, i); \
++  TREE_GET_BIT(probs, i); \
++  TREE_GET_BIT(probs, i); \
++  TREE_GET_BIT(probs, i); \
++  TREE_GET_BIT(probs, i); \
++  i -= 0x40; }
++#endif
++
++#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }
++
++#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound)
++#define UPDATE_0_CHECK range = bound;
++#define UPDATE_1_CHECK range -= bound; code -= bound;
++#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
++  { UPDATE_0_CHECK; i = (i + i); A0; } else \
++  { UPDATE_1_CHECK; i = (i + i) + 1; A1; }
++#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
++#define TREE_DECODE_CHECK(probs, limit, i) \
++  { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
++
++
++#define kNumPosBitsMax 4
++#define kNumPosStatesMax (1 << kNumPosBitsMax)
++
++#define kLenNumLowBits 3
++#define kLenNumLowSymbols (1 << kLenNumLowBits)
++#define kLenNumMidBits 3
++#define kLenNumMidSymbols (1 << kLenNumMidBits)
++#define kLenNumHighBits 8
++#define kLenNumHighSymbols (1 << kLenNumHighBits)
++
++#define LenChoice 0
++#define LenChoice2 (LenChoice + 1)
++#define LenLow (LenChoice2 + 1)
++#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits))
++#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits))
++#define kNumLenProbs (LenHigh + kLenNumHighSymbols)
++
++
++#define kNumStates 12
++#define kNumLitStates 7
++
++#define kStartPosModelIndex 4
++#define kEndPosModelIndex 14
++#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
++
++#define kNumPosSlotBits 6
++#define kNumLenToPosStates 4
++
++#define kNumAlignBits 4
++#define kAlignTableSize (1 << kNumAlignBits)
++
++#define kMatchMinLen 2
++#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols)
++
++#define IsMatch 0
++#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax))
++#define IsRepG0 (IsRep + kNumStates)
++#define IsRepG1 (IsRepG0 + kNumStates)
++#define IsRepG2 (IsRepG1 + kNumStates)
++#define IsRep0Long (IsRepG2 + kNumStates)
++#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax))
++#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
++#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex)
++#define LenCoder (Align + kAlignTableSize)
++#define RepLenCoder (LenCoder + kNumLenProbs)
++#define Literal (RepLenCoder + kNumLenProbs)
++
++#define LZMA_BASE_SIZE 1846
++#define LZMA_LIT_SIZE 768
++
++#define LzmaProps_GetNumProbs(p) ((UInt32)LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((p)->lc + (p)->lp)))
++
++#if Literal != LZMA_BASE_SIZE
++StopCompilingDueBUG
++#endif
++
++#define LZMA_DIC_MIN (1 << 12)
++
++/* First LZMA-symbol is always decoded.
++And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is without last normalization
++Out:
++  Result:
++    SZ_OK - OK
++    SZ_ERROR_DATA - Error
++  p->remainLen:
++    < kMatchSpecLenStart : normal remain
++    = kMatchSpecLenStart : finished
++    = kMatchSpecLenStart + 1 : Flush marker
++    = kMatchSpecLenStart + 2 : State Init Marker
++*/
++
++static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
++{
++  CLzmaProb *probs = p->probs;
++
++  unsigned state = p->state;
++  UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3];
++  unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
++  unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1;
++  unsigned lc = p->prop.lc;
++
++  Byte *dic = p->dic;
++  SizeT dicBufSize = p->dicBufSize;
++  SizeT dicPos = p->dicPos;
++  
++  UInt32 processedPos = p->processedPos;
++  UInt32 checkDicSize = p->checkDicSize;
++  unsigned len = 0;
++
++  const Byte *buf = p->buf;
++  UInt32 range = p->range;
++  UInt32 code = p->code;
++
++  do
++  {
++    CLzmaProb *prob;
++    UInt32 bound;
++    unsigned ttt;
++    unsigned posState = processedPos & pbMask;
++
++    prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
++    IF_BIT_0(prob)
++    {
++      unsigned symbol;
++      UPDATE_0(prob);
++      prob = probs + Literal;
++      if (checkDicSize != 0 || processedPos != 0)
++        prob += (LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) +
++        (dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc))));
++
++      if (state < kNumLitStates)
++      {
++        state -= (state < 4) ? state : 3;
++        symbol = 1;
++        do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100);
++      }
++      else
++      {
++        unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
++        unsigned offs = 0x100;
++        state -= (state < 10) ? 3 : 6;
++        symbol = 1;
++        do
++        {
++          unsigned bit;
++          CLzmaProb *probLit;
++          matchByte <<= 1;
++          bit = (matchByte & offs);
++          probLit = prob + offs + bit + symbol;
++          GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit)
++        }
++        while (symbol < 0x100);
++      }
++      dic[dicPos++] = (Byte)symbol;
++      processedPos++;
++      continue;
++    }
++    else
++    {
++      UPDATE_1(prob);
++      prob = probs + IsRep + state;
++      IF_BIT_0(prob)
++      {
++        UPDATE_0(prob);
++        state += kNumStates;
++        prob = probs + LenCoder;
++      }
++      else
++      {
++        UPDATE_1(prob);
++        if (checkDicSize == 0 && processedPos == 0)
++          return SZ_ERROR_DATA;
++        prob = probs + IsRepG0 + state;
++        IF_BIT_0(prob)
++        {
++          UPDATE_0(prob);
++          prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState;
++          IF_BIT_0(prob)
++          {
++            UPDATE_0(prob);
++            dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
++            dicPos++;
++            processedPos++;
++            state = state < kNumLitStates ? 9 : 11;
++            continue;
++          }
++          UPDATE_1(prob);
++        }
++        else
++        {
++          UInt32 distance;
++          UPDATE_1(prob);
++          prob = probs + IsRepG1 + state;
++          IF_BIT_0(prob)
++          {
++            UPDATE_0(prob);
++            distance = rep1;
++          }
++          else
++          {
++            UPDATE_1(prob);
++            prob = probs + IsRepG2 + state;
++            IF_BIT_0(prob)
++            {
++              UPDATE_0(prob);
++              distance = rep2;
++            }
++            else
++            {
++              UPDATE_1(prob);
++              distance = rep3;
++              rep3 = rep2;
++            }
++            rep2 = rep1;
++          }
++          rep1 = rep0;
++          rep0 = distance;
++        }
++        state = state < kNumLitStates ? 8 : 11;
++        prob = probs + RepLenCoder;
++      }
++      {
++        unsigned limit, offset;
++        CLzmaProb *probLen = prob + LenChoice;
++        IF_BIT_0(probLen)
++        {
++          UPDATE_0(probLen);
++          probLen = prob + LenLow + (posState << kLenNumLowBits);
++          offset = 0;
++          limit = (1 << kLenNumLowBits);
++        }
++        else
++        {
++          UPDATE_1(probLen);
++          probLen = prob + LenChoice2;
++          IF_BIT_0(probLen)
++          {
++            UPDATE_0(probLen);
++            probLen = prob + LenMid + (posState << kLenNumMidBits);
++            offset = kLenNumLowSymbols;
++            limit = (1 << kLenNumMidBits);
++          }
++          else
++          {
++            UPDATE_1(probLen);
++            probLen = prob + LenHigh;
++            offset = kLenNumLowSymbols + kLenNumMidSymbols;
++            limit = (1 << kLenNumHighBits);
++          }
++        }
++        TREE_DECODE(probLen, limit, len);
++        len += offset;
++      }
++
++      if (state >= kNumStates)
++      {
++        UInt32 distance;
++        prob = probs + PosSlot +
++            ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
++        TREE_6_DECODE(prob, distance);
++        if (distance >= kStartPosModelIndex)
++        {
++          unsigned posSlot = (unsigned)distance;
++          int numDirectBits = (int)(((distance >> 1) - 1));
++          distance = (2 | (distance & 1));
++          if (posSlot < kEndPosModelIndex)
++          {
++            distance <<= numDirectBits;
++            prob = probs + SpecPos + distance - posSlot - 1;
++            {
++              UInt32 mask = 1;
++              unsigned i = 1;
++              do
++              {
++                GET_BIT2(prob + i, i, ; , distance |= mask);
++                mask <<= 1;
++              }
++              while (--numDirectBits != 0);
++            }
++          }
++          else
++          {
++            numDirectBits -= kNumAlignBits;
++            do
++            {
++              NORMALIZE
++              range >>= 1;
++              
++              {
++                UInt32 t;
++                code -= range;
++                t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */
++                distance = (distance << 1) + (t + 1);
++                code += range & t;
++              }
++              /*
++              distance <<= 1;
++              if (code >= range)
++              {
++                code -= range;
++                distance |= 1;
++              }
++              */
++            }
++            while (--numDirectBits != 0);
++            prob = probs + Align;
++            distance <<= kNumAlignBits;
++            {
++              unsigned i = 1;
++              GET_BIT2(prob + i, i, ; , distance |= 1);
++              GET_BIT2(prob + i, i, ; , distance |= 2);
++              GET_BIT2(prob + i, i, ; , distance |= 4);
++              GET_BIT2(prob + i, i, ; , distance |= 8);
++            }
++            if (distance == (UInt32)0xFFFFFFFF)
++            {
++              len += kMatchSpecLenStart;
++              state -= kNumStates;
++              break;
++            }
++          }
++        }
++        rep3 = rep2;
++        rep2 = rep1;
++        rep1 = rep0;
++        rep0 = distance + 1;
++        if (checkDicSize == 0)
++        {
++          if (distance >= processedPos)
++            return SZ_ERROR_DATA;
++        }
++        else if (distance >= checkDicSize)
++          return SZ_ERROR_DATA;
++        state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
++      }
++
++      len += kMatchMinLen;
++
++      if (limit == dicPos)
++        return SZ_ERROR_DATA;
++      {
++        SizeT rem = limit - dicPos;
++        unsigned curLen = ((rem < len) ? (unsigned)rem : len);
++        SizeT pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0);
++
++        processedPos += curLen;
++
++        len -= curLen;
++        if (pos + curLen <= dicBufSize)
++        {
++          Byte *dest = dic + dicPos;
++          ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
++          const Byte *lim = dest + curLen;
++          dicPos += curLen;
++          do
++            *(dest) = (Byte)*(dest + src);
++          while (++dest != lim);
++        }
++        else
++        {
++          do
++          {
++            dic[dicPos++] = dic[pos];
++            if (++pos == dicBufSize)
++              pos = 0;
++          }
++          while (--curLen != 0);
++        }
++      }
++    }
++  }
++  while (dicPos < limit && buf < bufLimit);
++  NORMALIZE;
++  p->buf = buf;
++  p->range = range;
++  p->code = code;
++  p->remainLen = len;
++  p->dicPos = dicPos;
++  p->processedPos = processedPos;
++  p->reps[0] = rep0;
++  p->reps[1] = rep1;
++  p->reps[2] = rep2;
++  p->reps[3] = rep3;
++  p->state = state;
++
++  return SZ_OK;
++}
++
++static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
++{
++  if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart)
++  {
++    Byte *dic = p->dic;
++    SizeT dicPos = p->dicPos;
++    SizeT dicBufSize = p->dicBufSize;
++    unsigned len = p->remainLen;
++    UInt32 rep0 = p->reps[0];
++    if (limit - dicPos < len)
++      len = (unsigned)(limit - dicPos);
++
++    if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
++      p->checkDicSize = p->prop.dicSize;
++
++    p->processedPos += len;
++    p->remainLen -= len;
++    while (len-- != 0)
++    {
++      dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
++      dicPos++;
++    }
++    p->dicPos = dicPos;
++  }
++}
++
++static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
++{
++  do
++  {
++    SizeT limit2 = limit;
++    if (p->checkDicSize == 0)
++    {
++      UInt32 rem = p->prop.dicSize - p->processedPos;
++      if (limit - p->dicPos > rem)
++        limit2 = p->dicPos + rem;
++    }
++    RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit));
++    if (p->processedPos >= p->prop.dicSize)
++      p->checkDicSize = p->prop.dicSize;
++    LzmaDec_WriteRem(p, limit);
++  }
++  while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
++
++  if (p->remainLen > kMatchSpecLenStart)
++  {
++    p->remainLen = kMatchSpecLenStart;
++  }
++  return 0;
++}
++
++typedef enum
++{
++  DUMMY_ERROR, /* unexpected end of input stream */
++  DUMMY_LIT,
++  DUMMY_MATCH,
++  DUMMY_REP
++} ELzmaDummy;
++
++static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize)
++{
++  UInt32 range = p->range;
++  UInt32 code = p->code;
++  const Byte *bufLimit = buf + inSize;
++  CLzmaProb *probs = p->probs;
++  unsigned state = p->state;
++  ELzmaDummy res;
++
++  {
++    CLzmaProb *prob;
++    UInt32 bound;
++    unsigned ttt;
++    unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1);
++
++    prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
++    IF_BIT_0_CHECK(prob)
++    {
++      UPDATE_0_CHECK
++
++      /* if (bufLimit - buf >= 7) return DUMMY_LIT; */
++
++      prob = probs + Literal;
++      if (p->checkDicSize != 0 || p->processedPos != 0)
++        prob += (LZMA_LIT_SIZE *
++          ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
++          (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
++
++      if (state < kNumLitStates)
++      {
++        unsigned symbol = 1;
++        do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
++      }
++      else
++      {
++        unsigned matchByte = p->dic[p->dicPos - p->reps[0] +
++            ((p->dicPos < p->reps[0]) ? p->dicBufSize : 0)];
++        unsigned offs = 0x100;
++        unsigned symbol = 1;
++        do
++        {
++          unsigned bit;
++          CLzmaProb *probLit;
++          matchByte <<= 1;
++          bit = (matchByte & offs);
++          probLit = prob + offs + bit + symbol;
++          GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit)
++        }
++        while (symbol < 0x100);
++      }
++      res = DUMMY_LIT;
++    }
++    else
++    {
++      unsigned len;
++      UPDATE_1_CHECK;
++
++      prob = probs + IsRep + state;
++      IF_BIT_0_CHECK(prob)
++      {
++        UPDATE_0_CHECK;
++        state = 0;
++        prob = probs + LenCoder;
++        res = DUMMY_MATCH;
++      }
++      else
++      {
++        UPDATE_1_CHECK;
++        res = DUMMY_REP;
++        prob = probs + IsRepG0 + state;
++        IF_BIT_0_CHECK(prob)
++        {
++          UPDATE_0_CHECK;
++          prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState;
++          IF_BIT_0_CHECK(prob)
++          {
++            UPDATE_0_CHECK;
++            NORMALIZE_CHECK;
++            return DUMMY_REP;
++          }
++          else
++          {
++            UPDATE_1_CHECK;
++          }
++        }
++        else
++        {
++          UPDATE_1_CHECK;
++          prob = probs + IsRepG1 + state;
++          IF_BIT_0_CHECK(prob)
++          {
++            UPDATE_0_CHECK;
++          }
++          else
++          {
++            UPDATE_1_CHECK;
++            prob = probs + IsRepG2 + state;
++            IF_BIT_0_CHECK(prob)
++            {
++              UPDATE_0_CHECK;
++            }
++            else
++            {
++              UPDATE_1_CHECK;
++            }
++          }
++        }
++        state = kNumStates;
++        prob = probs + RepLenCoder;
++      }
++      {
++        unsigned limit, offset;
++        CLzmaProb *probLen = prob + LenChoice;
++        IF_BIT_0_CHECK(probLen)
++        {
++          UPDATE_0_CHECK;
++          probLen = prob + LenLow + (posState << kLenNumLowBits);
++          offset = 0;
++          limit = 1 << kLenNumLowBits;
++        }
++        else
++        {
++          UPDATE_1_CHECK;
++          probLen = prob + LenChoice2;
++          IF_BIT_0_CHECK(probLen)
++          {
++            UPDATE_0_CHECK;
++            probLen = prob + LenMid + (posState << kLenNumMidBits);
++            offset = kLenNumLowSymbols;
++            limit = 1 << kLenNumMidBits;
++          }
++          else
++          {
++            UPDATE_1_CHECK;
++            probLen = prob + LenHigh;
++            offset = kLenNumLowSymbols + kLenNumMidSymbols;
++            limit = 1 << kLenNumHighBits;
++          }
++        }
++        TREE_DECODE_CHECK(probLen, limit, len);
++        len += offset;
++      }
++
++      if (state < 4)
++      {
++        unsigned posSlot;
++        prob = probs + PosSlot +
++            ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) <<
++            kNumPosSlotBits);
++        TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
++        if (posSlot >= kStartPosModelIndex)
++        {
++          int numDirectBits = ((posSlot >> 1) - 1);
++
++          /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */
++
++          if (posSlot < kEndPosModelIndex)
++          {
++            prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1;
++          }
++          else
++          {
++            numDirectBits -= kNumAlignBits;
++            do
++            {
++              NORMALIZE_CHECK
++              range >>= 1;
++              code -= range & (((code - range) >> 31) - 1);
++              /* if (code >= range) code -= range; */
++            }
++            while (--numDirectBits != 0);
++            prob = probs + Align;
++            numDirectBits = kNumAlignBits;
++          }
++          {
++            unsigned i = 1;
++            do
++            {
++              GET_BIT_CHECK(prob + i, i);
++            }
++            while (--numDirectBits != 0);
++          }
++        }
++      }
++    }
++  }
++  NORMALIZE_CHECK;
++  return res;
++}
++
++
++static void LzmaDec_InitRc(CLzmaDec *p, const Byte *data)
++{
++  p->code = ((UInt32)data[1] << 24) | ((UInt32)data[2] << 16) | ((UInt32)data[3] << 8) | ((UInt32)data[4]);
++  p->range = 0xFFFFFFFF;
++  p->needFlush = 0;
++}
++
++void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState)
++{
++  p->needFlush = 1;
++  p->remainLen = 0;
++  p->tempBufSize = 0;
++
++  if (initDic)
++  {
++    p->processedPos = 0;
++    p->checkDicSize = 0;
++    p->needInitState = 1;
++  }
++  if (initState)
++    p->needInitState = 1;
++}
++
++void LzmaDec_Init(CLzmaDec *p)
++{
++  p->dicPos = 0;
++  LzmaDec_InitDicAndState(p, True, True);
++}
++
++static void LzmaDec_InitStateReal(CLzmaDec *p)
++{
++  UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (p->prop.lc + p->prop.lp));
++  UInt32 i;
++  CLzmaProb *probs = p->probs;
++  for (i = 0; i < numProbs; i++)
++    probs[i] = kBitModelTotal >> 1;
++  p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1;
++  p->state = 0;
++  p->needInitState = 0;
++}
++
++SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,
++    ELzmaFinishMode finishMode, ELzmaStatus *status)
++{
++  SizeT inSize = *srcLen;
++  (*srcLen) = 0;
++  LzmaDec_WriteRem(p, dicLimit);
++  
++  *status = LZMA_STATUS_NOT_SPECIFIED;
++
++  while (p->remainLen != kMatchSpecLenStart)
++  {
++      int checkEndMarkNow;
++
++      if (p->needFlush != 0)
++      {
++        for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
++          p->tempBuf[p->tempBufSize++] = *src++;
++        if (p->tempBufSize < RC_INIT_SIZE)
++        {
++          *status = LZMA_STATUS_NEEDS_MORE_INPUT;
++          return SZ_OK;
++        }
++        if (p->tempBuf[0] != 0)
++          return SZ_ERROR_DATA;
++
++        LzmaDec_InitRc(p, p->tempBuf);
++        p->tempBufSize = 0;
++      }
++
++      checkEndMarkNow = 0;
++      if (p->dicPos >= dicLimit)
++      {
++        if (p->remainLen == 0 && p->code == 0)
++        {
++          *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK;
++          return SZ_OK;
++        }
++        if (finishMode == LZMA_FINISH_ANY)
++        {
++          *status = LZMA_STATUS_NOT_FINISHED;
++          return SZ_OK;
++        }
++        if (p->remainLen != 0)
++        {
++          *status = LZMA_STATUS_NOT_FINISHED;
++          return SZ_ERROR_DATA;
++        }
++        checkEndMarkNow = 1;
++      }
++
++      if (p->needInitState)
++        LzmaDec_InitStateReal(p);
++  
++      if (p->tempBufSize == 0)
++      {
++        SizeT processed;
++        const Byte *bufLimit;
++        if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
++        {
++          int dummyRes = LzmaDec_TryDummy(p, src, inSize);
++          if (dummyRes == DUMMY_ERROR)
++          {
++            memcpy(p->tempBuf, src, inSize);
++            p->tempBufSize = (unsigned)inSize;
++            (*srcLen) += inSize;
++            *status = LZMA_STATUS_NEEDS_MORE_INPUT;
++            return SZ_OK;
++          }
++          if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
++          {
++            *status = LZMA_STATUS_NOT_FINISHED;
++            return SZ_ERROR_DATA;
++          }
++          bufLimit = src;
++        }
++        else
++          bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;
++        p->buf = src;
++        if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0)
++          return SZ_ERROR_DATA;
++        processed = (SizeT)(p->buf - src);
++        (*srcLen) += processed;
++        src += processed;
++        inSize -= processed;
++      }
++      else
++      {
++        unsigned rem = p->tempBufSize, lookAhead = 0;
++        while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
++          p->tempBuf[rem++] = src[lookAhead++];
++        p->tempBufSize = rem;
++        if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
++        {
++          int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, rem);
++          if (dummyRes == DUMMY_ERROR)
++          {
++            (*srcLen) += lookAhead;
++            *status = LZMA_STATUS_NEEDS_MORE_INPUT;
++            return SZ_OK;
++          }
++          if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
++          {
++            *status = LZMA_STATUS_NOT_FINISHED;
++            return SZ_ERROR_DATA;
++          }
++        }
++        p->buf = p->tempBuf;
++        if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
++          return SZ_ERROR_DATA;
++        lookAhead -= (rem - (unsigned)(p->buf - p->tempBuf));
++        (*srcLen) += lookAhead;
++        src += lookAhead;
++        inSize -= lookAhead;
++        p->tempBufSize = 0;
++      }
++  }
++  if (p->code == 0)
++    *status = LZMA_STATUS_FINISHED_WITH_MARK;
++  return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA;
++}
++
++SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
++{
++  SizeT outSize = *destLen;
++  SizeT inSize = *srcLen;
++  *srcLen = *destLen = 0;
++  for (;;)
++  {
++    SizeT inSizeCur = inSize, outSizeCur, dicPos;
++    ELzmaFinishMode curFinishMode;
++    SRes res;
++    if (p->dicPos == p->dicBufSize)
++      p->dicPos = 0;
++    dicPos = p->dicPos;
++    if (outSize > p->dicBufSize - dicPos)
++    {
++      outSizeCur = p->dicBufSize;
++      curFinishMode = LZMA_FINISH_ANY;
++    }
++    else
++    {
++      outSizeCur = dicPos + outSize;
++      curFinishMode = finishMode;
++    }
++
++    res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status);
++    src += inSizeCur;
++    inSize -= inSizeCur;
++    *srcLen += inSizeCur;
++    outSizeCur = p->dicPos - dicPos;
++    memcpy(dest, p->dic + dicPos, outSizeCur);
++    dest += outSizeCur;
++    outSize -= outSizeCur;
++    *destLen += outSizeCur;
++    if (res != 0)
++      return res;
++    if (outSizeCur == 0 || outSize == 0)
++      return SZ_OK;
++  }
++}
++
++void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc)
++{
++  alloc->Free(alloc, p->probs);
++  p->probs = 0;
++}
++
++static void LzmaDec_FreeDict(CLzmaDec *p, ISzAlloc *alloc)
++{
++  alloc->Free(alloc, p->dic);
++  p->dic = 0;
++}
++
++void LzmaDec_Free(CLzmaDec *p, ISzAlloc *alloc)
++{
++  LzmaDec_FreeProbs(p, alloc);
++  LzmaDec_FreeDict(p, alloc);
++}
++
++SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size)
++{
++  UInt32 dicSize;
++  Byte d;
++  
++  if (size < LZMA_PROPS_SIZE)
++    return SZ_ERROR_UNSUPPORTED;
++  else
++    dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24);
++ 
++  if (dicSize < LZMA_DIC_MIN)
++    dicSize = LZMA_DIC_MIN;
++  p->dicSize = dicSize;
++
++  d = data[0];
++  if (d >= (9 * 5 * 5))
++    return SZ_ERROR_UNSUPPORTED;
++
++  p->lc = d % 9;
++  d /= 9;
++  p->pb = d / 5;
++  p->lp = d % 5;
++
++  return SZ_OK;
++}
++
++static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAlloc *alloc)
++{
++  UInt32 numProbs = LzmaProps_GetNumProbs(propNew);
++  if (p->probs == 0 || numProbs != p->numProbs)
++  {
++    LzmaDec_FreeProbs(p, alloc);
++    p->probs = (CLzmaProb *)alloc->Alloc(alloc, numProbs * sizeof(CLzmaProb));
++    p->numProbs = numProbs;
++    if (p->probs == 0)
++      return SZ_ERROR_MEM;
++  }
++  return SZ_OK;
++}
++
++SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc)
++{
++  CLzmaProps propNew;
++  RINOK(LzmaProps_Decode(&propNew, props, propsSize));
++  RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
++  p->prop = propNew;
++  return SZ_OK;
++}
++
++SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc)
++{
++  CLzmaProps propNew;
++  SizeT dicBufSize;
++  RINOK(LzmaProps_Decode(&propNew, props, propsSize));
++  RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
++  dicBufSize = propNew.dicSize;
++  if (p->dic == 0 || dicBufSize != p->dicBufSize)
++  {
++    LzmaDec_FreeDict(p, alloc);
++    p->dic = (Byte *)alloc->Alloc(alloc, dicBufSize);
++    if (p->dic == 0)
++    {
++      LzmaDec_FreeProbs(p, alloc);
++      return SZ_ERROR_MEM;
++    }
++  }
++  p->dicBufSize = dicBufSize;
++  p->prop = propNew;
++  return SZ_OK;
++}
++
++SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
++    const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
++    ELzmaStatus *status, ISzAlloc *alloc)
++{
++  CLzmaDec p;
++  SRes res;
++  SizeT inSize = *srcLen;
++  SizeT outSize = *destLen;
++  *srcLen = *destLen = 0;
++  if (inSize < RC_INIT_SIZE)
++    return SZ_ERROR_INPUT_EOF;
++
++  LzmaDec_Construct(&p);
++  res = LzmaDec_AllocateProbs(&p, propData, propSize, alloc);
++  if (res != 0)
++    return res;
++  p.dic = dest;
++  p.dicBufSize = outSize;
++
++  LzmaDec_Init(&p);
++  
++  *srcLen = inSize;
++  res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status);
++
++  if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
++    res = SZ_ERROR_INPUT_EOF;
++
++  (*destLen) = p.dicPos;
++  LzmaDec_FreeProbs(&p, alloc);
++  return res;
++}
+
+Property changes on: third_party/lzma_sdk/LzmaDec.c
+___________________________________________________________________
+Added: svn:eol-style
+   + LF
+
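A worked note on LzmaProps_Decode above (a reader's annotation, not part of the imported file): the first properties byte packs the three LZMA parameters as d = (pb * 5 + lp) * 9 + lc, so the widely used defaults lc=3, lp=0, pb=2 encode to (2*5 + 0)*9 + 3 = 93 = 0x5D. The function recovers lc = 93 % 9 = 3, then 93 / 9 = 10, giving pb = 10 / 5 = 2 and lp = 10 % 5 = 0; any value of 9*5*5 = 225 or more is rejected with SZ_ERROR_UNSUPPORTED. The remaining four property bytes hold the dictionary size in little-endian order, clamped below to LZMA_DIC_MIN (4 KiB).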
+Index: third_party/lzma_sdk/README.ots
+===================================================================
+--- third_party/lzma_sdk/README.ots	(revision 0)
++++ third_party/lzma_sdk/README.ots	(revision 0)
+@@ -0,0 +1,8 @@
++Name: LZMA SDK
++URL: http://www.7-zip.org/sdk.html
++Version: 9.20
++
++Description:
++The LZMA SDK provides the documentation, samples, header files, libraries, and tools you need to develop applications that use LZMA compression.
++
++This contains only the C code required to decompress LZMA.
+Index: third_party/lzma_sdk/lzma_sdk.gyp
+===================================================================
+--- third_party/lzma_sdk/lzma_sdk.gyp	(revision 0)
++++ third_party/lzma_sdk/lzma_sdk.gyp	(revision 0)
+@@ -0,0 +1,33 @@
++# Copyright (c) 2012 The Chromium Authors. All rights reserved.
++# Use of this source code is governed by a BSD-style license that can be
++# found in the LICENSE file.
++
++{
++  'targets': [
++    {
++      'target_name': 'lzma_sdk',
++      'type': 'static_library',
++      'defines': [
++        '_7ZIP_ST',
++        '_LZMA_PROB32',
++      ],
++      'sources': [
++        'Alloc.c',
++        'Alloc.h',
++        'LzFind.c',
++        'LzFind.h',
++        'LzHash.h',
++        'LzmaEnc.c',
++        'LzmaEnc.h',
++        'LzmaDec.c',
++        'LzmaDec.h',
++        'LzmaLib.c',
++        'LzmaLib.h',
++        'Types.h',
++      ],
++      'include_dirs': [
++        '.',
++      ],
++    },
++  ],
++}
+
+Property changes on: third_party/lzma_sdk/lzma_sdk.gyp
+___________________________________________________________________
+Added: svn:eol-style
+   + LF
+
+Index: third_party/lzma_sdk/LzmaDec.h
+===================================================================
+--- third_party/lzma_sdk/LzmaDec.h	(revision 0)
++++ third_party/lzma_sdk/LzmaDec.h	(revision 0)
+@@ -0,0 +1,231 @@
++/* LzmaDec.h -- LZMA Decoder
++2009-02-07 : Igor Pavlov : Public domain */
++
++#ifndef __LZMA_DEC_H
++#define __LZMA_DEC_H
++
++#include "Types.h"
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++/* #define _LZMA_PROB32 */
++/* _LZMA_PROB32 can increase the speed on some CPUs,
++   but memory usage for CLzmaDec::probs will be doubled in that case */
++
++#ifdef _LZMA_PROB32
++#define CLzmaProb UInt32
++#else
++#define CLzmaProb UInt16
++#endif
++
++
++/* ---------- LZMA Properties ---------- */
++
++#define LZMA_PROPS_SIZE 5
++
++typedef struct _CLzmaProps
++{
++  unsigned lc, lp, pb;
++  UInt32 dicSize;
++} CLzmaProps;
++
++/* LzmaProps_Decode - decodes properties
++Returns:
++  SZ_OK
++  SZ_ERROR_UNSUPPORTED - Unsupported properties
++*/
++
++SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);
++
++
++/* ---------- LZMA Decoder state ---------- */
++
++/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case.
++   Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */
++
++#define LZMA_REQUIRED_INPUT_MAX 20
++
++typedef struct
++{
++  CLzmaProps prop;
++  CLzmaProb *probs;
++  Byte *dic;
++  const Byte *buf;
++  UInt32 range, code;
++  SizeT dicPos;
++  SizeT dicBufSize;
++  UInt32 processedPos;
++  UInt32 checkDicSize;
++  unsigned state;
++  UInt32 reps[4];
++  unsigned remainLen;
++  int needFlush;
++  int needInitState;
++  UInt32 numProbs;
++  unsigned tempBufSize;
++  Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
++} CLzmaDec;
++
++#define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; }
++
++void LzmaDec_Init(CLzmaDec *p);
++
++/* There are two types of LZMA streams:
++     0) Stream with end mark. That end mark adds about 6 bytes to compressed size.
++     1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */
++
++typedef enum
++{
++  LZMA_FINISH_ANY,   /* finish at any point */
++  LZMA_FINISH_END    /* block must be finished at the end */
++} ELzmaFinishMode;
++
++/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!!
++
++   You must use LZMA_FINISH_END, when you know that current output buffer
++   covers last bytes of block. In other cases you must use LZMA_FINISH_ANY.
++
++   If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK,
++   and output value of destLen will be less than output buffer size limit.
++   You can check status result also.
++
++   You can use multiple checks to test data integrity after full decompression:
++     1) Check Result and "status" variable.
++     2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
++     3) Check that output(srcLen) = compressedSize, if you know real compressedSize.
++        You must use correct finish mode in that case. */
++
++typedef enum
++{
++  LZMA_STATUS_NOT_SPECIFIED,               /* use main error code instead */
++  LZMA_STATUS_FINISHED_WITH_MARK,          /* stream was finished with end mark. */
++  LZMA_STATUS_NOT_FINISHED,                /* stream was not finished */
++  LZMA_STATUS_NEEDS_MORE_INPUT,            /* you must provide more input bytes */
++  LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK  /* there is probability that stream was finished without end mark */
++} ELzmaStatus;
++
++/* ELzmaStatus is used only as output value for function call */
++
++
++/* ---------- Interfaces ---------- */
++
++/* There are 3 levels of interfaces:
++     1) Dictionary Interface
++     2) Buffer Interface
++     3) One Call Interface
++   You can select any of these interfaces, but don't mix functions from different
++   groups for same object. */
++
++
++/* There are two variants to allocate state for Dictionary Interface:
++     1) LzmaDec_Allocate / LzmaDec_Free
++     2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs
++   You can use variant 2, if you set dictionary buffer manually.
++   For Buffer Interface you must always use variant 1.
++
++LzmaDec_Allocate* can return:
++  SZ_OK
++  SZ_ERROR_MEM         - Memory allocation error
++  SZ_ERROR_UNSUPPORTED - Unsupported properties
++*/
++   
++SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc);
++void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc);
++
++SRes LzmaDec_Allocate(CLzmaDec *state, const Byte *prop, unsigned propsSize, ISzAlloc *alloc);
++void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc);
++
++/* ---------- Dictionary Interface ---------- */
++
++/* You can use it, if you want to eliminate the overhead for data copying from
++   dictionary to some other external buffer.
++   You must work with CLzmaDec variables directly in this interface.
++
++   STEPS:
++     LzmaDec_Constr()
++     LzmaDec_Allocate()
++     for (each new stream)
++     {
++       LzmaDec_Init()
++       while (it needs more decompression)
++       {
++         LzmaDec_DecodeToDic()
++         use data from CLzmaDec::dic and update CLzmaDec::dicPos
++       }
++     }
++     LzmaDec_Free()
++*/
++
++/* LzmaDec_DecodeToDic
++   
++   The decoding to internal dictionary buffer (CLzmaDec::dic).
++   You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!!
++
++finishMode:
++  It has meaning only if the decoding reaches output limit (dicLimit).
++  LZMA_FINISH_ANY - Decode just dicLimit bytes.
++  LZMA_FINISH_END - Stream must be finished after dicLimit.
++
++Returns:
++  SZ_OK
++    status:
++      LZMA_STATUS_FINISHED_WITH_MARK
++      LZMA_STATUS_NOT_FINISHED
++      LZMA_STATUS_NEEDS_MORE_INPUT
++      LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
++  SZ_ERROR_DATA - Data error
++*/
++
++SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
++    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
++
++
++/* ---------- Buffer Interface ---------- */
++
++/* It's zlib-like interface.
++   See LzmaDec_DecodeToDic description for information about STEPS and return results,
++   but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need
++   to work with CLzmaDec variables manually.
++
++finishMode:
++  It has meaning only if the decoding reaches output limit (*destLen).
++  LZMA_FINISH_ANY - Decode just destLen bytes.
++  LZMA_FINISH_END - Stream must be finished after (*destLen).
++*/
++
++SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,
++    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
++
++
++/* ---------- One Call Interface ---------- */
++
++/* LzmaDecode
++
++finishMode:
++  It has meaning only if the decoding reaches output limit (*destLen).
++  LZMA_FINISH_ANY - Decode just destLen bytes.
++  LZMA_FINISH_END - Stream must be finished after (*destLen).
++
++Returns:
++  SZ_OK
++    status:
++      LZMA_STATUS_FINISHED_WITH_MARK
++      LZMA_STATUS_NOT_FINISHED
++      LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
++  SZ_ERROR_DATA - Data error
++  SZ_ERROR_MEM  - Memory allocation error
++  SZ_ERROR_UNSUPPORTED - Unsupported properties
++  SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
++*/
++
++SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
++    const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
++    ELzmaStatus *status, ISzAlloc *alloc);
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif
+
+Property changes on: third_party/lzma_sdk/LzmaDec.h
+___________________________________________________________________
+Added: svn:eol-style
+   + LF
+
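The One Call Interface documented in LzmaDec.h above is the simplest entry point into this code. Below is a minimal sketch (not part of the imported sources; DecompressOneCall, DemoAlloc and DemoFree are made-up names), assuming the input buffer starts with the LZMA_PROPS_SIZE property bytes followed immediately by the compressed payload, and that the caller already knows the uncompressed size:

#include <stdlib.h>
#include "LzmaDec.h"

static void *DemoAlloc(void *p, size_t size) { (void)p; return malloc(size); }
static void DemoFree(void *p, void *address) { (void)p; free(address); }

/* src = 5 property bytes + compressed payload (srcSize covers both).
   On entry *destLen is the capacity of dest (the known uncompressed size);
   on return it is the number of bytes actually written. */
static SRes DecompressOneCall(Byte *dest, SizeT *destLen,
                              const Byte *src, SizeT srcSize)
{
  ISzAlloc alloc = { DemoAlloc, DemoFree };
  SizeT srcLen = srcSize - LZMA_PROPS_SIZE;
  ELzmaStatus status;
  return LzmaDecode(dest, destLen, src + LZMA_PROPS_SIZE, &srcLen,
                    src, LZMA_PROPS_SIZE, LZMA_FINISH_END, &status, &alloc);
}

LZMA_FINISH_END is used here because the output buffer is expected to cover the whole stream; for incremental decoding, the Buffer Interface (LzmaDec_DecodeToBuf) or the Dictionary Interface described in the header is the better fit.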
+Index: third_party/lzma_sdk/LzFind.c
+===================================================================
+--- third_party/lzma_sdk/LzFind.c	(revision 0)
++++ third_party/lzma_sdk/LzFind.c	(revision 0)
+@@ -0,0 +1,761 @@
++/* LzFind.c -- Match finder for LZ algorithms
++2009-04-22 : Igor Pavlov : Public domain */
++
++#include <string.h>
++
++#include "LzFind.h"
++#include "LzHash.h"
++
++#define kEmptyHashValue 0
++#define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
++#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
++#define kNormalizeMask (~(kNormalizeStepMin - 1))
++#define kMaxHistorySize ((UInt32)3 << 30)
++
++#define kStartMaxLen 3
++
++static void LzInWindow_Free(CMatchFinder *p, ISzAlloc *alloc)
++{
++  if (!p->directInput)
++  {
++    alloc->Free(alloc, p->bufferBase);
++    p->bufferBase = 0;
++  }
++}
++
++/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */
++
++static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAlloc *alloc)
++{
++  UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv;
++  if (p->directInput)
++  {
++    p->blockSize = blockSize;
++    return 1;
++  }
++  if (p->bufferBase == 0 || p->blockSize != blockSize)
++  {
++    LzInWindow_Free(p, alloc);
++    p->blockSize = blockSize;
++    p->bufferBase = (Byte *)alloc->Alloc(alloc, (size_t)blockSize);
++  }
++  return (p->bufferBase != 0);
++}
++
++Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
++Byte MatchFinder_GetIndexByte(CMatchFinder *p, Int32 index) { return p->buffer[index]; }
++
++UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; }
++
++void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
++{
++  p->posLimit -= subValue;
++  p->pos -= subValue;
++  p->streamPos -= subValue;
++}
++
++static void MatchFinder_ReadBlock(CMatchFinder *p)
++{
++  if (p->streamEndWasReached || p->result != SZ_OK)
++    return;
++  if (p->directInput)
++  {
++    UInt32 curSize = 0xFFFFFFFF - p->streamPos;
++    if (curSize > p->directInputRem)
++      curSize = (UInt32)p->directInputRem;
++    p->directInputRem -= curSize;
++    p->streamPos += curSize;
++    if (p->directInputRem == 0)
++      p->streamEndWasReached = 1;
++    return;
++  }
++  for (;;)
++  {
++    Byte *dest = p->buffer + (p->streamPos - p->pos);
++    size_t size = (p->bufferBase + p->blockSize - dest);
++    if (size == 0)
++      return;
++    p->result = p->stream->Read(p->stream, dest, &size);
++    if (p->result != SZ_OK)
++      return;
++    if (size == 0)
++    {
++      p->streamEndWasReached = 1;
++      return;
++    }
++    p->streamPos += (UInt32)size;
++    if (p->streamPos - p->pos > p->keepSizeAfter)
++      return;
++  }
++}
++
++void MatchFinder_MoveBlock(CMatchFinder *p)
++{
++  memmove(p->bufferBase,
++    p->buffer - p->keepSizeBefore,
++    (size_t)(p->streamPos - p->pos + p->keepSizeBefore));
++  p->buffer = p->bufferBase + p->keepSizeBefore;
++}
++
++int MatchFinder_NeedMove(CMatchFinder *p)
++{
++  if (p->directInput)
++    return 0;
++  /* if (p->streamEndWasReached) return 0; */
++  return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
++}
++
++void MatchFinder_ReadIfRequired(CMatchFinder *p)
++{
++  if (p->streamEndWasReached)
++    return;
++  if (p->keepSizeAfter >= p->streamPos - p->pos)
++    MatchFinder_ReadBlock(p);
++}
++
++static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p)
++{
++  if (MatchFinder_NeedMove(p))
++    MatchFinder_MoveBlock(p);
++  MatchFinder_ReadBlock(p);
++}
++
++static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
++{
++  p->cutValue = 32;
++  p->btMode = 1;
++  p->numHashBytes = 4;
++  p->bigHash = 0;
++}
++
++#define kCrcPoly 0xEDB88320
++
++void MatchFinder_Construct(CMatchFinder *p)
++{
++  UInt32 i;
++  p->bufferBase = 0;
++  p->directInput = 0;
++  p->hash = 0;
++  MatchFinder_SetDefaultSettings(p);
++
++  for (i = 0; i < 256; i++)
++  {
++    UInt32 r = i;
++    int j;
++    for (j = 0; j < 8; j++)
++      r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1));
++    p->crc[i] = r;
++  }
++}
++
++static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAlloc *alloc)
++{
++  alloc->Free(alloc, p->hash);
++  p->hash = 0;
++}
++
++void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc)
++{
++  MatchFinder_FreeThisClassMemory(p, alloc);
++  LzInWindow_Free(p, alloc);
++}
++
++static CLzRef* AllocRefs(UInt32 num, ISzAlloc *alloc)
++{
++  size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
++  if (sizeInBytes / sizeof(CLzRef) != num)
++    return 0;
++  return (CLzRef *)alloc->Alloc(alloc, sizeInBytes);
++}
++
++int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
++    UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
++    ISzAlloc *alloc)
++{
++  UInt32 sizeReserv;
++  if (historySize > kMaxHistorySize)
++  {
++    MatchFinder_Free(p, alloc);
++    return 0;
++  }
++  sizeReserv = historySize >> 1;
++  if (historySize > ((UInt32)2 << 30))
++    sizeReserv = historySize >> 2;
++  sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);
++
++  p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
++  p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
++  /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
++  if (LzInWindow_Create(p, sizeReserv, alloc))
++  {
++    UInt32 newCyclicBufferSize = historySize + 1;
++    UInt32 hs;
++    p->matchMaxLen = matchMaxLen;
++    {
++      p->fixedHashSize = 0;
++      if (p->numHashBytes == 2)
++        hs = (1 << 16) - 1;
++      else
++      {
++        hs = historySize - 1;
++        hs |= (hs >> 1);
++        hs |= (hs >> 2);
++        hs |= (hs >> 4);
++        hs |= (hs >> 8);
++        hs >>= 1;
++        hs |= 0xFFFF; /* don't change it! It's required for Deflate */
++        if (hs > (1 << 24))
++        {
++          if (p->numHashBytes == 3)
++            hs = (1 << 24) - 1;
++          else
++            hs >>= 1;
++        }
++      }
++      p->hashMask = hs;
++      hs++;
++      if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
++      if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
++      if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
++      hs += p->fixedHashSize;
++    }
++
++    {
++      UInt32 prevSize = p->hashSizeSum + p->numSons;
++      UInt32 newSize;
++      p->historySize = historySize;
++      p->hashSizeSum = hs;
++      p->cyclicBufferSize = newCyclicBufferSize;
++      p->numSons = (p->btMode ? newCyclicBufferSize * 2 : newCyclicBufferSize);
++      newSize = p->hashSizeSum + p->numSons;
++      if (p->hash != 0 && prevSize == newSize)
++        return 1;
++      MatchFinder_FreeThisClassMemory(p, alloc);
++      p->hash = AllocRefs(newSize, alloc);
++      if (p->hash != 0)
++      {
++        p->son = p->hash + p->hashSizeSum;
++        return 1;
++      }
++    }
++  }
++  MatchFinder_Free(p, alloc);
++  return 0;
++}
++
++static void MatchFinder_SetLimits(CMatchFinder *p)
++{
++  UInt32 limit = kMaxValForNormalize - p->pos;
++  UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos;
++  if (limit2 < limit)
++    limit = limit2;
++  limit2 = p->streamPos - p->pos;
++  if (limit2 <= p->keepSizeAfter)
++  {
++    if (limit2 > 0)
++      limit2 = 1;
++  }
++  else
++    limit2 -= p->keepSizeAfter;
++  if (limit2 < limit)
++    limit = limit2;
++  {
++    UInt32 lenLimit = p->streamPos - p->pos;
++    if (lenLimit > p->matchMaxLen)
++      lenLimit = p->matchMaxLen;
++    p->lenLimit = lenLimit;
++  }
++  p->posLimit = p->pos + limit;
++}
++
++void MatchFinder_Init(CMatchFinder *p)
++{
++  UInt32 i;
++  for (i = 0; i < p->hashSizeSum; i++)
++    p->hash[i] = kEmptyHashValue;
++  p->cyclicBufferPos = 0;
++  p->buffer = p->bufferBase;
++  p->pos = p->streamPos = p->cyclicBufferSize;
++  p->result = SZ_OK;
++  p->streamEndWasReached = 0;
++  MatchFinder_ReadBlock(p);
++  MatchFinder_SetLimits(p);
++}
++
++static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
++{
++  return (p->pos - p->historySize - 1) & kNormalizeMask;
++}
++
++void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems)
++{
++  UInt32 i;
++  for (i = 0; i < numItems; i++)
++  {
++    UInt32 value = items[i];
++    if (value <= subValue)
++      value = kEmptyHashValue;
++    else
++      value -= subValue;
++    items[i] = value;
++  }
++}
++
++static void MatchFinder_Normalize(CMatchFinder *p)
++{
++  UInt32 subValue = MatchFinder_GetSubValue(p);
++  MatchFinder_Normalize3(subValue, p->hash, p->hashSizeSum + p->numSons);
++  MatchFinder_ReduceOffsets(p, subValue);
++}
++
++static void MatchFinder_CheckLimits(CMatchFinder *p)
++{
++  if (p->pos == kMaxValForNormalize)
++    MatchFinder_Normalize(p);
++  if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos)
++    MatchFinder_CheckAndMoveAndRead(p);
++  if (p->cyclicBufferPos == p->cyclicBufferSize)
++    p->cyclicBufferPos = 0;
++  MatchFinder_SetLimits(p);
++}
++
++static UInt32 * Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
++    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
++    UInt32 *distances, UInt32 maxLen)
++{
++  son[_cyclicBufferPos] = curMatch;
++  for (;;)
++  {
++    UInt32 delta = pos - curMatch;
++    if (cutValue-- == 0 || delta >= _cyclicBufferSize)
++      return distances;
++    {
++      const Byte *pb = cur - delta;
++      curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
++      if (pb[maxLen] == cur[maxLen] && *pb == *cur)
++      {
++        UInt32 len = 0;
++        while (++len != lenLimit)
++          if (pb[len] != cur[len])
++            break;
++        if (maxLen < len)
++        {
++          *distances++ = maxLen = len;
++          *distances++ = delta - 1;
++          if (len == lenLimit)
++            return distances;
++        }
++      }
++    }
++  }
++}
++
++UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
++    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
++    UInt32 *distances, UInt32 maxLen)
++{
++  CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
++  CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
++  UInt32 len0 = 0, len1 = 0;
++  for (;;)
++  {
++    UInt32 delta = pos - curMatch;
++    if (cutValue-- == 0 || delta >= _cyclicBufferSize)
++    {
++      *ptr0 = *ptr1 = kEmptyHashValue;
++      return distances;
++    }
++    {
++      CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
++      const Byte *pb = cur - delta;
++      UInt32 len = (len0 < len1 ? len0 : len1);
++      if (pb[len] == cur[len])
++      {
++        if (++len != lenLimit && pb[len] == cur[len])
++          while (++len != lenLimit)
++            if (pb[len] != cur[len])
++              break;
++        if (maxLen < len)
++        {
++          *distances++ = maxLen = len;
++          *distances++ = delta - 1;
++          if (len == lenLimit)
++          {
++            *ptr1 = pair[0];
++            *ptr0 = pair[1];
++            return distances;
++          }
++        }
++      }
++      if (pb[len] < cur[len])
++      {
++        *ptr1 = curMatch;
++        ptr1 = pair + 1;
++        curMatch = *ptr1;
++        len1 = len;
++      }
++      else
++      {
++        *ptr0 = curMatch;
++        ptr0 = pair;
++        curMatch = *ptr0;
++        len0 = len;
++      }
++    }
++  }
++}
++
++static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
++    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
++{
++  CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
++  CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
++  UInt32 len0 = 0, len1 = 0;
++  for (;;)
++  {
++    UInt32 delta = pos - curMatch;
++    if (cutValue-- == 0 || delta >= _cyclicBufferSize)
++    {
++      *ptr0 = *ptr1 = kEmptyHashValue;
++      return;
++    }
++    {
++      CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
++      const Byte *pb = cur - delta;
++      UInt32 len = (len0 < len1 ? len0 : len1);
++      if (pb[len] == cur[len])
++      {
++        while (++len != lenLimit)
++          if (pb[len] != cur[len])
++            break;
++        {
++          if (len == lenLimit)
++          {
++            *ptr1 = pair[0];
++            *ptr0 = pair[1];
++            return;
++          }
++        }
++      }
++      if (pb[len] < cur[len])
++      {
++        *ptr1 = curMatch;
++        ptr1 = pair + 1;
++        curMatch = *ptr1;
++        len1 = len;
++      }
++      else
++      {
++        *ptr0 = curMatch;
++        ptr0 = pair;
++        curMatch = *ptr0;
++        len0 = len;
++      }
++    }
++  }
++}
++
++#define MOVE_POS \
++  ++p->cyclicBufferPos; \
++  p->buffer++; \
++  if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
++
++#define MOVE_POS_RET MOVE_POS return offset;
++
++static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
++
++#define GET_MATCHES_HEADER2(minLen, ret_op) \
++  UInt32 lenLimit; UInt32 hashValue; const Byte *cur; UInt32 curMatch; \
++  lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
++  cur = p->buffer;
++
++#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
++#define SKIP_HEADER(minLen)        GET_MATCHES_HEADER2(minLen, continue)
++
++#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
++
++#define GET_MATCHES_FOOTER(offset, maxLen) \
++  offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \
++  distances + offset, maxLen) - distances); MOVE_POS_RET;
++
++#define SKIP_FOOTER \
++  SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
++
++static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
++{
++  UInt32 offset;
++  GET_MATCHES_HEADER(2)
++  HASH2_CALC;
++  curMatch = p->hash[hashValue];
++  p->hash[hashValue] = p->pos;
++  offset = 0;
++  GET_MATCHES_FOOTER(offset, 1)
++}
++
++UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
++{
++  UInt32 offset;
++  GET_MATCHES_HEADER(3)
++  HASH_ZIP_CALC;
++  curMatch = p->hash[hashValue];
++  p->hash[hashValue] = p->pos;
++  offset = 0;
++  GET_MATCHES_FOOTER(offset, 2)
++}
++
++static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
++{
++  UInt32 hash2Value, delta2, maxLen, offset;
++  GET_MATCHES_HEADER(3)
++
++  HASH3_CALC;
++
++  delta2 = p->pos - p->hash[hash2Value];
++  curMatch = p->hash[kFix3HashSize + hashValue];
++  
++  p->hash[hash2Value] =
++  p->hash[kFix3HashSize + hashValue] = p->pos;
++
++
++  maxLen = 2;
++  offset = 0;
++  if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
++  {
++    for (; maxLen != lenLimit; maxLen++)
++      if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
++        break;
++    distances[0] = maxLen;
++    distances[1] = delta2 - 1;
++    offset = 2;
++    if (maxLen == lenLimit)
++    {
++      SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
++      MOVE_POS_RET;
++    }
++  }
++  GET_MATCHES_FOOTER(offset, maxLen)
++}
++
++static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
++{
++  UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset;
++  GET_MATCHES_HEADER(4)
++
++  HASH4_CALC;
++
++  delta2 = p->pos - p->hash[                hash2Value];
++  delta3 = p->pos - p->hash[kFix3HashSize + hash3Value];
++  curMatch = p->hash[kFix4HashSize + hashValue];
++  
++  p->hash[                hash2Value] =
++  p->hash[kFix3HashSize + hash3Value] =
++  p->hash[kFix4HashSize + hashValue] = p->pos;
++
++  maxLen = 1;
++  offset = 0;
++  if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
++  {
++    distances[0] = maxLen = 2;
++    distances[1] = delta2 - 1;
++    offset = 2;
++  }
++  if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur)
++  {
++    maxLen = 3;
++    distances[offset + 1] = delta3 - 1;
++    offset += 2;
++    delta2 = delta3;
++  }
++  if (offset != 0)
++  {
++    for (; maxLen != lenLimit; maxLen++)
++      if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
++        break;
++    distances[offset - 2] = maxLen;
++    if (maxLen == lenLimit)
++    {
++      SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
++      MOVE_POS_RET;
++    }
++  }
++  if (maxLen < 3)
++    maxLen = 3;
++  GET_MATCHES_FOOTER(offset, maxLen)
++}
++
++static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
++{
++  UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset;
++  GET_MATCHES_HEADER(4)
++
++  HASH4_CALC;
++
++  delta2 = p->pos - p->hash[                hash2Value];
++  delta3 = p->pos - p->hash[kFix3HashSize + hash3Value];
++  curMatch = p->hash[kFix4HashSize + hashValue];
++
++  p->hash[                hash2Value] =
++  p->hash[kFix3HashSize + hash3Value] =
++  p->hash[kFix4HashSize + hashValue] = p->pos;
++
++  maxLen = 1;
++  offset = 0;
++  if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
++  {
++    distances[0] = maxLen = 2;
++    distances[1] = delta2 - 1;
++    offset = 2;
++  }
++  if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur)
++  {
++    maxLen = 3;
++    distances[offset + 1] = delta3 - 1;
++    offset += 2;
++    delta2 = delta3;
++  }
++  if (offset != 0)
++  {
++    for (; maxLen != lenLimit; maxLen++)
++      if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
++        break;
++    distances[offset - 2] = maxLen;
++    if (maxLen == lenLimit)
++    {
++      p->son[p->cyclicBufferPos] = curMatch;
++      MOVE_POS_RET;
++    }
++  }
++  if (maxLen < 3)
++    maxLen = 3;
++  offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
++    distances + offset, maxLen) - (distances));
++  MOVE_POS_RET
++}
++
++UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
++{
++  UInt32 offset;
++  GET_MATCHES_HEADER(3)
++  HASH_ZIP_CALC;
++  curMatch = p->hash[hashValue];
++  p->hash[hashValue] = p->pos;
++  offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
++    distances, 2) - (distances));
++  MOVE_POS_RET
++}
++
++static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
++{
++  do
++  {
++    SKIP_HEADER(2)
++    HASH2_CALC;
++    curMatch = p->hash[hashValue];
++    p->hash[hashValue] = p->pos;
++    SKIP_FOOTER
++  }
++  while (--num != 0);
++}
++
++void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
++{
++  do
++  {
++    SKIP_HEADER(3)
++    HASH_ZIP_CALC;
++    curMatch = p->hash[hashValue];
++    p->hash[hashValue] = p->pos;
++    SKIP_FOOTER
++  }
++  while (--num != 0);
++}
++
++static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
++{
++  do
++  {
++    UInt32 hash2Value;
++    SKIP_HEADER(3)
++    HASH3_CALC;
++    curMatch = p->hash[kFix3HashSize + hashValue];
++    p->hash[hash2Value] =
++    p->hash[kFix3HashSize + hashValue] = p->pos;
++    SKIP_FOOTER
++  }
++  while (--num != 0);
++}
++
++static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
++{
++  do
++  {
++    UInt32 hash2Value, hash3Value;
++    SKIP_HEADER(4)
++    HASH4_CALC;
++    curMatch = p->hash[kFix4HashSize + hashValue];
++    p->hash[                hash2Value] =
++    p->hash[kFix3HashSize + hash3Value] = p->pos;
++    p->hash[kFix4HashSize + hashValue] = p->pos;
++    SKIP_FOOTER
++  }
++  while (--num != 0);
++}
++
++static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
++{
++  do
++  {
++    UInt32 hash2Value, hash3Value;
++    SKIP_HEADER(4)
++    HASH4_CALC;
++    curMatch = p->hash[kFix4HashSize + hashValue];
++    p->hash[                hash2Value] =
++    p->hash[kFix3HashSize + hash3Value] =
++    p->hash[kFix4HashSize + hashValue] = p->pos;
++    p->son[p->cyclicBufferPos] = curMatch;
++    MOVE_POS
++  }
++  while (--num != 0);
++}
++
++void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
++{
++  do
++  {
++    SKIP_HEADER(3)
++    HASH_ZIP_CALC;
++    curMatch = p->hash[hashValue];
++    p->hash[hashValue] = p->pos;
++    p->son[p->cyclicBufferPos] = curMatch;
++    MOVE_POS
++  }
++  while (--num != 0);
++}
++
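++/* Fill the IMatchFinder vtable: hash-chain (Hc4) when btMode is off, otherwise a binary-tree finder chosen by numHashBytes (2, 3 or 4). */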
++void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
++{
++  vTable->Init = (Mf_Init_Func)MatchFinder_Init;
++  vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinder_GetIndexByte;
++  vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
++  vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
++  if (!p->btMode)
++  {
++    vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
++    vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
++  }
++  else if (p->numHashBytes == 2)
++  {
++    vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches;
++    vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip;
++  }
++  else if (p->numHashBytes == 3)
++  {
++    vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
++    vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
++  }
++  else
++  {
++    vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
++    vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
++  }
++}
+
+Property changes on: third_party/lzma_sdk/LzFind.c
+___________________________________________________________________
+Added: svn:eol-style
+   + LF
+
+Index: third_party/lzma_sdk/LzmaEnc.c
+===================================================================
+--- third_party/lzma_sdk/LzmaEnc.c	(revision 0)
++++ third_party/lzma_sdk/LzmaEnc.c	(revision 0)
+@@ -0,0 +1,2268 @@
++/* LzmaEnc.c -- LZMA Encoder
++2010-04-16 : Igor Pavlov : Public domain */
++
++#include <string.h>
++
++/* #define SHOW_STAT */
++/* #define SHOW_STAT2 */
++
++#if defined(SHOW_STAT) || defined(SHOW_STAT2)
++#include <stdio.h>
++#endif
++
++#include "LzmaEnc.h"
++
++#include "LzFind.h"
++#ifndef _7ZIP_ST
++#include "LzFindMt.h"
++#endif
++
++#ifdef SHOW_STAT
++static int ttt = 0;
++#endif
++
++#define kBlockSizeMax ((1 << LZMA_NUM_BLOCK_SIZE_BITS) - 1)
++
++#define kBlockSize (9 << 10)
++#define kUnpackBlockSize (1 << 18)
++#define kMatchArraySize (1 << 21)
++#define kMatchRecordMaxSize ((LZMA_MATCH_LEN_MAX * 2 + 3) * LZMA_MATCH_LEN_MAX)
++
++#define kNumMaxDirectBits (31)
++
++#define kNumTopBits 24
++#define kTopValue ((UInt32)1 << kNumTopBits)
++
++#define kNumBitModelTotalBits 11
++#define kBitModelTotal (1 << kNumBitModelTotalBits)
++#define kNumMoveBits 5
++#define kProbInitValue (kBitModelTotal >> 1)
++
++#define kNumMoveReducingBits 4
++#define kNumBitPriceShiftBits 4
++#define kBitPrice (1 << kNumBitPriceShiftBits)
++
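++/* Encoder properties: -1 (or 0 for dictSize/mc) means "not set"; LzmaEncProps_Normalize replaces such values with defaults derived from the compression level. */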
++void LzmaEncProps_Init(CLzmaEncProps *p)
++{
++  p->level = 5;
++  p->dictSize = p->mc = 0;
++  p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
++  p->writeEndMark = 0;
++}
++
++void LzmaEncProps_Normalize(CLzmaEncProps *p)
++{
++  int level = p->level;
++  if (level < 0) level = 5;
++  p->level = level;
++  if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level == 6 ? (1 << 25) : (1 << 26)));
++  if (p->lc < 0) p->lc = 3;
++  if (p->lp < 0) p->lp = 0;
++  if (p->pb < 0) p->pb = 2;
++  if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);
++  if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);
++  if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
++  if (p->numHashBytes < 0) p->numHashBytes = 4;
++  if (p->mc == 0)  p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1);
++  if (p->numThreads < 0)
++    p->numThreads =
++      #ifndef _7ZIP_ST
++      ((p->btMode && p->algo) ? 2 : 1);
++      #else
++      1;
++      #endif
++}
++
++UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)
++{
++  CLzmaEncProps props = *props2;
++  LzmaEncProps_Normalize(&props);
++  return props.dictSize;
++}
++
++/* #define LZMA_LOG_BSR */
++/* Define it for Intel's CPU */
++
++
++#ifdef LZMA_LOG_BSR
++
++#define kDicLogSizeMaxCompress 30
++
++#define BSR2_RET(pos, res) { unsigned long i; _BitScanReverse(&i, (pos)); res = (i + i) + ((pos >> (i - 1)) & 1); }
++
++UInt32 GetPosSlot1(UInt32 pos)
++{
++  UInt32 res;
++  BSR2_RET(pos, res);
++  return res;
++}
++#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
++#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); }
++
++#else
++
++#define kNumLogBits (9 + (int)sizeof(size_t) / 2)
++#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7)
++
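++/* g_FastPos[d] holds the position slot of distance d; BSR2_RET below extends the table to larger distances by shifting first. */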
++void LzmaEnc_FastPosInit(Byte *g_FastPos)
++{
++  int c = 2, slotFast;
++  g_FastPos[0] = 0;
++  g_FastPos[1] = 1;
++  
++  for (slotFast = 2; slotFast < kNumLogBits * 2; slotFast++)
++  {
++    UInt32 k = (1 << ((slotFast >> 1) - 1));
++    UInt32 j;
++    for (j = 0; j < k; j++, c++)
++      g_FastPos[c] = (Byte)slotFast;
++  }
++}
++
++#define BSR2_RET(pos, res) { UInt32 i = 6 + ((kNumLogBits - 1) & \
++  (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \
++  res = p->g_FastPos[pos >> i] + (i * 2); }
++/*
++#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \
++  p->g_FastPos[pos >> 6] + 12 : \
++  p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; }
++*/
++
++#define GetPosSlot1(pos) p->g_FastPos[pos]
++#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
++#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos]; else BSR2_RET(pos, res); }
++
++#endif
++
++
++#define LZMA_NUM_REPS 4
++
++typedef unsigned CState;
++
++typedef struct
++{
++  UInt32 price;
++
++  CState state;
++  int prev1IsChar;
++  int prev2;
++
++  UInt32 posPrev2;
++  UInt32 backPrev2;
++
++  UInt32 posPrev;
++  UInt32 backPrev;
++  UInt32 backs[LZMA_NUM_REPS];
++} COptimal;
++
++#define kNumOpts (1 << 12)
++
++#define kNumLenToPosStates 4
++#define kNumPosSlotBits 6
++#define kDicLogSizeMin 0
++#define kDicLogSizeMax 32
++#define kDistTableSizeMax (kDicLogSizeMax * 2)
++
++
++#define kNumAlignBits 4
++#define kAlignTableSize (1 << kNumAlignBits)
++#define kAlignMask (kAlignTableSize - 1)
++
++#define kStartPosModelIndex 4
++#define kEndPosModelIndex 14
++#define kNumPosModels (kEndPosModelIndex - kStartPosModelIndex)
++
++#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
++
++#ifdef _LZMA_PROB32
++#define CLzmaProb UInt32
++#else
++#define CLzmaProb UInt16
++#endif
++
++#define LZMA_PB_MAX 4
++#define LZMA_LC_MAX 8
++#define LZMA_LP_MAX 4
++
++#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX)
++
++
++#define kLenNumLowBits 3
++#define kLenNumLowSymbols (1 << kLenNumLowBits)
++#define kLenNumMidBits 3
++#define kLenNumMidSymbols (1 << kLenNumMidBits)
++#define kLenNumHighBits 8
++#define kLenNumHighSymbols (1 << kLenNumHighBits)
++
++#define kLenNumSymbolsTotal (kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols)
++
++#define LZMA_MATCH_LEN_MIN 2
++#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1)
++
++#define kNumStates 12
++
++typedef struct
++{
++  CLzmaProb choice;
++  CLzmaProb choice2;
++  CLzmaProb low[LZMA_NUM_PB_STATES_MAX << kLenNumLowBits];
++  CLzmaProb mid[LZMA_NUM_PB_STATES_MAX << kLenNumMidBits];
++  CLzmaProb high[kLenNumHighSymbols];
++} CLenEnc;
++
++typedef struct
++{
++  CLenEnc p;
++  UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal];
++  UInt32 tableSize;
++  UInt32 counters[LZMA_NUM_PB_STATES_MAX];
++} CLenPriceEnc;
++
++typedef struct
++{
++  UInt32 range;
++  Byte cache;
++  UInt64 low;
++  UInt64 cacheSize;
++  Byte *buf;
++  Byte *bufLim;
++  Byte *bufBase;
++  ISeqOutStream *outStream;
++  UInt64 processed;
++  SRes res;
++} CRangeEnc;
++
++typedef struct
++{
++  CLzmaProb *litProbs;
++
++  CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
++  CLzmaProb isRep[kNumStates];
++  CLzmaProb isRepG0[kNumStates];
++  CLzmaProb isRepG1[kNumStates];
++  CLzmaProb isRepG2[kNumStates];
++  CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
++
++  CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
++  CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex];
++  CLzmaProb posAlignEncoder[1 << kNumAlignBits];
++  
++  CLenPriceEnc lenEnc;
++  CLenPriceEnc repLenEnc;
++
++  UInt32 reps[LZMA_NUM_REPS];
++  UInt32 state;
++} CSaveState;
++
++typedef struct
++{
++  IMatchFinder matchFinder;
++  void *matchFinderObj;
++
++  #ifndef _7ZIP_ST
++  Bool mtMode;
++  CMatchFinderMt matchFinderMt;
++  #endif
++
++  CMatchFinder matchFinderBase;
++
++  #ifndef _7ZIP_ST
++  Byte pad[128];
++  #endif
++  
++  UInt32 optimumEndIndex;
++  UInt32 optimumCurrentIndex;
++
++  UInt32 longestMatchLength;
++  UInt32 numPairs;
++  UInt32 numAvail;
++  COptimal opt[kNumOpts];
++  
++  #ifndef LZMA_LOG_BSR
++  Byte g_FastPos[1 << kNumLogBits];
++  #endif
++
++  UInt32 ProbPrices[kBitModelTotal >> kNumMoveReducingBits];
++  UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1];
++  UInt32 numFastBytes;
++  UInt32 additionalOffset;
++  UInt32 reps[LZMA_NUM_REPS];
++  UInt32 state;
++
++  UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
++  UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
++  UInt32 alignPrices[kAlignTableSize];
++  UInt32 alignPriceCount;
++
++  UInt32 distTableSize;
++
++  unsigned lc, lp, pb;
++  unsigned lpMask, pbMask;
++
++  CLzmaProb *litProbs;
++
++  CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
++  CLzmaProb isRep[kNumStates];
++  CLzmaProb isRepG0[kNumStates];
++  CLzmaProb isRepG1[kNumStates];
++  CLzmaProb isRepG2[kNumStates];
++  CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
++
++  CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
++  CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex];
++  CLzmaProb posAlignEncoder[1 << kNumAlignBits];
++  
++  CLenPriceEnc lenEnc;
++  CLenPriceEnc repLenEnc;
++
++  unsigned lclp;
++
++  Bool fastMode;
++  
++  CRangeEnc rc;
++
++  Bool writeEndMark;
++  UInt64 nowPos64;
++  UInt32 matchPriceCount;
++  Bool finished;
++  Bool multiThread;
++
++  SRes result;
++  UInt32 dictSize;
++  UInt32 matchFinderCycles;
++
++  int needInit;
++
++  CSaveState saveState;
++} CLzmaEnc;
++
++void LzmaEnc_SaveState(CLzmaEncHandle pp)
++{
++  CLzmaEnc *p = (CLzmaEnc *)pp;
++  CSaveState *dest = &p->saveState;
++  int i;
++  dest->lenEnc = p->lenEnc;
++  dest->repLenEnc = p->repLenEnc;
++  dest->state = p->state;
++
++  for (i = 0; i < kNumStates; i++)
++  {
++    memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i]));
++    memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i]));
++  }
++  for (i = 0; i < kNumLenToPosStates; i++)
++    memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i]));
++  memcpy(dest->isRep, p->isRep, sizeof(p->isRep));
++  memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0));
++  memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1));
++  memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2));
++  memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders));
++  memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder));
++  memcpy(dest->reps, p->reps, sizeof(p->reps));
++  memcpy(dest->litProbs, p->litProbs, (0x300 << p->lclp) * sizeof(CLzmaProb));
++}
++
++void LzmaEnc_RestoreState(CLzmaEncHandle pp)
++{
++  CLzmaEnc *dest = (CLzmaEnc *)pp;
++  const CSaveState *p = &dest->saveState;
++  int i;
++  dest->lenEnc = p->lenEnc;
++  dest->repLenEnc = p->repLenEnc;
++  dest->state = p->state;
++
++  for (i = 0; i < kNumStates; i++)
++  {
++    memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i]));
++    memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i]));
++  }
++  for (i = 0; i < kNumLenToPosStates; i++)
++    memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i]));
++  memcpy(dest->isRep, p->isRep, sizeof(p->isRep));
++  memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0));
++  memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1));
++  memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2));
++  memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders));
++  memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder));
++  memcpy(dest->reps, p->reps, sizeof(p->reps));
++  memcpy(dest->litProbs, p->litProbs, (0x300 << dest->lclp) * sizeof(CLzmaProb));
++}
++
++SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
++{
++  CLzmaEnc *p = (CLzmaEnc *)pp;
++  CLzmaEncProps props = *props2;
++  LzmaEncProps_Normalize(&props);
++
++  if (props.lc > LZMA_LC_MAX || props.lp > LZMA_LP_MAX || props.pb > LZMA_PB_MAX ||
++      props.dictSize > ((UInt32)1 << kDicLogSizeMaxCompress) || props.dictSize > ((UInt32)1 << 30))
++    return SZ_ERROR_PARAM;
++  p->dictSize = props.dictSize;
++  p->matchFinderCycles = props.mc;
++  {
++    unsigned fb = props.fb;
++    if (fb < 5)
++      fb = 5;
++    if (fb > LZMA_MATCH_LEN_MAX)
++      fb = LZMA_MATCH_LEN_MAX;
++    p->numFastBytes = fb;
++  }
++  p->lc = props.lc;
++  p->lp = props.lp;
++  p->pb = props.pb;
++  p->fastMode = (props.algo == 0);
++  p->matchFinderBase.btMode = props.btMode;
++  {
++    UInt32 numHashBytes = 4;
++    if (props.btMode)
++    {
++      if (props.numHashBytes < 2)
++        numHashBytes = 2;
++      else if (props.numHashBytes < 4)
++        numHashBytes = props.numHashBytes;
++    }
++    p->matchFinderBase.numHashBytes = numHashBytes;
++  }
++
++  p->matchFinderBase.cutValue = props.mc;
++
++  p->writeEndMark = props.writeEndMark;
++
++  #ifndef _7ZIP_ST
++  /*
++  if (newMultiThread != _multiThread)
++  {
++    ReleaseMatchFinder();
++    _multiThread = newMultiThread;
++  }
++  */
++  p->multiThread = (props.numThreads > 1);
++  #endif
++
++  return SZ_OK;
++}
++
++static const int kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4,  5,  6,   4, 5};
++static const int kMatchNextStates[kNumStates]   = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10};
++static const int kRepNextStates[kNumStates]     = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11};
++static const int kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11};
++
++#define IsCharState(s) ((s) < 7)
++
++#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1)
++
++#define kInfinityPrice (1 << 30)
++
++static void RangeEnc_Construct(CRangeEnc *p)
++{
++  p->outStream = 0;
++  p->bufBase = 0;
++}
++
++#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize)
++
++#define RC_BUF_SIZE (1 << 16)
++static int RangeEnc_Alloc(CRangeEnc *p, ISzAlloc *alloc)
++{
++  if (p->bufBase == 0)
++  {
++    p->bufBase = (Byte *)alloc->Alloc(alloc, RC_BUF_SIZE);
++    if (p->bufBase == 0)
++      return 0;
++    p->bufLim = p->bufBase + RC_BUF_SIZE;
++  }
++  return 1;
++}
++
++static void RangeEnc_Free(CRangeEnc *p, ISzAlloc *alloc)
++{
++  alloc->Free(alloc, p->bufBase);
++  p->bufBase = 0;
++}
++
++static void RangeEnc_Init(CRangeEnc *p)
++{
++  /* Stream.Init(); */
++  p->low = 0;
++  p->range = 0xFFFFFFFF;
++  p->cacheSize = 1;
++  p->cache = 0;
++
++  p->buf = p->bufBase;
++
++  p->processed = 0;
++  p->res = SZ_OK;
++}
++
++static void RangeEnc_FlushStream(CRangeEnc *p)
++{
++  size_t num;
++  if (p->res != SZ_OK)
++    return;
++  num = p->buf - p->bufBase;
++  if (num != p->outStream->Write(p->outStream, p->bufBase, num))
++    p->res = SZ_ERROR_WRITE;
++  p->processed += num;
++  p->buf = p->bufBase;
++}
++
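++/* Output the top byte of the range coder's low value, propagating a pending carry through the cached 0xFF bytes. */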
++static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p)
++{
++  if ((UInt32)p->low < (UInt32)0xFF000000 || (int)(p->low >> 32) != 0)
++  {
++    Byte temp = p->cache;
++    do
++    {
++      Byte *buf = p->buf;
++      *buf++ = (Byte)(temp + (Byte)(p->low >> 32));
++      p->buf = buf;
++      if (buf == p->bufLim)
++        RangeEnc_FlushStream(p);
++      temp = 0xFF;
++    }
++    while (--p->cacheSize != 0);
++    p->cache = (Byte)((UInt32)p->low >> 24);
++  }
++  p->cacheSize++;
++  p->low = (UInt32)p->low << 8;
++}
++
++static void RangeEnc_FlushData(CRangeEnc *p)
++{
++  int i;
++  for (i = 0; i < 5; i++)
++    RangeEnc_ShiftLow(p);
++}
++
++static void RangeEnc_EncodeDirectBits(CRangeEnc *p, UInt32 value, int numBits)
++{
++  do
++  {
++    p->range >>= 1;
++    p->low += p->range & (0 - ((value >> --numBits) & 1));
++    if (p->range < kTopValue)
++    {
++      p->range <<= 8;
++      RangeEnc_ShiftLow(p);
++    }
++  }
++  while (numBits != 0);
++}
++
++static void RangeEnc_EncodeBit(CRangeEnc *p, CLzmaProb *prob, UInt32 symbol)
++{
++  UInt32 ttt = *prob;
++  UInt32 newBound = (p->range >> kNumBitModelTotalBits) * ttt;
++  if (symbol == 0)
++  {
++    p->range = newBound;
++    ttt += (kBitModelTotal - ttt) >> kNumMoveBits;
++  }
++  else
++  {
++    p->low += newBound;
++    p->range -= newBound;
++    ttt -= ttt >> kNumMoveBits;
++  }
++  *prob = (CLzmaProb)ttt;
++  if (p->range < kTopValue)
++  {
++    p->range <<= 8;
++    RangeEnc_ShiftLow(p);
++  }
++}
++
++static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol)
++{
++  symbol |= 0x100;
++  do
++  {
++    RangeEnc_EncodeBit(p, probs + (symbol >> 8), (symbol >> 7) & 1);
++    symbol <<= 1;
++  }
++  while (symbol < 0x10000);
++}
++
++static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol, UInt32 matchByte)
++{
++  UInt32 offs = 0x100;
++  symbol |= 0x100;
++  do
++  {
++    matchByte <<= 1;
++    RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (symbol >> 8)), (symbol >> 7) & 1);
++    symbol <<= 1;
++    offs &= ~(matchByte ^ symbol);
++  }
++  while (symbol < 0x10000);
++}
++
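++/* Precompute bit prices (scaled approximations of -log2(probability)) for every quantized probability value. */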
++void LzmaEnc_InitPriceTables(UInt32 *ProbPrices)
++{
++  UInt32 i;
++  for (i = (1 << kNumMoveReducingBits) / 2; i < kBitModelTotal; i += (1 << kNumMoveReducingBits))
++  {
++    const int kCyclesBits = kNumBitPriceShiftBits;
++    UInt32 w = i;
++    UInt32 bitCount = 0;
++    int j;
++    for (j = 0; j < kCyclesBits; j++)
++    {
++      w = w * w;
++      bitCount <<= 1;
++      while (w >= ((UInt32)1 << 16))
++      {
++        w >>= 1;
++        bitCount++;
++      }
++    }
++    ProbPrices[i >> kNumMoveReducingBits] = ((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
++  }
++}
++
++
++#define GET_PRICE(prob, symbol) \
++  p->ProbPrices[((prob) ^ (((-(int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
++
++#define GET_PRICEa(prob, symbol) \
++  ProbPrices[((prob) ^ ((-((int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
++
++#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits]
++#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
++
++#define GET_PRICE_0a(prob) ProbPrices[(prob) >> kNumMoveReducingBits]
++#define GET_PRICE_1a(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
++
++static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 symbol, UInt32 *ProbPrices)
++{
++  UInt32 price = 0;
++  symbol |= 0x100;
++  do
++  {
++    price += GET_PRICEa(probs[symbol >> 8], (symbol >> 7) & 1);
++    symbol <<= 1;
++  }
++  while (symbol < 0x10000);
++  return price;
++}
++
++static UInt32 LitEnc_GetPriceMatched(const CLzmaProb *probs, UInt32 symbol, UInt32 matchByte, UInt32 *ProbPrices)
++{
++  UInt32 price = 0;
++  UInt32 offs = 0x100;
++  symbol |= 0x100;
++  do
++  {
++    matchByte <<= 1;
++    price += GET_PRICEa(probs[offs + (matchByte & offs) + (symbol >> 8)], (symbol >> 7) & 1);
++    symbol <<= 1;
++    offs &= ~(matchByte ^ symbol);
++  }
++  while (symbol < 0x10000);
++  return price;
++}
++
++
++static void RcTree_Encode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol)
++{
++  UInt32 m = 1;
++  int i;
++  for (i = numBitLevels; i != 0;)
++  {
++    UInt32 bit;
++    i--;
++    bit = (symbol >> i) & 1;
++    RangeEnc_EncodeBit(rc, probs + m, bit);
++    m = (m << 1) | bit;
++  }
++}
++
++static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol)
++{
++  UInt32 m = 1;
++  int i;
++  for (i = 0; i < numBitLevels; i++)
++  {
++    UInt32 bit = symbol & 1;
++    RangeEnc_EncodeBit(rc, probs + m, bit);
++    m = (m << 1) | bit;
++    symbol >>= 1;
++  }
++}
++
++static UInt32 RcTree_GetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, UInt32 *ProbPrices)
++{
++  UInt32 price = 0;
++  symbol |= (1 << numBitLevels);
++  while (symbol != 1)
++  {
++    price += GET_PRICEa(probs[symbol >> 1], symbol & 1);
++    symbol >>= 1;
++  }
++  return price;
++}
++
++static UInt32 RcTree_ReverseGetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, UInt32 *ProbPrices)
++{
++  UInt32 price = 0;
++  UInt32 m = 1;
++  int i;
++  for (i = numBitLevels; i != 0; i--)
++  {
++    UInt32 bit = symbol & 1;
++    symbol >>= 1;
++    price += GET_PRICEa(probs[m], bit);
++    m = (m << 1) | bit;
++  }
++  return price;
++}
++
++
++static void LenEnc_Init(CLenEnc *p)
++{
++  unsigned i;
++  p->choice = p->choice2 = kProbInitValue;
++  for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumLowBits); i++)
++    p->low[i] = kProbInitValue;
++  for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumMidBits); i++)
++    p->mid[i] = kProbInitValue;
++  for (i = 0; i < kLenNumHighSymbols; i++)
++    p->high[i] = kProbInitValue;
++}
++
++static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState)
++{
++  if (symbol < kLenNumLowSymbols)
++  {
++    RangeEnc_EncodeBit(rc, &p->choice, 0);
++    RcTree_Encode(rc, p->low + (posState << kLenNumLowBits), kLenNumLowBits, symbol);
++  }
++  else
++  {
++    RangeEnc_EncodeBit(rc, &p->choice, 1);
++    if (symbol < kLenNumLowSymbols + kLenNumMidSymbols)
++    {
++      RangeEnc_EncodeBit(rc, &p->choice2, 0);
++      RcTree_Encode(rc, p->mid + (posState << kLenNumMidBits), kLenNumMidBits, symbol - kLenNumLowSymbols);
++    }
++    else
++    {
++      RangeEnc_EncodeBit(rc, &p->choice2, 1);
++      RcTree_Encode(rc, p->high, kLenNumHighBits, symbol - kLenNumLowSymbols - kLenNumMidSymbols);
++    }
++  }
++}
++
++static void LenEnc_SetPrices(CLenEnc *p, UInt32 posState, UInt32 numSymbols, UInt32 *prices, UInt32 *ProbPrices)
++{
++  UInt32 a0 = GET_PRICE_0a(p->choice);
++  UInt32 a1 = GET_PRICE_1a(p->choice);
++  UInt32 b0 = a1 + GET_PRICE_0a(p->choice2);
++  UInt32 b1 = a1 + GET_PRICE_1a(p->choice2);
++  UInt32 i = 0;
++  for (i = 0; i < kLenNumLowSymbols; i++)
++  {
++    if (i >= numSymbols)
++      return;
++    prices[i] = a0 + RcTree_GetPrice(p->low + (posState << kLenNumLowBits), kLenNumLowBits, i, ProbPrices);
++  }
++  for (; i < kLenNumLowSymbols + kLenNumMidSymbols; i++)
++  {
++    if (i >= numSymbols)
++      return;
++    prices[i] = b0 + RcTree_GetPrice(p->mid + (posState << kLenNumMidBits), kLenNumMidBits, i - kLenNumLowSymbols, ProbPrices);
++  }
++  for (; i < numSymbols; i++)
++    prices[i] = b1 + RcTree_GetPrice(p->high, kLenNumHighBits, i - kLenNumLowSymbols - kLenNumMidSymbols, ProbPrices);
++}
++
++static void MY_FAST_CALL LenPriceEnc_UpdateTable(CLenPriceEnc *p, UInt32 posState, UInt32 *ProbPrices)
++{
++  LenEnc_SetPrices(&p->p, posState, p->tableSize, p->prices[posState], ProbPrices);
++  p->counters[posState] = p->tableSize;
++}
++
++static void LenPriceEnc_UpdateTables(CLenPriceEnc *p, UInt32 numPosStates, UInt32 *ProbPrices)
++{
++  UInt32 posState;
++  for (posState = 0; posState < numPosStates; posState++)
++    LenPriceEnc_UpdateTable(p, posState, ProbPrices);
++}
++
++static void LenEnc_Encode2(CLenPriceEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState, Bool updatePrice, UInt32 *ProbPrices)
++{
++  LenEnc_Encode(&p->p, rc, symbol, posState);
++  if (updatePrice)
++    if (--p->counters[posState] == 0)
++      LenPriceEnc_UpdateTable(p, posState, ProbPrices);
++}
++
++
++
++
++static void MovePos(CLzmaEnc *p, UInt32 num)
++{
++  #ifdef SHOW_STAT
++  ttt += num;
++  printf("\n MovePos %d", num);
++  #endif
++  if (num != 0)
++  {
++    p->additionalOffset += num;
++    p->matchFinder.Skip(p->matchFinderObj, num);
++  }
++}
++
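++/* Query the match finder for (length, distance) pairs at the current position; returns the longest match length, extending it by direct byte comparison when it equals numFastBytes. */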
++static UInt32 ReadMatchDistances(CLzmaEnc *p, UInt32 *numDistancePairsRes)
++{
++  UInt32 lenRes = 0, numPairs;
++  p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
++  numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
++  #ifdef SHOW_STAT
++  printf("\n i = %d numPairs = %d    ", ttt, numPairs / 2);
++  ttt++;
++  {
++    UInt32 i;
++    for (i = 0; i < numPairs; i += 2)
++      printf("%2d %6d   | ", p->matches[i], p->matches[i + 1]);
++  }
++  #endif
++  if (numPairs > 0)
++  {
++    lenRes = p->matches[numPairs - 2];
++    if (lenRes == p->numFastBytes)
++    {
++      const Byte *pby = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
++      UInt32 distance = p->matches[numPairs - 1] + 1;
++      UInt32 numAvail = p->numAvail;
++      if (numAvail > LZMA_MATCH_LEN_MAX)
++        numAvail = LZMA_MATCH_LEN_MAX;
++      {
++        const Byte *pby2 = pby - distance;
++        for (; lenRes < numAvail && pby[lenRes] == pby2[lenRes]; lenRes++);
++      }
++    }
++  }
++  p->additionalOffset++;
++  *numDistancePairsRes = numPairs;
++  return lenRes;
++}
++
++
++#define MakeAsChar(p) (p)->backPrev = (UInt32)(-1); (p)->prev1IsChar = False;
++#define MakeAsShortRep(p) (p)->backPrev = 0; (p)->prev1IsChar = False;
++#define IsShortRep(p) ((p)->backPrev == 0)
++
++static UInt32 GetRepLen1Price(CLzmaEnc *p, UInt32 state, UInt32 posState)
++{
++  return
++    GET_PRICE_0(p->isRepG0[state]) +
++    GET_PRICE_0(p->isRep0Long[state][posState]);
++}
++
++static UInt32 GetPureRepPrice(CLzmaEnc *p, UInt32 repIndex, UInt32 state, UInt32 posState)
++{
++  UInt32 price;
++  if (repIndex == 0)
++  {
++    price = GET_PRICE_0(p->isRepG0[state]);
++    price += GET_PRICE_1(p->isRep0Long[state][posState]);
++  }
++  else
++  {
++    price = GET_PRICE_1(p->isRepG0[state]);
++    if (repIndex == 1)
++      price += GET_PRICE_0(p->isRepG1[state]);
++    else
++    {
++      price += GET_PRICE_1(p->isRepG1[state]);
++      price += GET_PRICE(p->isRepG2[state], repIndex - 2);
++    }
++  }
++  return price;
++}
++
++static UInt32 GetRepPrice(CLzmaEnc *p, UInt32 repIndex, UInt32 len, UInt32 state, UInt32 posState)
++{
++  return p->repLenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN] +
++    GetPureRepPrice(p, repIndex, state, posState);
++}
++
++static UInt32 Backward(CLzmaEnc *p, UInt32 *backRes, UInt32 cur)
++{
++  UInt32 posMem = p->opt[cur].posPrev;
++  UInt32 backMem = p->opt[cur].backPrev;
++  p->optimumEndIndex = cur;
++  do
++  {
++    if (p->opt[cur].prev1IsChar)
++    {
++      MakeAsChar(&p->opt[posMem])
++      p->opt[posMem].posPrev = posMem - 1;
++      if (p->opt[cur].prev2)
++      {
++        p->opt[posMem - 1].prev1IsChar = False;
++        p->opt[posMem - 1].posPrev = p->opt[cur].posPrev2;
++        p->opt[posMem - 1].backPrev = p->opt[cur].backPrev2;
++      }
++    }
++    {
++      UInt32 posPrev = posMem;
++      UInt32 backCur = backMem;
++      
++      backMem = p->opt[posPrev].backPrev;
++      posMem = p->opt[posPrev].posPrev;
++      
++      p->opt[posPrev].backPrev = backCur;
++      p->opt[posPrev].posPrev = cur;
++      cur = posPrev;
++    }
++  }
++  while (cur != 0);
++  *backRes = p->opt[0].backPrev;
++  p->optimumCurrentIndex  = p->opt[0].posPrev;
++  return p->optimumCurrentIndex;
++}
++
++#define LIT_PROBS(pos, prevByte) (p->litProbs + ((((pos) & p->lpMask) << p->lc) + ((prevByte) >> (8 - p->lc))) * 0x300)
++
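++/* Optimal parse: dynamic programming over up to kNumOpts future positions, minimizing the estimated price of literals, matches, reps and short reps. */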
++static UInt32 GetOptimum(CLzmaEnc *p, UInt32 position, UInt32 *backRes)
++{
++  UInt32 numAvail, mainLen, numPairs, repMaxIndex, i, posState, lenEnd, len, cur;
++  UInt32 matchPrice, repMatchPrice, normalMatchPrice;
++  UInt32 reps[LZMA_NUM_REPS], repLens[LZMA_NUM_REPS];
++  UInt32 *matches;
++  const Byte *data;
++  Byte curByte, matchByte;
++  if (p->optimumEndIndex != p->optimumCurrentIndex)
++  {
++    const COptimal *opt = &p->opt[p->optimumCurrentIndex];
++    UInt32 lenRes = opt->posPrev - p->optimumCurrentIndex;
++    *backRes = opt->backPrev;
++    p->optimumCurrentIndex = opt->posPrev;
++    return lenRes;
++  }
++  p->optimumCurrentIndex = p->optimumEndIndex = 0;
++  
++  if (p->additionalOffset == 0)
++    mainLen = ReadMatchDistances(p, &numPairs);
++  else
++  {
++    mainLen = p->longestMatchLength;
++    numPairs = p->numPairs;
++  }
++
++  numAvail = p->numAvail;
++  if (numAvail < 2)
++  {
++    *backRes = (UInt32)(-1);
++    return 1;
++  }
++  if (numAvail > LZMA_MATCH_LEN_MAX)
++    numAvail = LZMA_MATCH_LEN_MAX;
++
++  data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
++  repMaxIndex = 0;
++  for (i = 0; i < LZMA_NUM_REPS; i++)
++  {
++    UInt32 lenTest;
++    const Byte *data2;
++    reps[i] = p->reps[i];
++    data2 = data - (reps[i] + 1);
++    if (data[0] != data2[0] || data[1] != data2[1])
++    {
++      repLens[i] = 0;
++      continue;
++    }
++    for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++);
++    repLens[i] = lenTest;
++    if (lenTest > repLens[repMaxIndex])
++      repMaxIndex = i;
++  }
++  if (repLens[repMaxIndex] >= p->numFastBytes)
++  {
++    UInt32 lenRes;
++    *backRes = repMaxIndex;
++    lenRes = repLens[repMaxIndex];
++    MovePos(p, lenRes - 1);
++    return lenRes;
++  }
++
++  matches = p->matches;
++  if (mainLen >= p->numFastBytes)
++  {
++    *backRes = matches[numPairs - 1] + LZMA_NUM_REPS;
++    MovePos(p, mainLen - 1);
++    return mainLen;
++  }
++  curByte = *data;
++  matchByte = *(data - (reps[0] + 1));
++
++  if (mainLen < 2 && curByte != matchByte && repLens[repMaxIndex] < 2)
++  {
++    *backRes = (UInt32)-1;
++    return 1;
++  }
++
++  p->opt[0].state = (CState)p->state;
++
++  posState = (position & p->pbMask);
++
++  {
++    const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));
++    p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) +
++        (!IsCharState(p->state) ?
++          LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) :
++          LitEnc_GetPrice(probs, curByte, p->ProbPrices));
++  }
++
++  MakeAsChar(&p->opt[1]);
++
++  matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]);
++  repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]);
++
++  if (matchByte == curByte)
++  {
++    UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, p->state, posState);
++    if (shortRepPrice < p->opt[1].price)
++    {
++      p->opt[1].price = shortRepPrice;
++      MakeAsShortRep(&p->opt[1]);
++    }
++  }
++  lenEnd = ((mainLen >= repLens[repMaxIndex]) ? mainLen : repLens[repMaxIndex]);
++
++  if (lenEnd < 2)
++  {
++    *backRes = p->opt[1].backPrev;
++    return 1;
++  }
++
++  p->opt[1].posPrev = 0;
++  for (i = 0; i < LZMA_NUM_REPS; i++)
++    p->opt[0].backs[i] = reps[i];
++
++  len = lenEnd;
++  do
++    p->opt[len--].price = kInfinityPrice;
++  while (len >= 2);
++
++  for (i = 0; i < LZMA_NUM_REPS; i++)
++  {
++    UInt32 repLen = repLens[i];
++    UInt32 price;
++    if (repLen < 2)
++      continue;
++    price = repMatchPrice + GetPureRepPrice(p, i, p->state, posState);
++    do
++    {
++      UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][repLen - 2];
++      COptimal *opt = &p->opt[repLen];
++      if (curAndLenPrice < opt->price)
++      {
++        opt->price = curAndLenPrice;
++        opt->posPrev = 0;
++        opt->backPrev = i;
++        opt->prev1IsChar = False;
++      }
++    }
++    while (--repLen >= 2);
++  }
++
++  normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]);
++
++  len = ((repLens[0] >= 2) ? repLens[0] + 1 : 2);
++  if (len <= mainLen)
++  {
++    UInt32 offs = 0;
++    while (len > matches[offs])
++      offs += 2;
++    for (; ; len++)
++    {
++      COptimal *opt;
++      UInt32 distance = matches[offs + 1];
++
++      UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN];
++      UInt32 lenToPosState = GetLenToPosState(len);
++      if (distance < kNumFullDistances)
++        curAndLenPrice += p->distancesPrices[lenToPosState][distance];
++      else
++      {
++        UInt32 slot;
++        GetPosSlot2(distance, slot);
++        curAndLenPrice += p->alignPrices[distance & kAlignMask] + p->posSlotPrices[lenToPosState][slot];
++      }
++      opt = &p->opt[len];
++      if (curAndLenPrice < opt->price)
++      {
++        opt->price = curAndLenPrice;
++        opt->posPrev = 0;
++        opt->backPrev = distance + LZMA_NUM_REPS;
++        opt->prev1IsChar = False;
++      }
++      if (len == matches[offs])
++      {
++        offs += 2;
++        if (offs == numPairs)
++          break;
++      }
++    }
++  }
++
++  cur = 0;
++
++    #ifdef SHOW_STAT2
++    if (position >= 0)
++    {
++      unsigned i;
++      printf("\n pos = %4X", position);
++      for (i = cur; i <= lenEnd; i++)
++      printf("\nprice[%4X] = %d", position - cur + i, p->opt[i].price);
++    }
++    #endif
++
++  for (;;)
++  {
++    UInt32 numAvailFull, newLen, numPairs, posPrev, state, posState, startLen;
++    UInt32 curPrice, curAnd1Price, matchPrice, repMatchPrice;
++    Bool nextIsChar;
++    Byte curByte, matchByte;
++    const Byte *data;
++    COptimal *curOpt;
++    COptimal *nextOpt;
++
++    cur++;
++    if (cur == lenEnd)
++      return Backward(p, backRes, cur);
++
++    newLen = ReadMatchDistances(p, &numPairs);
++    if (newLen >= p->numFastBytes)
++    {
++      p->numPairs = numPairs;
++      p->longestMatchLength = newLen;
++      return Backward(p, backRes, cur);
++    }
++    position++;
++    curOpt = &p->opt[cur];
++    posPrev = curOpt->posPrev;
++    if (curOpt->prev1IsChar)
++    {
++      posPrev--;
++      if (curOpt->prev2)
++      {
++        state = p->opt[curOpt->posPrev2].state;
++        if (curOpt->backPrev2 < LZMA_NUM_REPS)
++          state = kRepNextStates[state];
++        else
++          state = kMatchNextStates[state];
++      }
++      else
++        state = p->opt[posPrev].state;
++      state = kLiteralNextStates[state];
++    }
++    else
++      state = p->opt[posPrev].state;
++    if (posPrev == cur - 1)
++    {
++      if (IsShortRep(curOpt))
++        state = kShortRepNextStates[state];
++      else
++        state = kLiteralNextStates[state];
++    }
++    else
++    {
++      UInt32 pos;
++      const COptimal *prevOpt;
++      if (curOpt->prev1IsChar && curOpt->prev2)
++      {
++        posPrev = curOpt->posPrev2;
++        pos = curOpt->backPrev2;
++        state = kRepNextStates[state];
++      }
++      else
++      {
++        pos = curOpt->backPrev;
++        if (pos < LZMA_NUM_REPS)
++          state = kRepNextStates[state];
++        else
++          state = kMatchNextStates[state];
++      }
++      prevOpt = &p->opt[posPrev];
++      if (pos < LZMA_NUM_REPS)
++      {
++        UInt32 i;
++        reps[0] = prevOpt->backs[pos];
++        for (i = 1; i <= pos; i++)
++          reps[i] = prevOpt->backs[i - 1];
++        for (; i < LZMA_NUM_REPS; i++)
++          reps[i] = prevOpt->backs[i];
++      }
++      else
++      {
++        UInt32 i;
++        reps[0] = (pos - LZMA_NUM_REPS);
++        for (i = 1; i < LZMA_NUM_REPS; i++)
++          reps[i] = prevOpt->backs[i - 1];
++      }
++    }
++    curOpt->state = (CState)state;
++
++    curOpt->backs[0] = reps[0];
++    curOpt->backs[1] = reps[1];
++    curOpt->backs[2] = reps[2];
++    curOpt->backs[3] = reps[3];
++
++    curPrice = curOpt->price;
++    nextIsChar = False;
++    data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
++    curByte = *data;
++    matchByte = *(data - (reps[0] + 1));
++
++    posState = (position & p->pbMask);
++
++    curAnd1Price = curPrice + GET_PRICE_0(p->isMatch[state][posState]);
++    {
++      const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));
++      curAnd1Price +=
++        (!IsCharState(state) ?
++          LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) :
++          LitEnc_GetPrice(probs, curByte, p->ProbPrices));
++    }
++
++    nextOpt = &p->opt[cur + 1];
++
++    if (curAnd1Price < nextOpt->price)
++    {
++      nextOpt->price = curAnd1Price;
++      nextOpt->posPrev = cur;
++      MakeAsChar(nextOpt);
++      nextIsChar = True;
++    }
++
++    matchPrice = curPrice + GET_PRICE_1(p->isMatch[state][posState]);
++    repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]);
++    
++    if (matchByte == curByte && !(nextOpt->posPrev < cur && nextOpt->backPrev == 0))
++    {
++      UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, state, posState);
++      if (shortRepPrice <= nextOpt->price)
++      {
++        nextOpt->price = shortRepPrice;
++        nextOpt->posPrev = cur;
++        MakeAsShortRep(nextOpt);
++        nextIsChar = True;
++      }
++    }
++    numAvailFull = p->numAvail;
++    {
++      UInt32 temp = kNumOpts - 1 - cur;
++      if (temp < numAvailFull)
++        numAvailFull = temp;
++    }
++
++    if (numAvailFull < 2)
++      continue;
++    numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes);
++
++    if (!nextIsChar && matchByte != curByte) /* speed optimization */
++    {
++      /* try Literal + rep0 */
++      UInt32 temp;
++      UInt32 lenTest2;
++      const Byte *data2 = data - (reps[0] + 1);
++      UInt32 limit = p->numFastBytes + 1;
++      if (limit > numAvailFull)
++        limit = numAvailFull;
++
++      for (temp = 1; temp < limit && data[temp] == data2[temp]; temp++);
++      lenTest2 = temp - 1;
++      if (lenTest2 >= 2)
++      {
++        UInt32 state2 = kLiteralNextStates[state];
++        UInt32 posStateNext = (position + 1) & p->pbMask;
++        UInt32 nextRepMatchPrice = curAnd1Price +
++            GET_PRICE_1(p->isMatch[state2][posStateNext]) +
++            GET_PRICE_1(p->isRep[state2]);
++        /* for (; lenTest2 >= 2; lenTest2--) */
++        {
++          UInt32 curAndLenPrice;
++          COptimal *opt;
++          UInt32 offset = cur + 1 + lenTest2;
++          while (lenEnd < offset)
++            p->opt[++lenEnd].price = kInfinityPrice;
++          curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext);
++          opt = &p->opt[offset];
++          if (curAndLenPrice < opt->price)
++          {
++            opt->price = curAndLenPrice;
++            opt->posPrev = cur + 1;
++            opt->backPrev = 0;
++            opt->prev1IsChar = True;
++            opt->prev2 = False;
++          }
++        }
++      }
++    }
++    
++    startLen = 2; /* speed optimization */
++    {
++    UInt32 repIndex;
++    for (repIndex = 0; repIndex < LZMA_NUM_REPS; repIndex++)
++    {
++      UInt32 lenTest;
++      UInt32 lenTestTemp;
++      UInt32 price;
++      const Byte *data2 = data - (reps[repIndex] + 1);
++      if (data[0] != data2[0] || data[1] != data2[1])
++        continue;
++      for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++);
++      while (lenEnd < cur + lenTest)
++        p->opt[++lenEnd].price = kInfinityPrice;
++      lenTestTemp = lenTest;
++      price = repMatchPrice + GetPureRepPrice(p, repIndex, state, posState);
++      do
++      {
++        UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][lenTest - 2];
++        COptimal *opt = &p->opt[cur + lenTest];
++        if (curAndLenPrice < opt->price)
++        {
++          opt->price = curAndLenPrice;
++          opt->posPrev = cur;
++          opt->backPrev = repIndex;
++          opt->prev1IsChar = False;
++        }
++      }
++      while (--lenTest >= 2);
++      lenTest = lenTestTemp;
++      
++      if (repIndex == 0)
++        startLen = lenTest + 1;
++        
++      /* if (_maxMode) */
++        {
++          UInt32 lenTest2 = lenTest + 1;
++          UInt32 limit = lenTest2 + p->numFastBytes;
++          UInt32 nextRepMatchPrice;
++          if (limit > numAvailFull)
++            limit = numAvailFull;
++          for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++);
++          lenTest2 -= lenTest + 1;
++          if (lenTest2 >= 2)
++          {
++            UInt32 state2 = kRepNextStates[state];
++            UInt32 posStateNext = (position + lenTest) & p->pbMask;
++            UInt32 curAndLenCharPrice =
++                price + p->repLenEnc.prices[posState][lenTest - 2] +
++                GET_PRICE_0(p->isMatch[state2][posStateNext]) +
++                LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]),
++                    data[lenTest], data2[lenTest], p->ProbPrices);
++            state2 = kLiteralNextStates[state2];
++            posStateNext = (position + lenTest + 1) & p->pbMask;
++            nextRepMatchPrice = curAndLenCharPrice +
++                GET_PRICE_1(p->isMatch[state2][posStateNext]) +
++                GET_PRICE_1(p->isRep[state2]);
++            
++            /* for (; lenTest2 >= 2; lenTest2--) */
++            {
++              UInt32 curAndLenPrice;
++              COptimal *opt;
++              UInt32 offset = cur + lenTest + 1 + lenTest2;
++              while (lenEnd < offset)
++                p->opt[++lenEnd].price = kInfinityPrice;
++              curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext);
++              opt = &p->opt[offset];
++              if (curAndLenPrice < opt->price)
++              {
++                opt->price = curAndLenPrice;
++                opt->posPrev = cur + lenTest + 1;
++                opt->backPrev = 0;
++                opt->prev1IsChar = True;
++                opt->prev2 = True;
++                opt->posPrev2 = cur;
++                opt->backPrev2 = repIndex;
++              }
++            }
++          }
++        }
++    }
++    }
++    /* for (UInt32 lenTest = 2; lenTest <= newLen; lenTest++) */
++    if (newLen > numAvail)
++    {
++      newLen = numAvail;
++      for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2);
++      matches[numPairs] = newLen;
++      numPairs += 2;
++    }
++    if (newLen >= startLen)
++    {
++      UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]);
++      UInt32 offs, curBack, posSlot;
++      UInt32 lenTest;
++      while (lenEnd < cur + newLen)
++        p->opt[++lenEnd].price = kInfinityPrice;
++
++      offs = 0;
++      while (startLen > matches[offs])
++        offs += 2;
++      curBack = matches[offs + 1];
++      GetPosSlot2(curBack, posSlot);
++      for (lenTest = /*2*/ startLen; ; lenTest++)
++      {
++        UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][lenTest - LZMA_MATCH_LEN_MIN];
++        UInt32 lenToPosState = GetLenToPosState(lenTest);
++        COptimal *opt;
++        if (curBack < kNumFullDistances)
++          curAndLenPrice += p->distancesPrices[lenToPosState][curBack];
++        else
++          curAndLenPrice += p->posSlotPrices[lenToPosState][posSlot] + p->alignPrices[curBack & kAlignMask];
++        
++        opt = &p->opt[cur + lenTest];
++        if (curAndLenPrice < opt->price)
++        {
++          opt->price = curAndLenPrice;
++          opt->posPrev = cur;
++          opt->backPrev = curBack + LZMA_NUM_REPS;
++          opt->prev1IsChar = False;
++        }
++
++        if (/*_maxMode && */lenTest == matches[offs])
++        {
++          /* Try Match + Literal + Rep0 */
++          const Byte *data2 = data - (curBack + 1);
++          UInt32 lenTest2 = lenTest + 1;
++          UInt32 limit = lenTest2 + p->numFastBytes;
++          UInt32 nextRepMatchPrice;
++          if (limit > numAvailFull)
++            limit = numAvailFull;
++          for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++);
++          lenTest2 -= lenTest + 1;
++          if (lenTest2 >= 2)
++          {
++            UInt32 state2 = kMatchNextStates[state];
++            UInt32 posStateNext = (position + lenTest) & p->pbMask;
++            UInt32 curAndLenCharPrice = curAndLenPrice +
++                GET_PRICE_0(p->isMatch[state2][posStateNext]) +
++                LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]),
++                    data[lenTest], data2[lenTest], p->ProbPrices);
++            state2 = kLiteralNextStates[state2];
++            posStateNext = (posStateNext + 1) & p->pbMask;
++            nextRepMatchPrice = curAndLenCharPrice +
++                GET_PRICE_1(p->isMatch[state2][posStateNext]) +
++                GET_PRICE_1(p->isRep[state2]);
++            
++            /* for (; lenTest2 >= 2; lenTest2--) */
++            {
++              UInt32 offset = cur + lenTest + 1 + lenTest2;
++              UInt32 curAndLenPrice;
++              COptimal *opt;
++              while (lenEnd < offset)
++                p->opt[++lenEnd].price = kInfinityPrice;
++              curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext);
++              opt = &p->opt[offset];
++              if (curAndLenPrice < opt->price)
++              {
++                opt->price = curAndLenPrice;
++                opt->posPrev = cur + lenTest + 1;
++                opt->backPrev = 0;
++                opt->prev1IsChar = True;
++                opt->prev2 = True;
++                opt->posPrev2 = cur;
++                opt->backPrev2 = curBack + LZMA_NUM_REPS;
++              }
++            }
++          }
++          offs += 2;
++          if (offs == numPairs)
++            break;
++          curBack = matches[offs + 1];
++          if (curBack >= kNumFullDistances)
++            GetPosSlot2(curBack, posSlot);
++        }
++      }
++    }
++  }
++}
++
++#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist))
++
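++/* Fast mode: heuristic choice between the longest rep match and the longest normal match, without the full optimal parse. */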
++static UInt32 GetOptimumFast(CLzmaEnc *p, UInt32 *backRes)
++{
++  UInt32 numAvail, mainLen, mainDist, numPairs, repIndex, repLen, i;
++  const Byte *data;
++  const UInt32 *matches;
++
++  if (p->additionalOffset == 0)
++    mainLen = ReadMatchDistances(p, &numPairs);
++  else
++  {
++    mainLen = p->longestMatchLength;
++    numPairs = p->numPairs;
++  }
++
++  numAvail = p->numAvail;
++  *backRes = (UInt32)-1;
++  if (numAvail < 2)
++    return 1;
++  if (numAvail > LZMA_MATCH_LEN_MAX)
++    numAvail = LZMA_MATCH_LEN_MAX;
++  data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
++
++  repLen = repIndex = 0;
++  for (i = 0; i < LZMA_NUM_REPS; i++)
++  {
++    UInt32 len;
++    const Byte *data2 = data - (p->reps[i] + 1);
++    if (data[0] != data2[0] || data[1] != data2[1])
++      continue;
++    for (len = 2; len < numAvail && data[len] == data2[len]; len++);
++    if (len >= p->numFastBytes)
++    {
++      *backRes = i;
++      MovePos(p, len - 1);
++      return len;
++    }
++    if (len > repLen)
++    {
++      repIndex = i;
++      repLen = len;
++    }
++  }
++
++  matches = p->matches;
++  if (mainLen >= p->numFastBytes)
++  {
++    *backRes = matches[numPairs - 1] + LZMA_NUM_REPS;
++    MovePos(p, mainLen - 1);
++    return mainLen;
++  }
++
++  mainDist = 0; /* for GCC */
++  if (mainLen >= 2)
++  {
++    mainDist = matches[numPairs - 1];
++    while (numPairs > 2 && mainLen == matches[numPairs - 4] + 1)
++    {
++      if (!ChangePair(matches[numPairs - 3], mainDist))
++        break;
++      numPairs -= 2;
++      mainLen = matches[numPairs - 2];
++      mainDist = matches[numPairs - 1];
++    }
++    if (mainLen == 2 && mainDist >= 0x80)
++      mainLen = 1;
++  }
++
++  if (repLen >= 2 && (
++        (repLen + 1 >= mainLen) ||
++        (repLen + 2 >= mainLen && mainDist >= (1 << 9)) ||
++        (repLen + 3 >= mainLen && mainDist >= (1 << 15))))
++  {
++    *backRes = repIndex;
++    MovePos(p, repLen - 1);
++    return repLen;
++  }
++  
++  if (mainLen < 2 || numAvail <= 2)
++    return 1;
++
++  p->longestMatchLength = ReadMatchDistances(p, &p->numPairs);
++  if (p->longestMatchLength >= 2)
++  {
++    UInt32 newDistance = matches[p->numPairs - 1];
++    if ((p->longestMatchLength >= mainLen && newDistance < mainDist) ||
++        (p->longestMatchLength == mainLen + 1 && !ChangePair(mainDist, newDistance)) ||
++        (p->longestMatchLength > mainLen + 1) ||
++        (p->longestMatchLength + 1 >= mainLen && mainLen >= 3 && ChangePair(newDistance, mainDist)))
++      return 1;
++  }
++  
++  data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
++  for (i = 0; i < LZMA_NUM_REPS; i++)
++  {
++    UInt32 len, limit;
++    const Byte *data2 = data - (p->reps[i] + 1);
++    if (data[0] != data2[0] || data[1] != data2[1])
++      continue;
++    limit = mainLen - 1;
++    for (len = 2; len < limit && data[len] == data2[len]; len++);
++    if (len >= limit)
++      return 1;
++  }
++  *backRes = mainDist + LZMA_NUM_REPS;
++  MovePos(p, mainLen - 2);
++  return mainLen;
++}
++
++static void WriteEndMarker(CLzmaEnc *p, UInt32 posState)
++{
++  UInt32 len;
++  RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1);
++  RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0);
++  p->state = kMatchNextStates[p->state];
++  len = LZMA_MATCH_LEN_MIN;
++  LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices);
++  RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, (1 << kNumPosSlotBits) - 1);
++  RangeEnc_EncodeDirectBits(&p->rc, (((UInt32)1 << 30) - 1) >> kNumAlignBits, 30 - kNumAlignBits);
++  RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask);
++}
++
++static SRes CheckErrors(CLzmaEnc *p)
++{
++  if (p->result != SZ_OK)
++    return p->result;
++  if (p->rc.res != SZ_OK)
++    p->result = SZ_ERROR_WRITE;
++  if (p->matchFinderBase.result != SZ_OK)
++    p->result = SZ_ERROR_READ;
++  if (p->result != SZ_OK)
++    p->finished = True;
++  return p->result;
++}
++
++static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
++{
++  /* ReleaseMFStream(); */
++  p->finished = True;
++  if (p->writeEndMark)
++    WriteEndMarker(p, nowPos & p->pbMask);
++  RangeEnc_FlushData(&p->rc);
++  RangeEnc_FlushStream(&p->rc);
++  return CheckErrors(p);
++}
++
++static void FillAlignPrices(CLzmaEnc *p)
++{
++  UInt32 i;
++  for (i = 0; i < kAlignTableSize; i++)
++    p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices);
++  p->alignPriceCount = 0;
++}
++
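++/* Rebuild the cached position-slot and low-distance price tables; invoked from the encode loop once matchPriceCount reaches its threshold. */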
++static void FillDistancesPrices(CLzmaEnc *p)
++{
++  UInt32 tempPrices[kNumFullDistances];
++  UInt32 i, lenToPosState;
++  for (i = kStartPosModelIndex; i < kNumFullDistances; i++)
++  {
++    UInt32 posSlot = GetPosSlot1(i);
++    UInt32 footerBits = ((posSlot >> 1) - 1);
++    UInt32 base = ((2 | (posSlot & 1)) << footerBits);
++    tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base - posSlot - 1, footerBits, i - base, p->ProbPrices);
++  }
++
++  for (lenToPosState = 0; lenToPosState < kNumLenToPosStates; lenToPosState++)
++  {
++    UInt32 posSlot;
++    const CLzmaProb *encoder = p->posSlotEncoder[lenToPosState];
++    UInt32 *posSlotPrices = p->posSlotPrices[lenToPosState];
++    for (posSlot = 0; posSlot < p->distTableSize; posSlot++)
++      posSlotPrices[posSlot] = RcTree_GetPrice(encoder, kNumPosSlotBits, posSlot, p->ProbPrices);
++    for (posSlot = kEndPosModelIndex; posSlot < p->distTableSize; posSlot++)
++      posSlotPrices[posSlot] += ((((posSlot >> 1) - 1) - kNumAlignBits) << kNumBitPriceShiftBits);
++
++    {
++      UInt32 *distancesPrices = p->distancesPrices[lenToPosState];
++      UInt32 i;
++      for (i = 0; i < kStartPosModelIndex; i++)
++        distancesPrices[i] = posSlotPrices[i];
++      for (; i < kNumFullDistances; i++)
++        distancesPrices[i] = posSlotPrices[GetPosSlot1(i)] + tempPrices[i];
++    }
++  }
++  p->matchPriceCount = 0;
++}
++
++void LzmaEnc_Construct(CLzmaEnc *p)
++{
++  RangeEnc_Construct(&p->rc);
++  MatchFinder_Construct(&p->matchFinderBase);
++  #ifndef _7ZIP_ST
++  MatchFinderMt_Construct(&p->matchFinderMt);
++  p->matchFinderMt.MatchFinder = &p->matchFinderBase;
++  #endif
++
++  {
++    CLzmaEncProps props;
++    LzmaEncProps_Init(&props);
++    LzmaEnc_SetProps(p, &props);
++  }
++
++  #ifndef LZMA_LOG_BSR
++  LzmaEnc_FastPosInit(p->g_FastPos);
++  #endif
++
++  LzmaEnc_InitPriceTables(p->ProbPrices);
++  p->litProbs = 0;
++  p->saveState.litProbs = 0;
++}
++
++CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc)
++{
++  void *p;
++  p = alloc->Alloc(alloc, sizeof(CLzmaEnc));
++  if (p != 0)
++    LzmaEnc_Construct((CLzmaEnc *)p);
++  return p;
++}
++
++void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAlloc *alloc)
++{
++  alloc->Free(alloc, p->litProbs);
++  alloc->Free(alloc, p->saveState.litProbs);
++  p->litProbs = 0;
++  p->saveState.litProbs = 0;
++}
++
++void LzmaEnc_Destruct(CLzmaEnc *p, ISzAlloc *alloc, ISzAlloc *allocBig)
++{
++  #ifndef _7ZIP_ST
++  MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
++  #endif
++  MatchFinder_Free(&p->matchFinderBase, allocBig);
++  LzmaEnc_FreeLits(p, alloc);
++  RangeEnc_Free(&p->rc, alloc);
++}
++
++void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig)
++{
++  LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig);
++  alloc->Free(alloc, p);
++}
++
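++/* Main encode loop for one block: pick a literal or match via GetOptimum/GetOptimumFast and emit it through the range coder, refreshing price tables as needed. */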
++static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, UInt32 maxPackSize, UInt32 maxUnpackSize)
++{
++  UInt32 nowPos32, startPos32;
++  if (p->needInit)
++  {
++    p->matchFinder.Init(p->matchFinderObj);
++    p->needInit = 0;
++  }
++
++  if (p->finished)
++    return p->result;
++  RINOK(CheckErrors(p));
++
++  nowPos32 = (UInt32)p->nowPos64;
++  startPos32 = nowPos32;
++
++  if (p->nowPos64 == 0)
++  {
++    UInt32 numPairs;
++    Byte curByte;
++    if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
++      return Flush(p, nowPos32);
++    ReadMatchDistances(p, &numPairs);
++    RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][0], 0);
++    p->state = kLiteralNextStates[p->state];
++    curByte = p->matchFinder.GetIndexByte(p->matchFinderObj, 0 - p->additionalOffset);
++    LitEnc_Encode(&p->rc, p->litProbs, curByte);
++    p->additionalOffset--;
++    nowPos32++;
++  }
++
++  if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0)
++  for (;;)
++  {
++    UInt32 pos, len, posState;
++
++    if (p->fastMode)
++      len = GetOptimumFast(p, &pos);
++    else
++      len = GetOptimum(p, nowPos32, &pos);
++
++    #ifdef SHOW_STAT2
++    printf("\n pos = %4X,   len = %d   pos = %d", nowPos32, len, pos);
++    #endif
++
++    posState = nowPos32 & p->pbMask;
++    if (len == 1 && pos == (UInt32)-1)
++    {
++      Byte curByte;
++      CLzmaProb *probs;
++      const Byte *data;
++
++      RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 0);
++      data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
++      curByte = *data;
++      probs = LIT_PROBS(nowPos32, *(data - 1));
++      if (IsCharState(p->state))
++        LitEnc_Encode(&p->rc, probs, curByte);
++      else
++        LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0] - 1));
++      p->state = kLiteralNextStates[p->state];
++    }
++    else
++    {
++      RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1);
++      if (pos < LZMA_NUM_REPS)
++      {
++        RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 1);
++        if (pos == 0)
++        {
++          RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 0);
++          RangeEnc_EncodeBit(&p->rc, &p->isRep0Long[p->state][posState], ((len == 1) ? 0 : 1));
++        }
++        else
++        {
++          UInt32 distance = p->reps[pos];
++          RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 1);
++          if (pos == 1)
++            RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 0);
++          else
++          {
++            RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 1);
++            RangeEnc_EncodeBit(&p->rc, &p->isRepG2[p->state], pos - 2);
++            if (pos == 3)
++              p->reps[3] = p->reps[2];
++            p->reps[2] = p->reps[1];
++          }
++          p->reps[1] = p->reps[0];
++          p->reps[0] = distance;
++        }
++        if (len == 1)
++          p->state = kShortRepNextStates[p->state];
++        else
++        {
++          LenEnc_Encode2(&p->repLenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices);
++          p->state = kRepNextStates[p->state];
++        }
++      }
++      else
++      {
++        UInt32 posSlot;
++        RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0);
++        p->state = kMatchNextStates[p->state];
++        LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices);
++        pos -= LZMA_NUM_REPS;
++        GetPosSlot(pos, posSlot);
++        RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, posSlot);
++        
++        if (posSlot >= kStartPosModelIndex)
++        {
++          UInt32 footerBits = ((posSlot >> 1) - 1);
++          UInt32 base = ((2 | (posSlot & 1)) << footerBits);
++          UInt32 posReduced = pos - base;
++
++          if (posSlot < kEndPosModelIndex)
++            RcTree_ReverseEncode(&p->rc, p->posEncoders + base - posSlot - 1, footerBits, posReduced);
++          else
++          {
++            RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits);
++            RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask);
++            p->alignPriceCount++;
++          }
++        }
++        p->reps[3] = p->reps[2];
++        p->reps[2] = p->reps[1];
++        p->reps[1] = p->reps[0];
++        p->reps[0] = pos;
++        p->matchPriceCount++;
++      }
++    }
++    p->additionalOffset -= len;
++    nowPos32 += len;
++    if (p->additionalOffset == 0)
++    {
++      UInt32 processed;
++      if (!p->fastMode)
++      {
++        if (p->matchPriceCount >= (1 << 7))
++          FillDistancesPrices(p);
++        if (p->alignPriceCount >= kAlignTableSize)
++          FillAlignPrices(p);
++      }
++      if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
++        break;
++      processed = nowPos32 - startPos32;
++      if (useLimits)
++      {
++        if (processed + kNumOpts + 300 >= maxUnpackSize ||
++            RangeEnc_GetProcessed(&p->rc) + kNumOpts * 2 >= maxPackSize)
++          break;
++      }
++      else if (processed >= (1 << 15))
++      {
++        p->nowPos64 += nowPos32 - startPos32;
++        return CheckErrors(p);
++      }
++    }
++  }
++  p->nowPos64 += nowPos32 - startPos32;
++  return Flush(p, nowPos32);
++}
++
++#define kBigHashDicLimit ((UInt32)1 << 24)
++
++static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig)
++{
++  UInt32 beforeSize = kNumOpts;
++  Bool btMode;
++  if (!RangeEnc_Alloc(&p->rc, alloc))
++    return SZ_ERROR_MEM;
++  btMode = (p->matchFinderBase.btMode != 0);
++  #ifndef _7ZIP_ST
++  p->mtMode = (p->multiThread && !p->fastMode && btMode);
++  #endif
++
++  {
++    unsigned lclp = p->lc + p->lp;
++    if (p->litProbs == 0 || p->saveState.litProbs == 0 || p->lclp != lclp)
++    {
++      LzmaEnc_FreeLits(p, alloc);
++      p->litProbs = (CLzmaProb *)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb));
++      p->saveState.litProbs = (CLzmaProb *)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb));
++      if (p->litProbs == 0 || p->saveState.litProbs == 0)
++      {
++        LzmaEnc_FreeLits(p, alloc);
++        return SZ_ERROR_MEM;
++      }
++      p->lclp = lclp;
++    }
++  }
++
++  p->matchFinderBase.bigHash = (p->dictSize > kBigHashDicLimit);
++
++  if (beforeSize + p->dictSize < keepWindowSize)
++    beforeSize = keepWindowSize - p->dictSize;
++
++  #ifndef _7ZIP_ST
++  if (p->mtMode)
++  {
++    RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig));
++    p->matchFinderObj = &p->matchFinderMt;
++    MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
++  }
++  else
++  #endif
++  {
++    if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig))
++      return SZ_ERROR_MEM;
++    p->matchFinderObj = &p->matchFinderBase;
++    MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder);
++  }
++  return SZ_OK;
++}
++
++void LzmaEnc_Init(CLzmaEnc *p)
++{
++  UInt32 i;
++  p->state = 0;
++  for (i = 0 ; i < LZMA_NUM_REPS; i++)
++    p->reps[i] = 0;
++
++  RangeEnc_Init(&p->rc);
++
++
++  for (i = 0; i < kNumStates; i++)
++  {
++    UInt32 j;
++    for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++)
++    {
++      p->isMatch[i][j] = kProbInitValue;
++      p->isRep0Long[i][j] = kProbInitValue;
++    }
++    p->isRep[i] = kProbInitValue;
++    p->isRepG0[i] = kProbInitValue;
++    p->isRepG1[i] = kProbInitValue;
++    p->isRepG2[i] = kProbInitValue;
++  }
++
++  {
++    UInt32 num = 0x300 << (p->lp + p->lc);
++    for (i = 0; i < num; i++)
++      p->litProbs[i] = kProbInitValue;
++  }
++
++  {
++    for (i = 0; i < kNumLenToPosStates; i++)
++    {
++      CLzmaProb *probs = p->posSlotEncoder[i];
++      UInt32 j;
++      for (j = 0; j < (1 << kNumPosSlotBits); j++)
++        probs[j] = kProbInitValue;
++    }
++  }
++  {
++    for (i = 0; i < kNumFullDistances - kEndPosModelIndex; i++)
++      p->posEncoders[i] = kProbInitValue;
++  }
++
++  LenEnc_Init(&p->lenEnc.p);
++  LenEnc_Init(&p->repLenEnc.p);
++
++  for (i = 0; i < (1 << kNumAlignBits); i++)
++    p->posAlignEncoder[i] = kProbInitValue;
++
++  p->optimumEndIndex = 0;
++  p->optimumCurrentIndex = 0;
++  p->additionalOffset = 0;
++
++  p->pbMask = (1 << p->pb) - 1;
++  p->lpMask = (1 << p->lp) - 1;
++}
++
++void LzmaEnc_InitPrices(CLzmaEnc *p)
++{
++  if (!p->fastMode)
++  {
++    FillDistancesPrices(p);
++    FillAlignPrices(p);
++  }
++
++  p->lenEnc.tableSize =
++  p->repLenEnc.tableSize =
++      p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN;
++  LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, p->ProbPrices);
++  LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, p->ProbPrices);
++}
++
++static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig)
++{
++  UInt32 i;
++  for (i = 0; i < (UInt32)kDicLogSizeMaxCompress; i++)
++    if (p->dictSize <= ((UInt32)1 << i))
++      break;
++  p->distTableSize = i * 2;
++
++  p->finished = False;
++  p->result = SZ_OK;
++  RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig));
++  LzmaEnc_Init(p);
++  LzmaEnc_InitPrices(p);
++  p->nowPos64 = 0;
++  return SZ_OK;
++}
++
++static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream,
++    ISzAlloc *alloc, ISzAlloc *allocBig)
++{
++  CLzmaEnc *p = (CLzmaEnc *)pp;
++  p->matchFinderBase.stream = inStream;
++  p->needInit = 1;
++  p->rc.outStream = outStream;
++  return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig);
++}
++
++SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp,
++    ISeqInStream *inStream, UInt32 keepWindowSize,
++    ISzAlloc *alloc, ISzAlloc *allocBig)
++{
++  CLzmaEnc *p = (CLzmaEnc *)pp;
++  p->matchFinderBase.stream = inStream;
++  p->needInit = 1;
++  return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
++}
++
++static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen)
++{
++  p->matchFinderBase.directInput = 1;
++  p->matchFinderBase.bufferBase = (Byte *)src;
++  p->matchFinderBase.directInputRem = srcLen;
++}
++
++SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
++    UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig)
++{
++  CLzmaEnc *p = (CLzmaEnc *)pp;
++  LzmaEnc_SetInputBuf(p, src, srcLen);
++  p->needInit = 1;
++
++  return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
++}
++
++void LzmaEnc_Finish(CLzmaEncHandle pp)
++{
++  #ifndef _7ZIP_ST
++  CLzmaEnc *p = (CLzmaEnc *)pp;
++  if (p->mtMode)
++    MatchFinderMt_ReleaseStream(&p->matchFinderMt);
++  #else
++  pp = pp;
++  #endif
++}
++
++typedef struct
++{
++  ISeqOutStream funcTable;
++  Byte *data;
++  SizeT rem;
++  Bool overflow;
++} CSeqOutStreamBuf;
++
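++/* ISeqOutStream callback that writes into a fixed-size memory buffer; if the
++   buffer is too small, the data is truncated and the overflow flag is set. */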
++static size_t MyWrite(void *pp, const void *data, size_t size)
++{
++  CSeqOutStreamBuf *p = (CSeqOutStreamBuf *)pp;
++  if (p->rem < size)
++  {
++    size = p->rem;
++    p->overflow = True;
++  }
++  memcpy(p->data, data, size);
++  p->rem -= size;
++  p->data += size;
++  return size;
++}
++
++
++UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp)
++{
++  const CLzmaEnc *p = (CLzmaEnc *)pp;
++  return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
++}
++
++const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp)
++{
++  const CLzmaEnc *p = (CLzmaEnc *)pp;
++  return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
++}
++
++SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit,
++    Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize)
++{
++  CLzmaEnc *p = (CLzmaEnc *)pp;
++  UInt64 nowPos64;
++  SRes res;
++  CSeqOutStreamBuf outStream;
++
++  outStream.funcTable.Write = MyWrite;
++  outStream.data = dest;
++  outStream.rem = *destLen;
++  outStream.overflow = False;
++
++  p->writeEndMark = False;
++  p->finished = False;
++  p->result = SZ_OK;
++
++  if (reInit)
++    LzmaEnc_Init(p);
++  LzmaEnc_InitPrices(p);
++  nowPos64 = p->nowPos64;
++  RangeEnc_Init(&p->rc);
++  p->rc.outStream = &outStream.funcTable;
++
++  res = LzmaEnc_CodeOneBlock(p, True, desiredPackSize, *unpackSize);
++  
++  *unpackSize = (UInt32)(p->nowPos64 - nowPos64);
++  *destLen -= outStream.rem;
++  if (outStream.overflow)
++    return SZ_ERROR_OUTPUT_EOF;
++
++  return res;
++}
++
++static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
++{
++  SRes res = SZ_OK;
++
++  #ifndef _7ZIP_ST
++  Byte allocaDummy[0x300];
++  int i = 0;
++  for (i = 0; i < 16; i++)
++    allocaDummy[i] = (Byte)i;
++  #endif
++
++  for (;;)
++  {
++    res = LzmaEnc_CodeOneBlock(p, False, 0, 0);
++    if (res != SZ_OK || p->finished != 0)
++      break;
++    if (progress != 0)
++    {
++      res = progress->Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc));
++      if (res != SZ_OK)
++      {
++        res = SZ_ERROR_PROGRESS;
++        break;
++      }
++    }
++  }
++  LzmaEnc_Finish(p);
++  return res;
++}
++
++SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress,
++    ISzAlloc *alloc, ISzAlloc *allocBig)
++{
++  RINOK(LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig));
++  return LzmaEnc_Encode2((CLzmaEnc *)pp, progress);
++}
++
++SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
++{
++  CLzmaEnc *p = (CLzmaEnc *)pp;
++  int i;
++  UInt32 dictSize = p->dictSize;
++  if (*size < LZMA_PROPS_SIZE)
++    return SZ_ERROR_PARAM;
++  *size = LZMA_PROPS_SIZE;
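++  /* The first properties byte packs lc, lp and pb as (pb * 5 + lp) * 9 + lc. */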
++  props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
++
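++  /* Round the dictionary size up to the nearest value of the form (2 << i) or
++     (3 << i) before storing it in the remaining four property bytes. */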
++  for (i = 11; i <= 30; i++)
++  {
++    if (dictSize <= ((UInt32)2 << i))
++    {
++      dictSize = (2 << i);
++      break;
++    }
++    if (dictSize <= ((UInt32)3 << i))
++    {
++      dictSize = (3 << i);
++      break;
++    }
++  }
++
++  for (i = 0; i < 4; i++)
++    props[1 + i] = (Byte)(dictSize >> (8 * i));
++  return SZ_OK;
++}
++
++SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
++    int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig)
++{
++  SRes res;
++  CLzmaEnc *p = (CLzmaEnc *)pp;
++
++  CSeqOutStreamBuf outStream;
++
++  LzmaEnc_SetInputBuf(p, src, srcLen);
++
++  outStream.funcTable.Write = MyWrite;
++  outStream.data = dest;
++  outStream.rem = *destLen;
++  outStream.overflow = False;
++
++  p->writeEndMark = writeEndMark;
++
++  p->rc.outStream = &outStream.funcTable;
++  res = LzmaEnc_MemPrepare(pp, src, srcLen, 0, alloc, allocBig);
++  if (res == SZ_OK)
++    res = LzmaEnc_Encode2(p, progress);
++
++  *destLen -= outStream.rem;
++  if (outStream.overflow)
++    return SZ_ERROR_OUTPUT_EOF;
++  return res;
++}
++
++SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
++    const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
++    ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig)
++{
++  CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc);
++  SRes res;
++  if (p == 0)
++    return SZ_ERROR_MEM;
++
++  res = LzmaEnc_SetProps(p, props);
++  if (res == SZ_OK)
++  {
++    res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize);
++    if (res == SZ_OK)
++      res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen,
++          writeEndMark, progress, alloc, allocBig);
++  }
++
++  LzmaEnc_Destroy(p, alloc, allocBig);
++  return res;
++}
+
+Property changes on: third_party/lzma_sdk/LzmaEnc.c
+___________________________________________________________________
+Added: svn:eol-style
+   + LF
+
+Index: third_party/lzma_sdk/Alloc.c
+===================================================================
+--- third_party/lzma_sdk/Alloc.c	(revision 0)
++++ third_party/lzma_sdk/Alloc.c	(revision 0)
+@@ -0,0 +1,127 @@
++/* Alloc.c -- Memory allocation functions
++2008-09-24
++Igor Pavlov
++Public domain */
++
++#ifdef _WIN32
++#include <windows.h>
++#endif
++#include <stdlib.h>
++
++#include "Alloc.h"
++
++/* #define _SZ_ALLOC_DEBUG */
++
++/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */
++#ifdef _SZ_ALLOC_DEBUG
++#include <stdio.h>
++int g_allocCount = 0;
++int g_allocCountMid = 0;
++int g_allocCountBig = 0;
++#endif
++
++void *MyAlloc(size_t size)
++{
++  if (size == 0)
++    return 0;
++  #ifdef _SZ_ALLOC_DEBUG
++  {
++    void *p = malloc(size);
++    fprintf(stderr, "\nAlloc %10d bytes, count = %10d,  addr = %8X", size, g_allocCount++, (unsigned)p);
++    return p;
++  }
++  #else
++  return malloc(size);
++  #endif
++}
++
++void MyFree(void *address)
++{
++  #ifdef _SZ_ALLOC_DEBUG
++  if (address != 0)
++    fprintf(stderr, "\nFree; count = %10d,  addr = %8X", --g_allocCount, (unsigned)address);
++  #endif
++  free(address);
++}
++
++#ifdef _WIN32
++
++void *MidAlloc(size_t size)
++{
++  if (size == 0)
++    return 0;
++  #ifdef _SZ_ALLOC_DEBUG
++  fprintf(stderr, "\nAlloc_Mid %10d bytes;  count = %10d", size, g_allocCountMid++);
++  #endif
++  return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE);
++}
++
++void MidFree(void *address)
++{
++  #ifdef _SZ_ALLOC_DEBUG
++  if (address != 0)
++    fprintf(stderr, "\nFree_Mid; count = %10d", --g_allocCountMid);
++  #endif
++  if (address == 0)
++    return;
++  VirtualFree(address, 0, MEM_RELEASE);
++}
++
++#ifndef MEM_LARGE_PAGES
++#undef _7ZIP_LARGE_PAGES
++#endif
++
++#ifdef _7ZIP_LARGE_PAGES
++SIZE_T g_LargePageSize = 0;
++typedef SIZE_T (WINAPI *GetLargePageMinimumP)();
++#endif
++
++void SetLargePageSize()
++{
++  #ifdef _7ZIP_LARGE_PAGES
++  SIZE_T size = 0;
++  GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP)
++        GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum");
++  if (largePageMinimum == 0)
++    return;
++  size = largePageMinimum();
++  if (size == 0 || (size & (size - 1)) != 0)
++    return;
++  g_LargePageSize = size;
++  #endif
++}
++
++
++void *BigAlloc(size_t size)
++{
++  if (size == 0)
++    return 0;
++  #ifdef _SZ_ALLOC_DEBUG
++  fprintf(stderr, "\nAlloc_Big %10d bytes;  count = %10d", size, g_allocCountBig++);
++  #endif
++  
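++  /* With large pages available, round the request up to a multiple of the
++     large-page size and try a large-page allocation first. */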
++  #ifdef _7ZIP_LARGE_PAGES
++  if (g_LargePageSize != 0 && g_LargePageSize <= (1 << 30) && size >= (1 << 18))
++  {
++    void *res = VirtualAlloc(0, (size + g_LargePageSize - 1) & (~(g_LargePageSize - 1)),
++        MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
++    if (res != 0)
++      return res;
++  }
++  #endif
++  return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE);
++}
++
++void BigFree(void *address)
++{
++  #ifdef _SZ_ALLOC_DEBUG
++  if (address != 0)
++    fprintf(stderr, "\nFree_Big; count = %10d", --g_allocCountBig);
++  #endif
++  
++  if (address == 0)
++    return;
++  VirtualFree(address, 0, MEM_RELEASE);
++}
++
++#endif
+
+Property changes on: third_party/lzma_sdk/Alloc.c
+___________________________________________________________________
+Added: svn:eol-style
+   + LF
+
+Index: third_party/lzma_sdk/LzmaLib.c
+===================================================================
+--- third_party/lzma_sdk/LzmaLib.c	(revision 0)
++++ third_party/lzma_sdk/LzmaLib.c	(revision 0)
+@@ -0,0 +1,46 @@
++/* LzmaLib.c -- LZMA library wrapper
++2008-08-05
++Igor Pavlov
++Public domain */
++
++#include "LzmaEnc.h"
++#include "LzmaDec.h"
++#include "Alloc.h"
++#include "LzmaLib.h"
++
++static void *SzAlloc(void *p, size_t size) { p = p; return MyAlloc(size); }
++static void SzFree(void *p, void *address) { p = p; MyFree(address); }
++static ISzAlloc g_Alloc = { SzAlloc, SzFree };
++
++MY_STDAPI LzmaCompress(unsigned char *dest, size_t  *destLen, const unsigned char *src, size_t  srcLen,
++  unsigned char *outProps, size_t *outPropsSize,
++  int level, /* 0 <= level <= 9, default = 5 */
++  unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */
++  int lc, /* 0 <= lc <= 8, default = 3  */
++  int lp, /* 0 <= lp <= 4, default = 0  */
++  int pb, /* 0 <= pb <= 4, default = 2  */
++  int fb,  /* 5 <= fb <= 273, default = 32 */
++  int numThreads /* 1 or 2, default = 2 */
++)
++{
++  CLzmaEncProps props;
++  LzmaEncProps_Init(&props);
++  props.level = level;
++  props.dictSize = dictSize;
++  props.lc = lc;
++  props.lp = lp;
++  props.pb = pb;
++  props.fb = fb;
++  props.numThreads = numThreads;
++
++  return LzmaEncode(dest, destLen, src, srcLen, &props, outProps, outPropsSize, 0,
++      NULL, &g_Alloc, &g_Alloc);
++}
++
++
++MY_STDAPI LzmaUncompress(unsigned char *dest, size_t  *destLen, const unsigned char *src, size_t  *srcLen,
++  const unsigned char *props, size_t propsSize)
++{
++  ELzmaStatus status;
++  return LzmaDecode(dest, destLen, src, srcLen, props, (unsigned)propsSize, LZMA_FINISH_ANY, &status, &g_Alloc);
++}
+
+Property changes on: third_party/lzma_sdk/LzmaLib.c
+___________________________________________________________________
+Added: svn:eol-style
+   + LF
+
+Index: third_party/lzma_sdk/LzFind.h
+===================================================================
+--- third_party/lzma_sdk/LzFind.h	(revision 0)
++++ third_party/lzma_sdk/LzFind.h	(revision 0)
+@@ -0,0 +1,115 @@
++/* LzFind.h -- Match finder for LZ algorithms
++2009-04-22 : Igor Pavlov : Public domain */
++
++#ifndef __LZ_FIND_H
++#define __LZ_FIND_H
++
++#include "Types.h"
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++typedef UInt32 CLzRef;
++
++typedef struct _CMatchFinder
++{
++  Byte *buffer;
++  UInt32 pos;
++  UInt32 posLimit;
++  UInt32 streamPos;
++  UInt32 lenLimit;
++
++  UInt32 cyclicBufferPos;
++  UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
++
++  UInt32 matchMaxLen;
++  CLzRef *hash;
++  CLzRef *son;
++  UInt32 hashMask;
++  UInt32 cutValue;
++
++  Byte *bufferBase;
++  ISeqInStream *stream;
++  int streamEndWasReached;
++
++  UInt32 blockSize;
++  UInt32 keepSizeBefore;
++  UInt32 keepSizeAfter;
++
++  UInt32 numHashBytes;
++  int directInput;
++  size_t directInputRem;
++  int btMode;
++  int bigHash;
++  UInt32 historySize;
++  UInt32 fixedHashSize;
++  UInt32 hashSizeSum;
++  UInt32 numSons;
++  SRes result;
++  UInt32 crc[256];
++} CMatchFinder;
++
++#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer)
++#define Inline_MatchFinder_GetIndexByte(p, index) ((p)->buffer[(Int32)(index)])
++
++#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos)
++
++int MatchFinder_NeedMove(CMatchFinder *p);
++Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
++void MatchFinder_MoveBlock(CMatchFinder *p);
++void MatchFinder_ReadIfRequired(CMatchFinder *p);
++
++void MatchFinder_Construct(CMatchFinder *p);
++
++/* Conditions:
++     historySize <= 3 GB
++     keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB
++*/
++int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
++    UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
++    ISzAlloc *alloc);
++void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc);
++void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems);
++void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
++
++UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
++    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
++    UInt32 *distances, UInt32 maxLen);
++
++/*
++Conditions:
++  Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func.
++  Mf_GetPointerToCurrentPos_Func's result must be used only before any other function
++*/
++
++typedef void (*Mf_Init_Func)(void *object);
++typedef Byte (*Mf_GetIndexByte_Func)(void *object, Int32 index);
++typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
++typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
++typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
++typedef void (*Mf_Skip_Func)(void *object, UInt32);
++
++typedef struct _IMatchFinder
++{
++  Mf_Init_Func Init;
++  Mf_GetIndexByte_Func GetIndexByte;
++  Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
++  Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
++  Mf_GetMatches_Func GetMatches;
++  Mf_Skip_Func Skip;
++} IMatchFinder;
++
++void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable);
++
++void MatchFinder_Init(CMatchFinder *p);
++UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
++UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
++void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
++void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif
+
+Property changes on: third_party/lzma_sdk/LzFind.h
+___________________________________________________________________
+Added: svn:eol-style
+   + LF
+
diff --git a/shared.mk b/shared.mk
new file mode 100644
index 0000000..15dca18
--- /dev/null
+++ b/shared.mk
@@ -0,0 +1,17 @@
+OS := $(shell uname)
+IDIRS=-I../brotli/dec/ -I../brotli/enc/ -I../
+
+GFLAGS=-no-canonical-prefixes -fno-omit-frame-pointer -m64
+
+CPP = g++
+LFLAGS =
+CPPFLAGS = -c $(IDIRS) -std=c++0x $(GFLAGS)
+
+ifeq ($(OS), Darwin)
+  CPPFLAGS += -DOS_MACOSX
+else
+  CPPFLAGS += -fno-tree-vrp
+endif
+
+%.o : %.cc
+	$(CPP) $(CPPFLAGS) $< -o $@
diff --git a/woff2/Makefile b/woff2/Makefile
new file mode 100644
index 0000000..971feac
--- /dev/null
+++ b/woff2/Makefile
@@ -0,0 +1,28 @@
+#Converter makefile
+
+include ../shared.mk
+
+OUROBJ = font.o glyph.o normalize.o transform.o woff2.o
+
+BROTLI = ../brotli
+ENCOBJ = $(BROTLI)/enc/*.o
+DECOBJ = $(BROTLI)/dec/*.o
+
+OBJS = $(OUROBJ)
+EXECUTABLES=woff2_compress woff2_decompress
+
+EXE_OBJS=$(patsubst %, %.o, $(EXECUTABLES))
+
+all : $(OBJS) $(EXECUTABLES)
+
+$(EXECUTABLES) : $(EXE_OBJS) deps
+	$(CPP) $(LFLAGS) $(OBJS) $(ENCOBJ) $(DECOBJ) $@.o -o $@
+
+deps :
+	make -C $(BROTLI)/dec
+	make -C $(BROTLI)/enc
+
+clean :
+	rm -f $(OBJS) $(EXE_OBJS) $(EXECUTABLES)
+	make -C $(BROTLI)/dec clean
+	make -C $(BROTLI)/enc clean
diff --git a/woff2/file.h b/woff2/file.h
new file mode 100644
index 0000000..f93fdee
--- /dev/null
+++ b/woff2/file.h
@@ -0,0 +1,40 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// File IO helpers
+
+#ifndef BROTLI_WOFF2_FILE_H_
+#define BROTLI_WOFF2_FILE_H_
+
+#include <algorithm>
+#include <fstream>
+#include <iterator>
+#include <string>
+
+namespace woff2 {
+
+inline std::string GetFileContent(std::string filename) {
+  std::ifstream ifs(filename.c_str(), std::ios::binary);
+  return std::string(
+    std::istreambuf_iterator<char>(ifs.rdbuf()),
+    std::istreambuf_iterator<char>());
+}
+
+inline void SetFileContents(std::string filename, std::string content) {
+  std::ofstream ofs(filename.c_str(), std::ios::binary);
+  std::copy(content.begin(),
+            content.end(),
+            std::ostream_iterator<char>(ofs));
+}
+
+} // namespace woff2
+#endif   //  BROTLI_WOFF2_FILE_H_
diff --git a/woff2/font.cc b/woff2/font.cc
new file mode 100644
index 0000000..2733708
--- /dev/null
+++ b/woff2/font.cc
@@ -0,0 +1,176 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Font management utilities
+
+#include "./font.h"
+
+#include <algorithm>
+
+#include "./ots.h"
+#include "./port.h"
+#include "./store_bytes.h"
+
+namespace woff2 {
+
+Font::Table* Font::FindTable(uint32_t tag) {
+  std::map<uint32_t, Font::Table>::iterator it = tables.find(tag);
+  return it == tables.end() ? 0 : &it->second;
+}
+
+const Font::Table* Font::FindTable(uint32_t tag) const {
+  std::map<uint32_t, Font::Table>::const_iterator it = tables.find(tag);
+  return it == tables.end() ? 0 : &it->second;
+}
+
+bool ReadFont(const uint8_t* data, size_t len, Font* font) {
+  ots::Buffer file(data, len);
+
+  // We don't care about the search_range, entry_selector and range_shift
+  // fields; they are always recomputed when the font is written.
+  if (!file.ReadU32(&font->flavor) ||
+      !file.ReadU16(&font->num_tables) ||
+      !file.Skip(6)) {
+    return OTS_FAILURE();
+  }
+
+  std::map<uint32_t, uint32_t> intervals;
+  for (uint16_t i = 0; i < font->num_tables; ++i) {
+    Font::Table table;
+    if (!file.ReadU32(&table.tag) ||
+        !file.ReadU32(&table.checksum) ||
+        !file.ReadU32(&table.offset) ||
+        !file.ReadU32(&table.length)) {
+      return OTS_FAILURE();
+    }
+    if ((table.offset & 3) != 0 ||
+        table.length > len ||
+        len - table.length < table.offset) {
+      return OTS_FAILURE();
+    }
+    intervals[table.offset] = table.length;
+    table.data = data + table.offset;
+    if (font->tables.find(table.tag) != font->tables.end()) {
+      return OTS_FAILURE();
+    }
+    font->tables[table.tag] = table;
+  }
+
+  // Check that tables are non-overlapping.
+  uint32_t last_offset = 12UL + 16UL * font->num_tables;
+  for (const auto& i : intervals) {
+    if (i.first < last_offset || i.first + i.second < i.first) {
+      return OTS_FAILURE();
+    }
+    last_offset = i.first + i.second;
+  }
+  return true;
+}
+
+size_t FontFileSize(const Font& font) {
+  size_t max_offset = 12ULL + 16ULL * font.num_tables;
+  for (const auto& i : font.tables) {
+    const Font::Table& table = i.second;
+    size_t padding_size = (4 - (table.length & 3)) & 3;
+    size_t end_offset = (padding_size + table.offset) + table.length;
+    max_offset = std::max(max_offset, end_offset);
+  }
+  return max_offset;
+}
+
+bool WriteFont(const Font& font, uint8_t* dst, size_t dst_size) {
+  if (dst_size < 12ULL + 16ULL * font.num_tables) {
+    return OTS_FAILURE();
+  }
+  size_t offset = 0;
+  StoreU32(font.flavor, &offset, dst);
+  Store16(font.num_tables, &offset, dst);
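+  // searchRange, entrySelector and rangeShift as defined by the sfnt header:
+  // 16 * (largest power of two <= numTables), its log2, and the remainder.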
+  uint16_t max_pow2 = font.num_tables ? Log2Floor(font.num_tables) : 0;
+  uint16_t search_range = max_pow2 ? 1 << (max_pow2 + 4) : 0;
+  uint16_t range_shift = (font.num_tables << 4) - search_range;
+  Store16(search_range, &offset, dst);
+  Store16(max_pow2, &offset, dst);
+  Store16(range_shift, &offset, dst);
+  for (const auto& i : font.tables) {
+    const Font::Table& table = i.second;
+    StoreU32(table.tag, &offset, dst);
+    StoreU32(table.checksum, &offset, dst);
+    StoreU32(table.offset, &offset, dst);
+    StoreU32(table.length, &offset, dst);
+    if (table.offset + table.length < table.offset ||
+        dst_size < table.offset + table.length) {
+      return OTS_FAILURE();
+    }
+    memcpy(dst + table.offset, table.data, table.length);
+    size_t padding_size = (4 - (table.length & 3)) & 3;
+    if (table.offset + table.length + padding_size < padding_size ||
+        dst_size < table.offset + table.length + padding_size) {
+      return OTS_FAILURE();
+    }
+    memset(dst + table.offset + table.length, 0, padding_size);
+  }
+  return true;
+}
+
+int NumGlyphs(const Font& font) {
+  const Font::Table* head_table = font.FindTable(kHeadTableTag);
+  const Font::Table* loca_table = font.FindTable(kLocaTableTag);
+  if (head_table == NULL || loca_table == NULL || head_table->length < 52) {
+    return 0;
+  }
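+  // Byte 51 is the low byte of the 'head' table's indexToLocFormat field:
+  // 0 selects short (16-bit) 'loca' offsets, 1 selects long (32-bit) offsets.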
+  int index_fmt = head_table->data[51];
+  return (loca_table->length / (index_fmt == 0 ? 2 : 4)) - 1;
+}
+
+bool GetGlyphData(const Font& font, int glyph_index,
+                  const uint8_t** glyph_data, size_t* glyph_size) {
+  if (glyph_index < 0) {
+    return OTS_FAILURE();
+  }
+  const Font::Table* head_table = font.FindTable(kHeadTableTag);
+  const Font::Table* loca_table = font.FindTable(kLocaTableTag);
+  const Font::Table* glyf_table = font.FindTable(kGlyfTableTag);
+  if (head_table == NULL || loca_table == NULL || glyf_table == NULL ||
+      head_table->length < 52) {
+    return OTS_FAILURE();
+  }
+  int index_fmt = head_table->data[51];
+  ots::Buffer loca_buf(loca_table->data, loca_table->length);
+  if (index_fmt == 0) {
+    uint16_t offset1, offset2;
+    if (!loca_buf.Skip(2 * glyph_index) ||
+        !loca_buf.ReadU16(&offset1) ||
+        !loca_buf.ReadU16(&offset2) ||
+        offset2 < offset1 ||
+        2 * offset2 > glyf_table->length) {
+      return OTS_FAILURE();
+    }
+    *glyph_data = glyf_table->data + 2 * offset1;
+    *glyph_size = 2 * (offset2 - offset1);
+  } else {
+    uint32_t offset1, offset2;
+    if (!loca_buf.Skip(4 * glyph_index) ||
+        !loca_buf.ReadU32(&offset1) ||
+        !loca_buf.ReadU32(&offset2) ||
+        offset2 < offset1 ||
+        offset2 > glyf_table->length) {
+      return OTS_FAILURE();
+    }
+    *glyph_data = glyf_table->data + offset1;
+    *glyph_size = offset2 - offset1;
+  }
+  return true;
+}
+
+} // namespace woff2
diff --git a/woff2/font.h b/woff2/font.h
new file mode 100644
index 0000000..21fd634
--- /dev/null
+++ b/woff2/font.h
@@ -0,0 +1,81 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Data model for a font file in sfnt format, reading and writing functions and
+// accessors for the glyph data.
+
+#ifndef BROTLI_WOFF2_FONT_H_
+#define BROTLI_WOFF2_FONT_H_
+
+#include <stddef.h>
+#include <inttypes.h>
+#include <map>
+#include <vector>
+
+namespace woff2 {
+
+// Tags of popular tables.
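+// These are the big-endian ASCII values of 'glyf', 'head' and 'loca'.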
+static const uint32_t kGlyfTableTag = 0x676c7966;
+static const uint32_t kHeadTableTag = 0x68656164;
+static const uint32_t kLocaTableTag = 0x6c6f6361;
+
+// Represents an sfnt font file. Only the table directory is parsed; for the
+// table data we store only a raw pointer, so a Font object is valid only as
+// long as the data from which it was parsed is around.
+struct Font {
+  uint32_t flavor;
+  uint16_t num_tables;
+
+  struct Table {
+    uint32_t tag;
+    uint32_t checksum;
+    uint32_t offset;
+    uint32_t length;
+    const uint8_t* data;
+
+    // Buffer used to mutate the data before writing out.
+    std::vector<uint8_t> buffer;
+  };
+  std::map<uint32_t, Table> tables;
+
+  Table* FindTable(uint32_t tag);
+  const Table* FindTable(uint32_t tag) const;
+};
+
+// Parses the font from the given data. Returns false on parsing failure or
+// buffer overflow. The font is valid only as long as the input data pointer
+// is valid.
+bool ReadFont(const uint8_t* data, size_t len, Font* font);
+
+// Returns the file size of the font.
+size_t FontFileSize(const Font& font);
+
+// Writes the font into the specified dst buffer. The dst_size should be the
+// same as returned by FontFileSize(). Returns false upon buffer overflow (which
+// should not happen if dst_size was computed by FontFileSize()).
+bool WriteFont(const Font& font, uint8_t* dst, size_t dst_size);
+
+// Returns the number of glyphs in the font.
+// NOTE: Currently this works only for TrueType-flavored fonts, will return
+// zero for CFF-flavored fonts.
+int NumGlyphs(const Font& font);
+
+// Sets *glyph_data and *glyph_size to point to the location of the glyph data
+// with the given index. Returns false if the glyph is not found.
+bool GetGlyphData(const Font& font, int glyph_index,
+                  const uint8_t** glyph_data, size_t* glyph_size);
+
+} // namespace woff2
+
+#endif  // BROTLI_WOFF2_FONT_H_
diff --git a/woff2/glyph.cc b/woff2/glyph.cc
new file mode 100644
index 0000000..4752e09
--- /dev/null
+++ b/woff2/glyph.cc
@@ -0,0 +1,380 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Glyph manipulation
+
+#include "./glyph.h"
+
+#include <stdlib.h>
+#include <limits>
+#include "./ots.h"
+#include "./store_bytes.h"
+
+namespace woff2 {
+
+static const int32_t kFLAG_ONCURVE = 1;
+static const int32_t kFLAG_XSHORT = 1 << 1;
+static const int32_t kFLAG_YSHORT = 1 << 2;
+static const int32_t kFLAG_REPEAT = 1 << 3;
+static const int32_t kFLAG_XREPEATSIGN = 1 << 4;
+static const int32_t kFLAG_YREPEATSIGN = 1 << 5;
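+// Bits 4 and 5 are the spec's X/Y "same or positive" flags: with a short
+// coordinate they carry the sign, otherwise they mean "same as previous".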
+static const int32_t kFLAG_ARG_1_AND_2_ARE_WORDS = 1 << 0;
+static const int32_t kFLAG_WE_HAVE_A_SCALE = 1 << 3;
+static const int32_t kFLAG_MORE_COMPONENTS = 1 << 5;
+static const int32_t kFLAG_WE_HAVE_AN_X_AND_Y_SCALE = 1 << 6;
+static const int32_t kFLAG_WE_HAVE_A_TWO_BY_TWO = 1 << 7;
+static const int32_t kFLAG_WE_HAVE_INSTRUCTIONS = 1 << 8;
+
+bool ReadCompositeGlyphData(ots::Buffer* buffer, Glyph* glyph) {
+  glyph->have_instructions = false;
+  glyph->composite_data = buffer->buffer() + buffer->offset();
+  size_t start_offset = buffer->offset();
+  uint16_t flags = kFLAG_MORE_COMPONENTS;
+  while (flags & kFLAG_MORE_COMPONENTS) {
+    if (!buffer->ReadU16(&flags)) {
+      return OTS_FAILURE();
+    }
+    glyph->have_instructions |= (flags & kFLAG_WE_HAVE_INSTRUCTIONS) != 0;
+    size_t arg_size = 2;  // glyph index
+    if (flags & kFLAG_ARG_1_AND_2_ARE_WORDS) {
+      arg_size += 4;
+    } else {
+      arg_size += 2;
+    }
+    if (flags & kFLAG_WE_HAVE_A_SCALE) {
+      arg_size += 2;
+    } else if (flags & kFLAG_WE_HAVE_AN_X_AND_Y_SCALE) {
+      arg_size += 4;
+    } else if (flags & kFLAG_WE_HAVE_A_TWO_BY_TWO) {
+      arg_size += 8;
+    }
+    if (!buffer->Skip(arg_size)) {
+      return OTS_FAILURE();
+    }
+  }
+  if (buffer->offset() - start_offset > std::numeric_limits<uint32_t>::max()) {
+    return OTS_FAILURE();
+  }
+  glyph->composite_data_size = buffer->offset() - start_offset;
+  return true;
+}
+
+bool ReadGlyph(const uint8_t* data, size_t len, Glyph* glyph) {
+  ots::Buffer buffer(data, len);
+
+  int16_t num_contours;
+  if (!buffer.ReadS16(&num_contours)) {
+    return OTS_FAILURE();
+  }
+
+  if (num_contours == 0) {
+    // Empty glyph.
+    return true;
+  }
+
+  // Read the bounding box.
+  if (!buffer.ReadS16(&glyph->x_min) ||
+      !buffer.ReadS16(&glyph->y_min) ||
+      !buffer.ReadS16(&glyph->x_max) ||
+      !buffer.ReadS16(&glyph->y_max)) {
+    return OTS_FAILURE();
+  }
+
+  if (num_contours > 0) {
+    // Simple glyph.
+    glyph->contours.resize(num_contours);
+
+    // Read the number of points per contour.
+    uint16_t last_point_index = 0;
+    for (int i = 0; i < num_contours; ++i) {
+      uint16_t point_index;
+      if (!buffer.ReadU16(&point_index)) {
+        return OTS_FAILURE();
+      }
+      uint16_t num_points = point_index - last_point_index + (i == 0 ? 1 : 0);
+      glyph->contours[i].resize(num_points);
+      last_point_index = point_index;
+    }
+
+    // Read the instructions.
+    if (!buffer.ReadU16(&glyph->instructions_size)) {
+      return OTS_FAILURE();
+    }
+    glyph->instructions_data = data + buffer.offset();
+    if (!buffer.Skip(glyph->instructions_size)) {
+      return OTS_FAILURE();
+    }
+
+    // Read the run-length coded flags.
+    std::vector<std::vector<uint8_t> > flags(num_contours);
+    uint8_t flag = 0;
+    uint8_t flag_repeat = 0;
+    for (int i = 0; i < num_contours; ++i) {
+      flags[i].resize(glyph->contours[i].size());
+      for (int j = 0; j < glyph->contours[i].size(); ++j) {
+        if (flag_repeat == 0) {
+          if (!buffer.ReadU8(&flag)) {
+            return OTS_FAILURE();
+          }
+          if (flag & kFLAG_REPEAT) {
+            if (!buffer.ReadU8(&flag_repeat)) {
+              return OTS_FAILURE();
+            }
+          }
+        } else {
+          flag_repeat--;
+        }
+        flags[i][j] = flag;
+        glyph->contours[i][j].on_curve = flag & kFLAG_ONCURVE;
+      }
+    }
+
+    // Read the x coordinates.
+    int prev_x = 0;
+    for (int i = 0; i < num_contours; ++i) {
+      for (int j = 0; j < glyph->contours[i].size(); ++j) {
+        uint8_t flag = flags[i][j];
+        if (flag & kFLAG_XSHORT) {
+          // single byte x-delta coord value
+          uint8_t x_delta;
+          if (!buffer.ReadU8(&x_delta)) {
+            return OTS_FAILURE();
+          }
+          int sign = (flag & kFLAG_XREPEATSIGN) ? 1 : -1;
+          glyph->contours[i][j].x = prev_x + sign * x_delta;
+        } else {
+          // double byte x-delta coord value
+          int16_t x_delta = 0;
+          if (!(flag & kFLAG_XREPEATSIGN)) {
+            if (!buffer.ReadS16(&x_delta)) {
+              return OTS_FAILURE();
+            }
+          }
+          glyph->contours[i][j].x = prev_x + x_delta;
+        }
+        prev_x = glyph->contours[i][j].x;
+      }
+    }
+
+    // Read the y coordinates.
+    int prev_y = 0;
+    for (int i = 0; i < num_contours; ++i) {
+      for (int j = 0; j < glyph->contours[i].size(); ++j) {
+        uint8_t flag = flags[i][j];
+        if (flag & kFLAG_YSHORT) {
+          // single byte y-delta coord value
+          uint8_t y_delta;
+          if (!buffer.ReadU8(&y_delta)) {
+            return OTS_FAILURE();
+          }
+          int sign = (flag & kFLAG_YREPEATSIGN) ? 1 : -1;
+          glyph->contours[i][j].y = prev_y + sign * y_delta;
+        } else {
+          // double byte y-delta coord value
+          int16_t y_delta = 0;
+          if (!(flag & kFLAG_YREPEATSIGN)) {
+            if (!buffer.ReadS16(&y_delta)) {
+              return OTS_FAILURE();
+            }
+          }
+          glyph->contours[i][j].y = prev_y + y_delta;
+        }
+        prev_y = glyph->contours[i][j].y;
+      }
+    }
+  } else if (num_contours == -1) {
+    // Composite glyph.
+    if (!ReadCompositeGlyphData(&buffer, glyph)) {
+      return OTS_FAILURE();
+    }
+    // Read the instructions.
+    if (glyph->have_instructions) {
+      if (!buffer.ReadU16(&glyph->instructions_size)) {
+        return OTS_FAILURE();
+      }
+      glyph->instructions_data = data + buffer.offset();
+      if (!buffer.Skip(glyph->instructions_size)) {
+        return OTS_FAILURE();
+      }
+    } else {
+      glyph->instructions_size = 0;
+    }
+  } else {
+    return OTS_FAILURE();
+  }
+  return true;
+}
+
+namespace {
+
+void StoreBbox(const Glyph& glyph, size_t* offset, uint8_t* dst) {
+  Store16(glyph.x_min, offset, dst);
+  Store16(glyph.y_min, offset, dst);
+  Store16(glyph.x_max, offset, dst);
+  Store16(glyph.y_max, offset, dst);
+}
+
+void StoreInstructions(const Glyph& glyph, size_t* offset, uint8_t* dst) {
+  Store16(glyph.instructions_size, offset, dst);
+  StoreBytes(glyph.instructions_data, glyph.instructions_size, offset, dst);
+}
+
+bool StoreEndPtsOfContours(const Glyph& glyph, size_t* offset, uint8_t* dst) {
+  int end_point = -1;
+  for (const auto& contour : glyph.contours) {
+    end_point += contour.size();
+    if (contour.size() > std::numeric_limits<uint16_t>::max() ||
+        end_point > std::numeric_limits<uint16_t>::max()) {
+      return OTS_FAILURE();
+    }
+    Store16(end_point, offset, dst);
+  }
+  return true;
+}
+
+bool StorePoints(const Glyph& glyph, size_t* offset,
+                 uint8_t* dst, size_t dst_size) {
+  int last_flag = -1;
+  int repeat_count = 0;
+  int last_x = 0;
+  int last_y = 0;
+  size_t x_bytes = 0;
+  size_t y_bytes = 0;
+
+  // Store the flags and calculate the total size of the x and y coordinates.
+  for (const auto& contour : glyph.contours) {
+    for (const auto& point : contour) {
+      int flag = point.on_curve ? kFLAG_ONCURVE : 0;
+      int dx = point.x - last_x;
+      int dy = point.y - last_y;
+      if (dx == 0) {
+        flag |= kFLAG_XREPEATSIGN;
+      } else if (dx > -256 && dx < 256) {
+        flag |= kFLAG_XSHORT | (dx > 0 ? kFLAG_XREPEATSIGN : 0);
+        x_bytes += 1;
+      } else {
+        x_bytes += 2;
+      }
+      if (dy == 0) {
+        flag |= kFLAG_YREPEATSIGN;
+      } else if (dy > -256 && dy < 256) {
+        flag |= kFLAG_YSHORT | (dy > 0 ? kFLAG_YREPEATSIGN : 0);
+        y_bytes += 1;
+      } else {
+        y_bytes += 2;
+      }
+      if (flag == last_flag && repeat_count != 255) {
+        dst[*offset - 1] |= kFLAG_REPEAT;
+        repeat_count++;
+      } else {
+        if (repeat_count != 0) {
+          if (*offset >= dst_size) {
+            return OTS_FAILURE();
+          }
+          dst[(*offset)++] = repeat_count;
+        }
+        if (*offset >= dst_size) {
+          return OTS_FAILURE();
+        }
+        dst[(*offset)++] = flag;
+        repeat_count = 0;
+      }
+      last_x = point.x;
+      last_y = point.y;
+      last_flag = flag;
+    }
+  }
+  if (repeat_count != 0) {
+    if (*offset >= dst_size) {
+      return OTS_FAILURE();
+    }
+    dst[(*offset)++] = repeat_count;
+  }
+
+  if (*offset + x_bytes + y_bytes > dst_size) {
+    return OTS_FAILURE();
+  }
+
+  // Store the x and y coordinates.
+  size_t x_offset = *offset;
+  size_t y_offset = *offset + x_bytes;
+  last_x = 0;
+  last_y = 0;
+  for (const auto& contour : glyph.contours) {
+    for (const auto& point : contour) {
+      int dx = point.x - last_x;
+      int dy = point.y - last_y;
+      if (dx == 0) {
+        // pass
+      } else if (dx > -256 && dx < 256) {
+        dst[x_offset++] = std::abs(dx);
+      } else {
+        Store16(dx, &x_offset, dst);
+      }
+      if (dy == 0) {
+        // pass
+      } else if (dy > -256 && dy < 256) {
+        dst[y_offset++] = std::abs(dy);
+      } else {
+        Store16(dy, &y_offset, dst);
+      }
+      last_x += dx;
+      last_y += dy;
+    }
+  }
+  *offset = y_offset;
+  return true;
+}
+
+}  // namespace
+
+bool StoreGlyph(const Glyph& glyph, uint8_t* dst, size_t* dst_size) {
+  size_t offset = 0;
+  if (glyph.composite_data_size > 0) {
+    // Composite glyph.
+    if (*dst_size < ((10ULL + glyph.composite_data_size) +
+                     ((glyph.have_instructions ? 2ULL : 0) +
+                      glyph.instructions_size))) {
+      return OTS_FAILURE();
+    }
+    Store16(-1, &offset, dst);
+    StoreBbox(glyph, &offset, dst);
+    StoreBytes(glyph.composite_data, glyph.composite_data_size, &offset, dst);
+    if (glyph.have_instructions) {
+      StoreInstructions(glyph, &offset, dst);
+    }
+  } else if (glyph.contours.size() > 0) {
+    // Simple glyph.
+    if (glyph.contours.size() > std::numeric_limits<int16_t>::max()) {
+      return OTS_FAILURE();
+    }
+    if (*dst_size < ((12ULL + 2 * glyph.contours.size()) +
+                     glyph.instructions_size)) {
+      return OTS_FAILURE();
+    }
+    Store16(glyph.contours.size(), &offset, dst);
+    StoreBbox(glyph, &offset, dst);
+    if (!StoreEndPtsOfContours(glyph, &offset, dst)) {
+      return OTS_FAILURE();
+    }
+    StoreInstructions(glyph, &offset, dst);
+    if (!StorePoints(glyph, &offset, dst, *dst_size)) {
+      return OTS_FAILURE();
+    }
+  }
+  *dst_size = offset;
+  return true;
+}
+
+} // namespace woff2
diff --git a/woff2/glyph.h b/woff2/glyph.h
new file mode 100644
index 0000000..2e249f6
--- /dev/null
+++ b/woff2/glyph.h
@@ -0,0 +1,71 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Data model and I/O for glyph data within sfnt format files for the purpose of
+// performing the preprocessing step of the WOFF 2.0 conversion.
+
+#ifndef BROTLI_WOFF2_GLYPH_H_
+#define BROTLI_WOFF2_GLYPH_H_
+
+#include <stddef.h>
+#include <inttypes.h>
+#include <vector>
+
+namespace woff2 {
+
+// Represents a parsed simple or composite glyph. The composite glyph data and
+// instructions are left unparsed and we keep only pointers to the raw data, so
+// the glyph is valid only as long as the data from which it was parsed is
+// around.
+class Glyph {
+ public:
+  Glyph() : instructions_size(0), composite_data_size(0) {}
+
+  // Bounding box.
+  int16_t x_min;
+  int16_t x_max;
+  int16_t y_min;
+  int16_t y_max;
+
+  // Instructions.
+  uint16_t instructions_size;
+  const uint8_t* instructions_data;
+
+  // Data model for simple glyphs.
+  struct Point {
+    int x;
+    int y;
+    bool on_curve;
+  };
+  std::vector<std::vector<Point> > contours;
+
+  // Data for composite glyphs.
+  const uint8_t* composite_data;
+  uint32_t composite_data_size;
+  bool have_instructions;
+};
+
+// Parses the glyph from the given data. Returns false on parsing failure or
+// buffer overflow. The glyph is valid only as long as the input data pointer
+// is valid.
+bool ReadGlyph(const uint8_t* data, size_t len, Glyph* glyph);
+
+// Stores the glyph into the specified dst buffer. The *dst_size is the buffer
+// size on entry and is set to the actual (unpadded) stored size on exit.
+// Returns false on buffer overflow.
+bool StoreGlyph(const Glyph& glyph, uint8_t* dst, size_t* dst_size);
+
+} // namespace woff2
+
+#endif  // BROTLI_WOFF2_GLYPH_H_
diff --git a/woff2/normalize.cc b/woff2/normalize.cc
new file mode 100644
index 0000000..ef9f158
--- /dev/null
+++ b/woff2/normalize.cc
@@ -0,0 +1,194 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Glyph normalization
+
+#include "./normalize.h"
+
+#include <inttypes.h>
+#include <stddef.h>
+
+#include "./ots.h"
+#include "./port.h"
+#include "./font.h"
+#include "./glyph.h"
+#include "./round.h"
+#include "./store_bytes.h"
+
+namespace woff2 {
+
+namespace {
+
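+// Writes one loca entry; with the short index format, offsets are stored in
+// half-units (value / 2).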
+void StoreLoca(int index_fmt, uint32_t value, size_t* offset, uint8_t* dst) {
+  if (index_fmt == 0) {
+    Store16(value >> 1, offset, dst);
+  } else {
+    StoreU32(value, offset, dst);
+  }
+}
+
+void NormalizeSimpleGlyphBoundingBox(Glyph* glyph) {
+  if (glyph->contours.empty() || glyph->contours[0].empty()) {
+    return;
+  }
+  int16_t x_min = glyph->contours[0][0].x;
+  int16_t y_min = glyph->contours[0][0].y;
+  int16_t x_max = x_min;
+  int16_t y_max = y_min;
+  for (const auto& contour : glyph->contours) {
+    for (const auto& point : contour) {
+      if (point.x < x_min) x_min = point.x;
+      if (point.x > x_max) x_max = point.x;
+      if (point.y < y_min) y_min = point.y;
+      if (point.y > y_max) y_max = point.y;
+    }
+  }
+  glyph->x_min = x_min;
+  glyph->y_min = y_min;
+  glyph->x_max = x_max;
+  glyph->y_max = y_max;
+}
+
+}  // namespace
+
+bool NormalizeGlyphs(Font* font) {
+  Font::Table* head_table = font->FindTable(kHeadTableTag);
+  Font::Table* glyf_table = font->FindTable(kGlyfTableTag);
+  Font::Table* loca_table = font->FindTable(kLocaTableTag);
+  if (head_table == NULL || loca_table == NULL || glyf_table == NULL) {
+    return OTS_FAILURE();
+  }
+  int index_fmt = head_table->data[51];
+  int num_glyphs = NumGlyphs(*font);
+
+  // We need to allocate a bit more than its original length for the normalized
+  // glyf table, since it can happen that the glyphs in the original table are
+  // 2-byte aligned, while in the normalized table they are 4-byte aligned.
+  // That gives a maximum increase of 2 bytes per glyph. However, there is no
+  // theoretical guarantee that the total size of the flags plus the coordinates
+  // is the smallest possible in the normalized version, so we have to allow
+  // some general overhead.
+  // TODO(user) Figure out some more precise upper bound on the size of
+  // the overhead.
+  size_t max_normalized_glyf_size = 1.1 * glyf_table->length + 2 * num_glyphs;
+
+  glyf_table->buffer.resize(max_normalized_glyf_size);
+  loca_table->buffer.resize(Round4(loca_table->length));
+  uint8_t* glyf_dst = &glyf_table->buffer[0];
+  uint8_t* loca_dst = &loca_table->buffer[0];
+  uint32_t glyf_offset = 0;
+  size_t loca_offset = 0;
+
+  for (int i = 0; i < num_glyphs; ++i) {
+    StoreLoca(index_fmt, glyf_offset, &loca_offset, loca_dst);
+    Glyph glyph;
+    const uint8_t* glyph_data;
+    size_t glyph_size;
+    if (!GetGlyphData(*font, i, &glyph_data, &glyph_size) ||
+        (glyph_size > 0 && !ReadGlyph(glyph_data, glyph_size, &glyph))) {
+      return OTS_FAILURE();
+    }
+    NormalizeSimpleGlyphBoundingBox(&glyph);
+    size_t glyf_dst_size = glyf_table->buffer.size() - glyf_offset;
+    if (!StoreGlyph(glyph, glyf_dst + glyf_offset, &glyf_dst_size)) {
+      return OTS_FAILURE();
+    }
+    glyf_dst_size = Round4(glyf_dst_size);
+    if (glyf_dst_size > std::numeric_limits<uint32_t>::max() ||
+        glyf_offset + static_cast<uint32_t>(glyf_dst_size) < glyf_offset ||
+        (index_fmt == 0 && glyf_offset + glyf_dst_size >= (1UL << 17))) {
+      return OTS_FAILURE();
+    }
+    glyf_offset += glyf_dst_size;
+  }
+  StoreLoca(index_fmt, glyf_offset, &loca_offset, loca_dst);
+
+  glyf_table->buffer.resize(glyf_offset);
+  glyf_table->data = &glyf_table->buffer[0];
+  glyf_table->length = glyf_offset;
+  loca_table->data = &loca_table->buffer[0];
+
+  return true;
+}
+
+bool NormalizeOffsets(Font* font) {
+  uint32_t offset = 12 + 16 * font->num_tables;
+  for (auto& i : font->tables) {
+    i.second.offset = offset;
+    offset += Round4(i.second.length);
+  }
+  return true;
+}
+
+namespace {
+
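+// OpenType table checksum: the buffer is summed (mod 2^32) as big-endian
+// 32-bit words; table data is expected to be zero-padded to a 4-byte boundary.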
+uint32_t ComputeChecksum(const uint8_t* buf, size_t size) {
+  uint32_t checksum = 0;
+  for (size_t i = 0; i < size; i += 4) {
+    checksum += ((buf[i] << 24) |
+                 (buf[i + 1] << 16) |
+                 (buf[i + 2] << 8) |
+                 buf[i + 3]);
+  }
+  return checksum;
+}
+
+uint32_t ComputeHeaderChecksum(const Font& font) {
+  uint32_t checksum = font.flavor;
+  uint16_t max_pow2 = font.num_tables ? Log2Floor(font.num_tables) : 0;
+  uint16_t search_range = max_pow2 ? 1 << (max_pow2 + 4) : 0;
+  uint16_t range_shift = (font.num_tables << 4) - search_range;
+  checksum += (font.num_tables << 16 | search_range);
+  checksum += (max_pow2 << 16 | range_shift);
+  for (const auto& i : font.tables) {
+    checksum += i.second.tag;
+    checksum += i.second.checksum;
+    checksum += i.second.offset;
+    checksum += i.second.length;
+  }
+  return checksum;
+}
+
+}  // namespace
+
+bool FixChecksums(Font* font) {
+  Font::Table* head_table = font->FindTable(kHeadTableTag);
+  if (head_table == NULL || head_table->length < 12) {
+    return OTS_FAILURE();
+  }
+  head_table->buffer.resize(Round4(head_table->length));
+  uint8_t* head_buf = &head_table->buffer[0];
+  memcpy(head_buf, head_table->data, Round4(head_table->length));
+  head_table->data = head_buf;
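+  // Zero the checkSumAdjustment field (offset 8 of 'head') before summing, then
+  // store 0xB1B0AFBA minus the whole-file checksum, as required by the sfnt spec.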
+  size_t offset = 8;
+  StoreU32(0, &offset, head_buf);
+  uint32_t file_checksum = 0;
+  for (auto& i : font->tables) {
+    Font::Table* table = &i.second;
+    table->checksum = ComputeChecksum(table->data, table->length);
+    file_checksum += table->checksum;
+  }
+  file_checksum += ComputeHeaderChecksum(*font);
+  offset = 8;
+  StoreU32(0xb1b0afba - file_checksum, &offset, head_buf);
+  return true;
+}
+
+bool NormalizeFont(Font* font) {
+  return (NormalizeGlyphs(font) &&
+          NormalizeOffsets(font) &&
+          FixChecksums(font));
+}
+
+} // namespace woff2
diff --git a/woff2/normalize.h b/woff2/normalize.h
new file mode 100644
index 0000000..b3d8331
--- /dev/null
+++ b/woff2/normalize.h
@@ -0,0 +1,45 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Functions for normalizing fonts. Since the WOFF 2.0 decoder creates font
+// files in normalized form, the WOFF 2.0 conversion is guaranteed to be
+// lossless (in a bitwise sense) only for normalized font files.
+
+#ifndef BROTLI_WOFF2_NORMALIZE_H_
+#define BROTLI_WOFF2_NORMALIZE_H_
+
+namespace woff2 {
+
+struct Font;
+
+// Changes the offset fields of the table headers so that the data for the
+// tables will be written in order of increasing tag values, without any gaps
+// other than the 4-byte padding.
+bool NormalizeOffsets(Font* font);
+
+// Changes the checksum fields of the table headers and the checksum field of
+// the head table so that they match the current data.
+bool FixChecksums(Font* font);
+
+// Parses each of the glyphs in the font and writes them again to the glyf
+// table in normalized form, as defined by the StoreGlyph() function. Changes
+// the loca table accordingly.
+bool NormalizeGlyphs(Font* font);
+
+// Performs all of the normalization steps above.
+bool NormalizeFont(Font* font);
+
+} // namespace woff2
+
+#endif  // BROTLI_WOFF2_NORMALIZE_H_
diff --git a/woff2/ots.h b/woff2/ots.h
new file mode 100644
index 0000000..4eac1cb
--- /dev/null
+++ b/woff2/ots.h
@@ -0,0 +1,153 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// The parts of ots.h & opentype-sanitiser.h that we need, taken from the
+// https://code.google.com/p/ots/ project.
+
+#ifndef BROTLI_WOFF2_OTS_H_
+#define BROTLI_WOFF2_OTS_H_
+
+#include <stdint.h>
+#include <arpa/inet.h>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <limits>
+
+namespace ots {
+
+#if defined(_MSC_VER) || !defined(OTS_DEBUG)
+#define OTS_FAILURE() false
+#else
+#define OTS_FAILURE() ots::Failure(__FILE__, __LINE__, __PRETTY_FUNCTION__)
+inline bool Failure(const char *f, int l, const char *fn) {
+  std::fprintf(stderr, "ERROR at %s:%d (%s)\n", f, l, fn);
+  std::fflush(stderr);
+  return false;
+}
+#endif
+
+// -----------------------------------------------------------------------------
+// Buffer helper class
+//
+// This class performs some trivial buffer operations while checking for
+// out-of-bounds errors. The read methods return false if anything is amiss
+// and update the current offset otherwise.
+// -----------------------------------------------------------------------------
+class Buffer {
+ public:
+  Buffer(const uint8_t *buffer, size_t len)
+      : buffer_(buffer),
+        length_(len),
+        offset_(0) { }
+
+  bool Skip(size_t n_bytes) {
+    return Read(NULL, n_bytes);
+  }
+
+  bool Read(uint8_t *buffer, size_t n_bytes) {
+    if (n_bytes > 1024 * 1024 * 1024) {
+      return OTS_FAILURE();
+    }
+    if ((offset_ + n_bytes > length_) ||
+        (offset_ > length_ - n_bytes)) {
+      return OTS_FAILURE();
+    }
+    if (buffer) {
+      std::memcpy(buffer, buffer_ + offset_, n_bytes);
+    }
+    offset_ += n_bytes;
+    return true;
+  }
+
+  inline bool ReadU8(uint8_t *value) {
+    if (offset_ + 1 > length_) {
+      return OTS_FAILURE();
+    }
+    *value = buffer_[offset_];
+    ++offset_;
+    return true;
+  }
+
+  bool ReadU16(uint16_t *value) {
+    if (offset_ + 2 > length_) {
+      return OTS_FAILURE();
+    }
+    std::memcpy(value, buffer_ + offset_, sizeof(uint16_t));
+    *value = ntohs(*value);
+    offset_ += 2;
+    return true;
+  }
+
+  bool ReadS16(int16_t *value) {
+    return ReadU16(reinterpret_cast<uint16_t*>(value));
+  }
+
+  bool ReadU24(uint32_t *value) {
+    if (offset_ + 3 > length_) {
+      return OTS_FAILURE();
+    }
+    *value = static_cast<uint32_t>(buffer_[offset_]) << 16 |
+        static_cast<uint32_t>(buffer_[offset_ + 1]) << 8 |
+        static_cast<uint32_t>(buffer_[offset_ + 2]);
+    offset_ += 3;
+    return true;
+  }
+
+  bool ReadU32(uint32_t *value) {
+    if (offset_ + 4 > length_) {
+      return OTS_FAILURE();
+    }
+    std::memcpy(value, buffer_ + offset_, sizeof(uint32_t));
+    *value = ntohl(*value);
+    offset_ += 4;
+    return true;
+  }
+
+  bool ReadS32(int32_t *value) {
+    return ReadU32(reinterpret_cast<uint32_t*>(value));
+  }
+
+  bool ReadTag(uint32_t *value) {
+    if (offset_ + 4 > length_) {
+      return OTS_FAILURE();
+    }
+    std::memcpy(value, buffer_ + offset_, sizeof(uint32_t));
+    offset_ += 4;
+    return true;
+  }
+
+  bool ReadR64(uint64_t *value) {
+    if (offset_ + 8 > length_) {
+      return OTS_FAILURE();
+    }
+    std::memcpy(value, buffer_ + offset_, sizeof(uint64_t));
+    offset_ += 8;
+    return true;
+  }
+
+  const uint8_t *buffer() const { return buffer_; }
+  size_t offset() const { return offset_; }
+  size_t length() const { return length_; }
+
+  void set_offset(size_t newoffset) { offset_ = newoffset; }
+
+ private:
+  const uint8_t * const buffer_;
+  const size_t length_;
+  size_t offset_;
+};
+
+}  // namespace ots
+
+#endif  // BROTLI_WOFF2_OTS_H_
diff --git a/woff2/port.h b/woff2/port.h
new file mode 100644
index 0000000..e7a2708
--- /dev/null
+++ b/woff2/port.h
@@ -0,0 +1,46 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Helper function for bit twiddling
+
+#ifndef BROTLI_WOFF2_PORT_H_
+#define BROTLI_WOFF2_PORT_H_
+
+#include <assert.h>
+
+namespace woff2 {
+
+typedef unsigned int       uint32;
+
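+// Returns the floor of log2(n), or -1 when n is 0. For example,
+// Log2Floor(1) == 0, Log2Floor(16) == 4 and Log2Floor(17) == 4.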
+inline int Log2Floor(uint32 n) {
+#if defined(__GNUC__)
+  return n == 0 ? -1 : 31 ^ __builtin_clz(n);
+#else
+  if (n == 0)
+    return -1;
+  int log = 0;
+  uint32 value = n;
+  for (int i = 4; i >= 0; --i) {
+    int shift = (1 << i);
+    uint32 x = value >> shift;
+    if (x != 0) {
+      value = x;
+      log += shift;
+    }
+  }
+  assert(value == 1);
+  return log;
+#endif
+}
+
+} // namespace woff2
+#endif  // BROTLI_WOFF2_PORT_H_
diff --git a/woff2/round.h b/woff2/round.h
new file mode 100644
index 0000000..4d88862
--- /dev/null
+++ b/woff2/round.h
@@ -0,0 +1,33 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Helper for rounding
+
+#ifndef BROTLI_WOFF2_ROUND_H_
+#define BROTLI_WOFF2_ROUND_H_
+
+#include <limits>
+
+namespace woff2 {
+
+// Rounds a value up to the nearest multiple of 4, leaving it unchanged if
+// rounding up would overflow the type.
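+// For example, Round4(10) == 12 and Round4(12) == 12.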
+template<typename T> T Round4(T value) {
+  if (std::numeric_limits<T>::max() - value < 3) {
+    return value;
+  }
+  return (value + 3) & ~3;
+}
+
+} // namespace woff2
+
+#endif  // BROTLI_WOFF2_ROUND_H_
diff --git a/woff2/store_bytes.h b/woff2/store_bytes.h
new file mode 100644
index 0000000..37054b2
--- /dev/null
+++ b/woff2/store_bytes.h
@@ -0,0 +1,61 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Helper functions for storing integer values into byte streams.
+// No bounds checking is performed; that is the responsibility of the caller.
+
+#ifndef BROTLI_WOFF2_STORE_BYTES_H_
+#define BROTLI_WOFF2_STORE_BYTES_H_
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <string.h>
+
+namespace woff2 {
+
+inline size_t StoreU32(uint8_t* dst, size_t offset, uint32_t x) {
+  dst[offset] = x >> 24;
+  dst[offset + 1] = x >> 16;
+  dst[offset + 2] = x >> 8;
+  dst[offset + 3] = x;
+  return offset + 4;
+}
+
+inline size_t Store16(uint8_t* dst, size_t offset, int x) {
+  dst[offset] = x >> 8;
+  dst[offset + 1] = x;
+  return offset + 2;
+}
+
+inline void StoreU32(uint32_t val, size_t* offset, uint8_t* dst) {
+  dst[(*offset)++] = val >> 24;
+  dst[(*offset)++] = val >> 16;
+  dst[(*offset)++] = val >> 8;
+  dst[(*offset)++] = val;
+}
+
+inline void Store16(int val, size_t* offset, uint8_t* dst) {
+  dst[(*offset)++] = val >> 8;
+  dst[(*offset)++] = val;
+}
+
+inline void StoreBytes(const uint8_t* data, size_t len,
+                       size_t* offset, uint8_t* dst) {
+  memcpy(&dst[*offset], data, len);
+  *offset += len;
+}
+
+} // namespace woff2
+
+#endif  // BROTLI_WOFF2_STORE_BYTES_H_
diff --git a/woff2/transform.cc b/woff2/transform.cc
new file mode 100644
index 0000000..a218ed1
--- /dev/null
+++ b/woff2/transform.cc
@@ -0,0 +1,263 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Library for preprocessing fonts as part of the WOFF 2.0 conversion.
+
+#include "./transform.h"
+
+#include <complex>  // for std::abs
+
+#include "./ots.h"
+#include "./font.h"
+#include "./glyph.h"
+
+namespace woff2 {
+
+namespace {
+
+const int FLAG_ARG_1_AND_2_ARE_WORDS = 1 << 0;
+const int FLAG_WE_HAVE_INSTRUCTIONS = 1 << 8;
+
+void WriteBytes(std::vector<uint8_t>* out, const uint8_t* data, size_t len) {
+  if (len == 0) return;
+  size_t offset = out->size();
+  out->resize(offset + len);
+  memcpy(&(*out)[offset], data, len);
+}
+
+void WriteBytes(std::vector<uint8_t>* out, const std::vector<uint8_t>& in) {
+  for (int i = 0; i < in.size(); ++i) {
+    out->push_back(in[i]);
+  }
+}
+
+void WriteUShort(std::vector<uint8_t>* out, int value) {
+  out->push_back(value >> 8);
+  out->push_back(value & 255);
+}
+
+void WriteLong(std::vector<uint8_t>* out, int value) {
+  out->push_back((value >> 24) & 255);
+  out->push_back((value >> 16) & 255);
+  out->push_back((value >> 8) & 255);
+  out->push_back(value & 255);
+}
+
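+// Writes a value in the variable-length "255UShort" encoding used by the glyf
+// transform. For example, 100 is written as the single byte 100; 300 as
+// {255, 47}; 600 as {254, 94}; and 1000 as {253, 0x03, 0xe8}.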
+void Write255UShort(std::vector<uint8_t>* out, int value) {
+  if (value < 253) {
+    out->push_back(value);
+  } else if (value < 506) {
+    out->push_back(255);
+    out->push_back(value - 253);
+  } else if (value < 762) {
+    out->push_back(254);
+    out->push_back(value - 506);
+  } else {
+    out->push_back(253);
+    out->push_back(value >> 8);
+    out->push_back(value & 0xff);
+  }
+}
+
+// Glyf table preprocessing, based on GlyfEncoder.java; only the "sbbox" and
+// "cbbox" options are supported.
+class GlyfEncoder {
+ public:
+  explicit GlyfEncoder(int num_glyphs)
+      : sbbox_(false), cbbox_(true), n_glyphs_(num_glyphs) {
+    bbox_bitmap_.resize(((num_glyphs + 31) >> 5) << 2);
+  }
+
+  bool Encode(int glyph_id, const Glyph& glyph) {
+    if (glyph.composite_data_size > 0) {
+      WriteCompositeGlyph(glyph_id, glyph);
+    } else if (glyph.contours.size() > 0) {
+      WriteSimpleGlyph(glyph_id, glyph);
+    } else {
+      WriteUShort(&n_contour_stream_, 0);
+    }
+    return true;
+  }
+
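+  // Writes the transformed glyf table: a 4-byte version, a 2-byte glyph
+  // count, a 2-byte index-format placeholder, seven 4-byte substream sizes,
+  // and then the substreams themselves in the same order (the bbox bitmap
+  // and bbox stream share a single size entry).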
+  void GetTransformedGlyfBytes(std::vector<uint8_t>* result) {
+    WriteLong(result, 0);  // version
+    WriteUShort(result, n_glyphs_);
+    WriteUShort(result, 0);  // index_format, will be set later
+    WriteLong(result, n_contour_stream_.size());
+    WriteLong(result, n_points_stream_.size());
+    WriteLong(result, flag_byte_stream_.size());
+    WriteLong(result, glyph_stream_.size());
+    WriteLong(result, composite_stream_.size());
+    WriteLong(result, bbox_bitmap_.size() + bbox_stream_.size());
+    WriteLong(result, instruction_stream_.size());
+    WriteBytes(result, n_contour_stream_);
+    WriteBytes(result, n_points_stream_);
+    WriteBytes(result, flag_byte_stream_);
+    WriteBytes(result, glyph_stream_);
+    WriteBytes(result, composite_stream_);
+    WriteBytes(result, bbox_bitmap_);
+    WriteBytes(result, bbox_stream_);
+    WriteBytes(result, instruction_stream_);
+  }
+
+ private:
+  void WriteInstructions(const Glyph& glyph) {
+    Write255UShort(&glyph_stream_, glyph.instructions_size);
+    WriteBytes(&instruction_stream_,
+               glyph.instructions_data, glyph.instructions_size);
+  }
+
+  void WriteSimpleGlyph(int glyph_id, const Glyph& glyph) {
+    int num_contours = glyph.contours.size();
+    WriteUShort(&n_contour_stream_, num_contours);
+    if (sbbox_) {
+      WriteBbox(glyph_id, glyph);
+    }
+    // TODO: check that bbox matches, write bbox if not
+    for (int i = 0; i < num_contours; i++) {
+      Write255UShort(&n_points_stream_, glyph.contours[i].size());
+    }
+    int lastX = 0;
+    int lastY = 0;
+    for (int i = 0; i < num_contours; i++) {
+      int num_points = glyph.contours[i].size();
+      for (int j = 0; j < num_points; j++) {
+        int x = glyph.contours[i][j].x;
+        int y = glyph.contours[i][j].y;
+        int dx = x - lastX;
+        int dy = y - lastY;
+        WriteTriplet(glyph.contours[i][j].on_curve, dx, dy);
+        lastX = x;
+        lastY = y;
+      }
+    }
+    if (num_contours > 0) {
+      WriteInstructions(glyph);
+    }
+  }
+
+  void WriteCompositeGlyph(int glyph_id, const Glyph& glyph) {
+    WriteUShort(&n_contour_stream_, -1);
+    if (cbbox_) {
+      WriteBbox(glyph_id, glyph);
+    }
+    WriteBytes(&composite_stream_,
+               glyph.composite_data,
+               glyph.composite_data_size);
+    if (glyph.have_instructions) {
+      WriteInstructions(glyph);
+    }
+  }
+
+  void WriteBbox(int glyph_id, const Glyph& glyph) {
+    bbox_bitmap_[glyph_id >> 3] |= 0x80 >> (glyph_id & 7);
+    WriteUShort(&bbox_stream_, glyph.x_min);
+    WriteUShort(&bbox_stream_, glyph.y_min);
+    WriteUShort(&bbox_stream_, glyph.x_max);
+    WriteUShort(&bbox_stream_, glyph.y_max);
+  }
+
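+  // Encodes one point delta as a flag byte plus one to four bytes in
+  // glyph_stream_. For example, an on-curve delta of (dx = 0, dy = 100) takes
+  // the first branch: the flag byte is 0 + ((100 & 0xf00) >> 7) + 1 == 1 and
+  // the data is the single byte 100. TripletDecode() in woff2.cc inverts this
+  // encoding.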
+  void WriteTriplet(bool on_curve, int x, int y) {
+    int abs_x = std::abs(x);
+    int abs_y = std::abs(y);
+    int on_curve_bit = on_curve ? 0 : 128;
+    int x_sign_bit = (x < 0) ? 0 : 1;
+    int y_sign_bit = (y < 0) ? 0 : 1;
+    int xy_sign_bits = x_sign_bit + 2 * y_sign_bit;
+    if (x == 0 && abs_y < 1280) {
+      flag_byte_stream_.push_back(on_curve_bit +
+                                  ((abs_y & 0xf00) >> 7) + y_sign_bit);
+      glyph_stream_.push_back(abs_y & 0xff);
+    } else if (y == 0 && abs_x < 1280) {
+      flag_byte_stream_.push_back(on_curve_bit + 10 +
+                                  ((abs_x & 0xf00) >> 7) + x_sign_bit);
+      glyph_stream_.push_back(abs_x & 0xff);
+    } else if (abs_x < 65 && abs_y < 65) {
+      flag_byte_stream_.push_back(on_curve_bit + 20 +
+                                  ((abs_x - 1) & 0x30) +
+                                  (((abs_y - 1) & 0x30) >> 2) +
+                                  xy_sign_bits);
+      glyph_stream_.push_back((((abs_x - 1) & 0xf) << 4) | ((abs_y - 1) & 0xf));
+    } else if (abs_x < 769 && abs_y < 769) {
+      flag_byte_stream_.push_back(on_curve_bit + 84 +
+                                  12 * (((abs_x - 1) & 0x300) >> 8) +
+                                  (((abs_y - 1) & 0x300) >> 6) + xy_sign_bits);
+      glyph_stream_.push_back((abs_x - 1) & 0xff);
+      glyph_stream_.push_back((abs_y - 1) & 0xff);
+    } else if (abs_x < 4096 && abs_y < 4096) {
+      flag_byte_stream_.push_back(on_curve_bit + 120 + xy_sign_bits);
+      glyph_stream_.push_back(abs_x >> 4);
+      glyph_stream_.push_back(((abs_x & 0xf) << 4) | (abs_y >> 8));
+      glyph_stream_.push_back(abs_y & 0xff);
+    } else {
+      flag_byte_stream_.push_back(on_curve_bit + 124 + xy_sign_bits);
+      glyph_stream_.push_back(abs_x >> 8);
+      glyph_stream_.push_back(abs_x & 0xff);
+      glyph_stream_.push_back(abs_y >> 8);
+      glyph_stream_.push_back(abs_y & 0xff);
+    }
+  }
+
+  std::vector<uint8_t> n_contour_stream_;
+  std::vector<uint8_t> n_points_stream_;
+  std::vector<uint8_t> flag_byte_stream_;
+  std::vector<uint8_t> composite_stream_;
+  std::vector<uint8_t> bbox_bitmap_;
+  std::vector<uint8_t> bbox_stream_;
+  std::vector<uint8_t> glyph_stream_;
+  std::vector<uint8_t> instruction_stream_;
+  bool sbbox_;
+  bool cbbox_;
+  int n_glyphs_;
+};
+
+}  // namespace
+
+bool TransformGlyfAndLocaTables(Font* font) {
+  Font::Table* transformed_glyf = &font->tables[kGlyfTableTag ^ 0x80808080];
+  Font::Table* transformed_loca = &font->tables[kLocaTableTag ^ 0x80808080];
+
+  int num_glyphs = NumGlyphs(*font);
+  GlyfEncoder encoder(num_glyphs);
+  for (int i = 0; i < num_glyphs; ++i) {
+    Glyph glyph;
+    const uint8_t* glyph_data;
+    size_t glyph_size;
+    if (!GetGlyphData(*font, i, &glyph_data, &glyph_size) ||
+        (glyph_size > 0 && !ReadGlyph(glyph_data, glyph_size, &glyph))) {
+      return OTS_FAILURE();
+    }
+    encoder.Encode(i, glyph);
+  }
+  encoder.GetTransformedGlyfBytes(&transformed_glyf->buffer);
+
+  const Font::Table* head_table = font->FindTable(kHeadTableTag);
+  if (head_table == NULL || head_table->length < 52) {
+    return OTS_FAILURE();
+  }
+  transformed_glyf->buffer[7] = head_table->data[51];  // index_format
+
+  transformed_glyf->tag = kGlyfTableTag ^ 0x80808080;
+  transformed_glyf->length = transformed_glyf->buffer.size();
+  transformed_glyf->data = transformed_glyf->buffer.data();
+
+  transformed_loca->tag = kLocaTableTag ^ 0x80808080;
+  transformed_loca->length = 0;
+  transformed_loca->data = NULL;
+
+  return true;
+}
+
+} // namespace woff2
diff --git a/woff2/transform.h b/woff2/transform.h
new file mode 100644
index 0000000..dd63e73
--- /dev/null
+++ b/woff2/transform.h
@@ -0,0 +1,31 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Library for preprocessing fonts as part of the WOFF 2.0 conversion.
+
+#ifndef BROTLI_WOFF2_TRANSFORM_H_
+#define BROTLI_WOFF2_TRANSFORM_H_
+
+#include "./font.h"
+
+namespace woff2 {
+
+// Adds the transformed versions of the glyf and loca tables to the font. The
+// transformed loca table has zero length. The tag of the transformed tables is
+// derived from the original tag by flipping the MSB of each of its bytes.
+bool TransformGlyfAndLocaTables(Font* font);
+
+} // namespace woff2
+
+#endif  // BROTLI_WOFF2_TRANSFORM_H_
diff --git a/woff2/woff2.cc b/woff2/woff2.cc
new file mode 100644
index 0000000..43e0861
--- /dev/null
+++ b/woff2/woff2.cc
@@ -0,0 +1,1313 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Library for converting between WOFF2 font files and their TTF versions.
+
+#include "./woff2.h"
+
+#include <stdlib.h>
+#include <complex>
+#include <cstring>
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "./ots.h"
+#include "./decode.h"
+#include "./encode.h"
+#include "./font.h"
+#include "./normalize.h"
+#include "./round.h"
+#include "./store_bytes.h"
+#include "./transform.h"
+
+namespace woff2 {
+
+namespace {
+
+using std::string;
+using std::vector;
+
+
+
+// simple glyph flags
+const int kGlyfOnCurve = 1 << 0;
+const int kGlyfXShort = 1 << 1;
+const int kGlyfYShort = 1 << 2;
+const int kGlyfRepeat = 1 << 3;
+const int kGlyfThisXIsSame = 1 << 4;
+const int kGlyfThisYIsSame = 1 << 5;
+
+// composite glyph flags
+const int FLAG_ARG_1_AND_2_ARE_WORDS = 1 << 0;
+const int FLAG_ARGS_ARE_XY_VALUES = 1 << 1;
+const int FLAG_ROUND_XY_TO_GRID = 1 << 2;
+const int FLAG_WE_HAVE_A_SCALE = 1 << 3;
+const int FLAG_RESERVED = 1 << 4;
+const int FLAG_MORE_COMPONENTS = 1 << 5;
+const int FLAG_WE_HAVE_AN_X_AND_Y_SCALE = 1 << 6;
+const int FLAG_WE_HAVE_A_TWO_BY_TWO = 1 << 7;
+const int FLAG_WE_HAVE_INSTRUCTIONS = 1 << 8;
+const int FLAG_USE_MY_METRICS = 1 << 9;
+const int FLAG_OVERLAP_COMPOUND = 1 << 10;
+const int FLAG_SCALED_COMPONENT_OFFSET = 1 << 11;
+const int FLAG_UNSCALED_COMPONENT_OFFSET = 1 << 12;
+
+const size_t kSfntHeaderSize = 12;
+const size_t kSfntEntrySize = 16;
+const size_t kCheckSumAdjustmentOffset = 8;
+
+const size_t kEndPtsOfContoursOffset = 10;
+const size_t kCompositeGlyphBegin = 10;
+
+// Note that the byte order is big-endian, not the same as ots.cc
+#define TAG(a, b, c, d) ((a << 24) | (b << 16) | (c << 8) | d)
+
+const uint32_t kWoff2Signature = 0x774f4632;  // "wOF2"
+
+const unsigned int kWoff2FlagsContinueStream = 1 << 4;
+const unsigned int kWoff2FlagsTransform = 1 << 5;
+
+const size_t kWoff2HeaderSize = 44;
+const size_t kWoff2EntrySize = 20;
+
+const size_t kLzmaHeaderSize = 13;
+
+// Compression type values common to both short and long formats
+const uint32_t kCompressionTypeMask = 0xf;
+const uint32_t kCompressionTypeNone = 0;
+const uint32_t kCompressionTypeGzip = 1;
+const uint32_t kCompressionTypeLzma = 2;
+const uint32_t kCompressionTypeBrotli = 3;
+const uint32_t kCompressionTypeLzham = 4;
+
+// This is a special value for the short format only, as described in
+// "Design for compressed header format" in draft doc.
+const uint32_t kShortFlagsContinue = 3;
+
+struct Point {
+  int x;
+  int y;
+  bool on_curve;
+};
+
+struct Table {
+  uint32_t tag;
+  uint32_t flags;
+  uint32_t src_offset;
+  uint32_t src_length;
+
+  uint32_t transform_length;
+
+  uint32_t dst_offset;
+  uint32_t dst_length;
+  const uint8_t* dst_data;
+};
+
+// Based on section 6.1.1 of MicroType Express draft spec
+bool Read255UShort(ots::Buffer* buf, unsigned int* value) {
+  static const int kWordCode = 253;
+  static const int kOneMoreByteCode2 = 254;
+  static const int kOneMoreByteCode1 = 255;
+  static const int kLowestUCode = 253;
+  uint8_t code = 0;
+  if (!buf->ReadU8(&code)) {
+    return OTS_FAILURE();
+  }
+  if (code == kWordCode) {
+    uint16_t result = 0;
+    if (!buf->ReadU16(&result)) {
+      return OTS_FAILURE();
+    }
+    *value = result;
+    return true;
+  } else if (code == kOneMoreByteCode1) {
+    uint8_t result = 0;
+    if (!buf->ReadU8(&result)) {
+      return OTS_FAILURE();
+    }
+    *value = result + kLowestUCode;
+    return true;
+  } else if (code == kOneMoreByteCode2) {
+    uint8_t result = 0;
+    if (!buf->ReadU8(&result)) {
+      return OTS_FAILURE();
+    }
+    *value = result + kLowestUCode * 2;
+    return true;
+  } else {
+    *value = code;
+    return true;
+  }
+}
+
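+// Reads a big-endian base-128 value in which the high bit of each byte marks
+// continuation. For example, 300 is encoded as the two bytes {0x82, 0x2c}.
+// Encodings longer than five bytes are rejected.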
+bool ReadBase128(ots::Buffer* buf, uint32_t* value) {
+  uint32_t result = 0;
+  for (size_t i = 0; i < 5; ++i) {
+    uint8_t code = 0;
+    if (!buf->ReadU8(&code)) {
+      return OTS_FAILURE();
+    }
+    // If any of the top seven bits are set then we're about to overflow.
+    if (result & 0xe0000000) {
+      return OTS_FAILURE();
+    }
+    result = (result << 7) | (code & 0x7f);
+    if ((code & 0x80) == 0) {
+      *value = result;
+      return true;
+    }
+  }
+  // More than five bytes would exceed the 32-bit size bound.
+  return OTS_FAILURE();
+}
+
+size_t Base128Size(size_t n) {
+  size_t size = 1;
+  for (; n >= 128; n >>= 7) ++size;
+  return size;
+}
+
+void StoreBase128(size_t len, size_t* offset, uint8_t* dst) {
+  size_t size = Base128Size(len);
+  for (int i = 0; i < size; ++i) {
+    int b = (int)(len >> (7 * (size - i - 1))) & 0x7f;
+    if (i < size - 1) {
+      b |= 0x80;
+    }
+    dst[(*offset)++] = b;
+  }
+}
+
+int WithSign(int flag, int baseval) {
+  // Precondition: 0 <= baseval < 65536 (to avoid integer overflow)
+  return (flag & 1) ? baseval : -baseval;
+}
+
+bool TripletDecode(const uint8_t* flags_in, const uint8_t* in, size_t in_size,
+    unsigned int n_points, std::vector<Point>* result,
+    size_t* in_bytes_consumed) {
+  int x = 0;
+  int y = 0;
+
+  if (n_points > in_size) {
+    return OTS_FAILURE();
+  }
+  unsigned int triplet_index = 0;
+
+  for (unsigned int i = 0; i < n_points; ++i) {
+    uint8_t flag = flags_in[i];
+    bool on_curve = !(flag >> 7);
+    flag &= 0x7f;
+    unsigned int n_data_bytes;
+    if (flag < 84) {
+      n_data_bytes = 1;
+    } else if (flag < 120) {
+      n_data_bytes = 2;
+    } else if (flag < 124) {
+      n_data_bytes = 3;
+    } else {
+      n_data_bytes = 4;
+    }
+    if (triplet_index + n_data_bytes > in_size ||
+        triplet_index + n_data_bytes < triplet_index) {
+      return OTS_FAILURE();
+    }
+    int dx, dy;
+    if (flag < 10) {
+      dx = 0;
+      dy = WithSign(flag, ((flag & 14) << 7) + in[triplet_index]);
+    } else if (flag < 20) {
+      dx = WithSign(flag, (((flag - 10) & 14) << 7) + in[triplet_index]);
+      dy = 0;
+    } else if (flag < 84) {
+      int b0 = flag - 20;
+      int b1 = in[triplet_index];
+      dx = WithSign(flag, 1 + (b0 & 0x30) + (b1 >> 4));
+      dy = WithSign(flag >> 1, 1 + ((b0 & 0x0c) << 2) + (b1 & 0x0f));
+    } else if (flag < 120) {
+      int b0 = flag - 84;
+      dx = WithSign(flag, 1 + ((b0 / 12) << 8) + in[triplet_index]);
+      dy = WithSign(flag >> 1,
+                    1 + (((b0 % 12) >> 2) << 8) + in[triplet_index + 1]);
+    } else if (flag < 124) {
+      int b2 = in[triplet_index + 1];
+      dx = WithSign(flag, (in[triplet_index] << 4) + (b2 >> 4));
+      dy = WithSign(flag >> 1, ((b2 & 0x0f) << 8) + in[triplet_index + 2]);
+    } else {
+      dx = WithSign(flag, (in[triplet_index] << 8) + in[triplet_index + 1]);
+      dy = WithSign(flag >> 1,
+          (in[triplet_index + 2] << 8) + in[triplet_index + 3]);
+    }
+    triplet_index += n_data_bytes;
+    // Possible overflow but coordinate values are not security sensitive
+    x += dx;
+    y += dy;
+    result->push_back(Point());
+    Point& back = result->back();
+    back.x = x;
+    back.y = y;
+    back.on_curve = on_curve;
+  }
+  *in_bytes_consumed = triplet_index;
+  return true;
+}
+
+// This function stores just the point data. On entry, dst points to the
+// beginning of a simple glyph. Returns true on success.
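+// For example, a point whose delta from the previous point is (dx = 0,
+// dy = -3) and which is on the curve gets the flag
+// kGlyfOnCurve | kGlyfThisXIsSame | kGlyfYShort, no x byte, and the single
+// y byte 3; the sign of a short delta lives in the kGlyfThisYIsSame bit.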
+bool StorePoints(const std::vector<Point>& points,
+    unsigned int n_contours, unsigned int instruction_length,
+    uint8_t* dst, size_t dst_size, size_t* glyph_size) {
+  // I believe that n_contours < 65536, in which case this is safe. However, a
+  // comment and/or an assert would be good.
+  unsigned int flag_offset = kEndPtsOfContoursOffset + 2 * n_contours + 2 +
+    instruction_length;
+  int last_flag = -1;
+  int repeat_count = 0;
+  int last_x = 0;
+  int last_y = 0;
+  unsigned int x_bytes = 0;
+  unsigned int y_bytes = 0;
+
+  for (unsigned int i = 0; i < points.size(); ++i) {
+    const Point& point = points[i];
+    int flag = point.on_curve ? kGlyfOnCurve : 0;
+    int dx = point.x - last_x;
+    int dy = point.y - last_y;
+    if (dx == 0) {
+      flag |= kGlyfThisXIsSame;
+    } else if (dx > -256 && dx < 256) {
+      flag |= kGlyfXShort | (dx > 0 ? kGlyfThisXIsSame : 0);
+      x_bytes += 1;
+    } else {
+      x_bytes += 2;
+    }
+    if (dy == 0) {
+      flag |= kGlyfThisYIsSame;
+    } else if (dy > -256 && dy < 256) {
+      flag |= kGlyfYShort | (dy > 0 ? kGlyfThisYIsSame : 0);
+      y_bytes += 1;
+    } else {
+      y_bytes += 2;
+    }
+
+    if (flag == last_flag && repeat_count != 255) {
+      dst[flag_offset - 1] |= kGlyfRepeat;
+      repeat_count++;
+    } else {
+      if (repeat_count != 0) {
+        if (flag_offset >= dst_size) {
+          return OTS_FAILURE();
+        }
+        dst[flag_offset++] = repeat_count;
+      }
+      if (flag_offset >= dst_size) {
+        return OTS_FAILURE();
+      }
+      dst[flag_offset++] = flag;
+      repeat_count = 0;
+    }
+    last_x = point.x;
+    last_y = point.y;
+    last_flag = flag;
+  }
+
+  if (repeat_count != 0) {
+    if (flag_offset >= dst_size) {
+      return OTS_FAILURE();
+    }
+    dst[flag_offset++] = repeat_count;
+  }
+  unsigned int xy_bytes = x_bytes + y_bytes;
+  if (xy_bytes < x_bytes ||
+      flag_offset + xy_bytes < flag_offset ||
+      flag_offset + xy_bytes > dst_size) {
+    return OTS_FAILURE();
+  }
+
+  int x_offset = flag_offset;
+  int y_offset = flag_offset + x_bytes;
+  last_x = 0;
+  last_y = 0;
+  for (unsigned int i = 0; i < points.size(); ++i) {
+    int dx = points[i].x - last_x;
+    if (dx == 0) {
+      // pass
+    } else if (dx > -256 && dx < 256) {
+      dst[x_offset++] = std::abs(dx);
+    } else {
+      // will always fit for valid input, but overflow is harmless
+      x_offset = Store16(dst, x_offset, dx);
+    }
+    last_x += dx;
+    int dy = points[i].y - last_y;
+    if (dy == 0) {
+      // pass
+    } else if (dy > -256 && dy < 256) {
+      dst[y_offset++] = std::abs(dy);
+    } else {
+      y_offset = Store16(dst, y_offset, dy);
+    }
+    last_y += dy;
+  }
+  *glyph_size = y_offset;
+  return true;
+}
+
+// Compute the bounding box of the coordinates, and store into a glyf buffer.
+// A precondition is that there are at least 10 bytes available.
+void ComputeBbox(const std::vector<Point>& points, uint8_t* dst) {
+  int x_min = 0;
+  int y_min = 0;
+  int x_max = 0;
+  int y_max = 0;
+
+  for (unsigned int i = 0; i < points.size(); ++i) {
+    int x = points[i].x;
+    int y = points[i].y;
+    if (i == 0 || x < x_min) x_min = x;
+    if (i == 0 || x > x_max) x_max = x;
+    if (i == 0 || y < y_min) y_min = y;
+    if (i == 0 || y > y_max) y_max = y;
+  }
+  size_t offset = 2;
+  offset = Store16(dst, offset, x_min);
+  offset = Store16(dst, offset, y_min);
+  offset = Store16(dst, offset, x_max);
+  offset = Store16(dst, offset, y_max);
+}
+
+// Process entire bbox stream. This is done as a separate pass to allow for
+// composite bbox computations (an optional more aggressive transform).
+bool ProcessBboxStream(ots::Buffer* bbox_stream, unsigned int n_glyphs,
+    const std::vector<uint32_t>& loca_values, uint8_t* glyf_buf,
+    size_t glyf_buf_length) {
+  const uint8_t* buf = bbox_stream->buffer();
+  if (n_glyphs >= 65536 || loca_values.size() != n_glyphs + 1) {
+    return OTS_FAILURE();
+  }
+  // Safe because n_glyphs is bounded
+  unsigned int bitmap_length = ((n_glyphs + 31) >> 5) << 2;
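+  // (one bit per glyph, rounded up to a whole number of 4-byte words; e.g.
+  // 300 glyphs yield a 40-byte bitmap)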
+  if (!bbox_stream->Skip(bitmap_length)) {
+    return OTS_FAILURE();
+  }
+  for (unsigned int i = 0; i < n_glyphs; ++i) {
+    if (buf[i >> 3] & (0x80 >> (i & 7))) {
+      uint32_t loca_offset = loca_values[i];
+      if (loca_values[i + 1] - loca_offset < kEndPtsOfContoursOffset) {
+        return OTS_FAILURE();
+      }
+      if (glyf_buf_length < 2 + 10 ||
+          loca_offset > glyf_buf_length - 2 - 10) {
+        return OTS_FAILURE();
+      }
+      if (!bbox_stream->Read(glyf_buf + loca_offset + 2, 8)) {
+        return OTS_FAILURE();
+      }
+    }
+  }
+  return true;
+}
+
+bool ProcessComposite(ots::Buffer* composite_stream, uint8_t* dst,
+    size_t dst_size, size_t* glyph_size, bool* have_instructions) {
+  size_t start_offset = composite_stream->offset();
+  bool we_have_instructions = false;
+
+  uint16_t flags = FLAG_MORE_COMPONENTS;
+  while (flags & FLAG_MORE_COMPONENTS) {
+    if (!composite_stream->ReadU16(&flags)) {
+      return OTS_FAILURE();
+    }
+    we_have_instructions |= (flags & FLAG_WE_HAVE_INSTRUCTIONS) != 0;
+    size_t arg_size = 2;  // glyph index
+    if (flags & FLAG_ARG_1_AND_2_ARE_WORDS) {
+      arg_size += 4;
+    } else {
+      arg_size += 2;
+    }
+    if (flags & FLAG_WE_HAVE_A_SCALE) {
+      arg_size += 2;
+    } else if (flags & FLAG_WE_HAVE_AN_X_AND_Y_SCALE) {
+      arg_size += 4;
+    } else if (flags & FLAG_WE_HAVE_A_TWO_BY_TWO) {
+      arg_size += 8;
+    }
+    if (!composite_stream->Skip(arg_size)) {
+      return OTS_FAILURE();
+    }
+  }
+  size_t composite_glyph_size = composite_stream->offset() - start_offset;
+  if (composite_glyph_size + kCompositeGlyphBegin > dst_size) {
+    return OTS_FAILURE();
+  }
+  Store16(dst, 0, 0xffff);  // nContours = -1 for composite glyph
+  std::memcpy(dst + kCompositeGlyphBegin,
+      composite_stream->buffer() + start_offset,
+      composite_glyph_size);
+  *glyph_size = kCompositeGlyphBegin + composite_glyph_size;
+  *have_instructions = we_have_instructions;
+  return true;
+}
+
+// Build TrueType loca table
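+// (index_format 0 stores each offset divided by two in 16 bits, so 0x1a2c is
+// written as 0x0d16; index_format 1 stores full 32-bit offsets)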
+bool StoreLoca(const std::vector<uint32_t>& loca_values, int index_format,
+    uint8_t* dst, size_t dst_size) {
+  const uint64_t loca_size = loca_values.size();
+  const uint64_t offset_size = index_format ? 4 : 2;
+  if ((loca_size << 2) >> 2 != loca_size) {
+    return OTS_FAILURE();
+  }
+  if (offset_size * loca_size > dst_size) {
+    return OTS_FAILURE();
+  }
+  size_t offset = 0;
+  for (size_t i = 0; i < loca_values.size(); ++i) {
+    uint32_t value = loca_values[i];
+    if (index_format) {
+      offset = StoreU32(dst, offset, value);
+    } else {
+      offset = Store16(dst, offset, value >> 1);
+    }
+  }
+  return true;
+}
+
+// Reconstruct entire glyf table based on transformed original
+bool ReconstructGlyf(const uint8_t* data, size_t data_size,
+    uint8_t* dst, size_t dst_size,
+    uint8_t* loca_buf, size_t loca_size) {
+  static const int kNumSubStreams = 7;
+  ots::Buffer file(data, data_size);
+  uint32_t version;
+  std::vector<std::pair<const uint8_t*, size_t> > substreams(kNumSubStreams);
+
+  if (!file.ReadU32(&version)) {
+    return OTS_FAILURE();
+  }
+  uint16_t num_glyphs;
+  uint16_t index_format;
+  if (!file.ReadU16(&num_glyphs) ||
+      !file.ReadU16(&index_format)) {
+    return OTS_FAILURE();
+  }
+  unsigned int offset = (2 + kNumSubStreams) * 4;
+  if (offset > data_size) {
+    return OTS_FAILURE();
+  }
+  // Invariant from here on: data_size >= offset
+  for (int i = 0; i < kNumSubStreams; ++i) {
+    uint32_t substream_size;
+    if (!file.ReadU32(&substream_size)) {
+      return OTS_FAILURE();
+    }
+    if (substream_size > data_size - offset) {
+      return OTS_FAILURE();
+    }
+    substreams[i] = std::make_pair(data + offset, substream_size);
+    offset += substream_size;
+  }
+  ots::Buffer n_contour_stream(substreams[0].first, substreams[0].second);
+  ots::Buffer n_points_stream(substreams[1].first, substreams[1].second);
+  ots::Buffer flag_stream(substreams[2].first, substreams[2].second);
+  ots::Buffer glyph_stream(substreams[3].first, substreams[3].second);
+  ots::Buffer composite_stream(substreams[4].first, substreams[4].second);
+  ots::Buffer bbox_stream(substreams[5].first, substreams[5].second);
+  ots::Buffer instruction_stream(substreams[6].first, substreams[6].second);
+
+  std::vector<uint32_t> loca_values(num_glyphs + 1);
+  std::vector<unsigned int> n_points_vec;
+  std::vector<Point> points;
+  uint32_t loca_offset = 0;
+  for (unsigned int i = 0; i < num_glyphs; ++i) {
+    size_t glyph_size = 0;
+    uint16_t n_contours = 0;
+    if (!n_contour_stream.ReadU16(&n_contours)) {
+      return OTS_FAILURE();
+    }
+    uint8_t* glyf_dst = dst + loca_offset;
+    size_t glyf_dst_size = dst_size - loca_offset;
+    if (n_contours == 0xffff) {
+      // composite glyph
+      bool have_instructions = false;
+      unsigned int instruction_size = 0;
+      if (!ProcessComposite(&composite_stream, glyf_dst, glyf_dst_size,
+            &glyph_size, &have_instructions)) {
+        return OTS_FAILURE();
+      }
+      if (have_instructions) {
+        if (!Read255UShort(&glyph_stream, &instruction_size)) {
+          return OTS_FAILURE();
+        }
+        if (instruction_size + 2 > glyf_dst_size - glyph_size) {
+          return OTS_FAILURE();
+        }
+        Store16(glyf_dst, glyph_size, instruction_size);
+        if (!instruction_stream.Read(glyf_dst + glyph_size + 2,
+              instruction_size)) {
+          return OTS_FAILURE();
+        }
+        glyph_size += instruction_size + 2;
+      }
+    } else if (n_contours > 0) {
+      // simple glyph
+      n_points_vec.clear();
+      points.clear();
+      unsigned int total_n_points = 0;
+      unsigned int n_points_contour;
+      for (unsigned int j = 0; j < n_contours; ++j) {
+        if (!Read255UShort(&n_points_stream, &n_points_contour)) {
+          return OTS_FAILURE();
+        }
+        n_points_vec.push_back(n_points_contour);
+        if (total_n_points + n_points_contour < total_n_points) {
+          return OTS_FAILURE();
+        }
+        total_n_points += n_points_contour;
+      }
+      unsigned int flag_size = total_n_points;
+      if (flag_size > flag_stream.length() - flag_stream.offset()) {
+        return OTS_FAILURE();
+      }
+      const uint8_t* flags_buf = flag_stream.buffer() + flag_stream.offset();
+      const uint8_t* triplet_buf = glyph_stream.buffer() +
+        glyph_stream.offset();
+      size_t triplet_size = glyph_stream.length() - glyph_stream.offset();
+      size_t triplet_bytes_consumed = 0;
+      if (!TripletDecode(flags_buf, triplet_buf, triplet_size, total_n_points,
+            &points, &triplet_bytes_consumed)) {
+        return OTS_FAILURE();
+      }
+      const uint32_t header_and_endpts_contours_size =
+          kEndPtsOfContoursOffset + 2 * n_contours;
+      if (glyf_dst_size < header_and_endpts_contours_size) {
+        return OTS_FAILURE();
+      }
+      Store16(glyf_dst, 0, n_contours);
+      ComputeBbox(points, glyf_dst);
+      size_t offset = kEndPtsOfContoursOffset;
+      int end_point = -1;
+      for (unsigned int contour_ix = 0; contour_ix < n_contours; ++contour_ix) {
+        end_point += n_points_vec[contour_ix];
+        if (end_point >= 65536) {
+          return OTS_FAILURE();
+        }
+        offset = Store16(glyf_dst, offset, end_point);
+      }
+      if (!flag_stream.Skip(flag_size)) {
+        return OTS_FAILURE();
+      }
+      if (!glyph_stream.Skip(triplet_bytes_consumed)) {
+        return OTS_FAILURE();
+      }
+      unsigned int instruction_size;
+      if (!Read255UShort(&glyph_stream, &instruction_size)) {
+        return OTS_FAILURE();
+      }
+      if (glyf_dst_size - header_and_endpts_contours_size <
+          instruction_size + 2) {
+        return OTS_FAILURE();
+      }
+      uint8_t* instruction_dst = glyf_dst + header_and_endpts_contours_size;
+      Store16(instruction_dst, 0, instruction_size);
+      if (!instruction_stream.Read(instruction_dst + 2, instruction_size)) {
+        return OTS_FAILURE();
+      }
+      if (!StorePoints(points, n_contours, instruction_size,
+            glyf_dst, glyf_dst_size, &glyph_size)) {
+        return OTS_FAILURE();
+      }
+    } else {
+      glyph_size = 0;
+    }
+    loca_values[i] = loca_offset;
+    if (glyph_size + 3 < glyph_size) {
+      return OTS_FAILURE();
+    }
+    glyph_size = Round4(glyph_size);
+    if (glyph_size > dst_size - loca_offset) {
+      // This shouldn't happen, but this test defensively maintains the
+      // invariant that loca_offset <= dst_size.
+      return OTS_FAILURE();
+    }
+    loca_offset += glyph_size;
+  }
+  loca_values[num_glyphs] = loca_offset;
+  if (!ProcessBboxStream(&bbox_stream, num_glyphs, loca_values,
+          dst, dst_size)) {
+    return OTS_FAILURE();
+  }
+  return StoreLoca(loca_values, index_format, loca_buf, loca_size);
+}
+
+// This is a linear search; it could be changed to a binary search because the
+// tables are guaranteed to be sorted by tag, but the total CPU time is
+// expected to be very small in any case.
+const Table* FindTable(const std::vector<Table>& tables, uint32_t tag) {
+  size_t n_tables = tables.size();
+  for (size_t i = 0; i < n_tables; ++i) {
+    if (tables[i].tag == tag) {
+      return &tables[i];
+    }
+  }
+  return NULL;
+}
+
+bool ReconstructTransformed(const std::vector<Table>& tables, uint32_t tag,
+    const uint8_t* transformed_buf, size_t transformed_size,
+    uint8_t* dst, size_t dst_length) {
+  if (tag == TAG('g', 'l', 'y', 'f')) {
+    const Table* glyf_table = FindTable(tables, tag);
+    const Table* loca_table = FindTable(tables, TAG('l', 'o', 'c', 'a'));
+    if (glyf_table == NULL || loca_table == NULL) {
+      return OTS_FAILURE();
+    }
+    if (static_cast<uint64_t>(glyf_table->dst_offset + glyf_table->dst_length) >
+        dst_length) {
+      return OTS_FAILURE();
+    }
+    if (static_cast<uint64_t>(loca_table->dst_offset + loca_table->dst_length) >
+        dst_length) {
+      return OTS_FAILURE();
+    }
+    return ReconstructGlyf(transformed_buf, transformed_size,
+        dst + glyf_table->dst_offset, glyf_table->dst_length,
+        dst + loca_table->dst_offset, loca_table->dst_length);
+  } else if (tag == TAG('l', 'o', 'c', 'a')) {
+    // processing was already done by glyf table, but validate
+    if (!FindTable(tables, TAG('g', 'l', 'y', 'f'))) {
+      return OTS_FAILURE();
+    }
+  } else {
+    // transform for the tag is not known
+    return OTS_FAILURE();
+  }
+  return true;
+}
+
+uint32_t ComputeChecksum(const uint8_t* buf, size_t size) {
+  uint32_t checksum = 0;
+  for (size_t i = 0; i < size; i += 4) {
+    // We assume the addition is mod 2^32, which is valid because unsigned
+    checksum += (buf[i] << 24) | (buf[i + 1] << 16) |
+      (buf[i + 2] << 8) | buf[i + 3];
+  }
+  return checksum;
+}
+
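+// Per the OpenType 'head' specification, checkSumAdjustment is set to
+// 0xb1b0afba minus the font checksum computed with the field itself zeroed,
+// so that a checksum over the finished font comes out to 0xb1b0afba mod 2^32.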
+bool FixChecksums(const std::vector<Table>& tables, uint8_t* dst) {
+  const Table* head_table = FindTable(tables, TAG('h', 'e', 'a', 'd'));
+  if (head_table == NULL ||
+      head_table->dst_length < kCheckSumAdjustmentOffset + 4) {
+    return OTS_FAILURE();
+  }
+  size_t adjustment_offset = head_table->dst_offset + kCheckSumAdjustmentOffset;
+  StoreU32(dst, adjustment_offset, 0);
+  size_t n_tables = tables.size();
+  uint32_t file_checksum = 0;
+  for (size_t i = 0; i < n_tables; ++i) {
+    const Table* table = &tables[i];
+    size_t table_length = table->dst_length;
+    uint8_t* table_data = dst + table->dst_offset;
+    uint32_t checksum = ComputeChecksum(table_data, table_length);
+    StoreU32(dst, kSfntHeaderSize + i * kSfntEntrySize + 4, checksum);
+    file_checksum += checksum;
+  }
+  file_checksum += ComputeChecksum(dst,
+      kSfntHeaderSize + kSfntEntrySize * n_tables);
+  uint32_t checksum_adjustment = 0xb1b0afba - file_checksum;
+  StoreU32(dst, adjustment_offset, checksum_adjustment);
+  return true;
+}
+
+bool Woff2Compress(const uint8_t* data, const size_t len,
+                   uint32_t compression_type,
+                   uint8_t* result, uint32_t* result_len) {
+  if (compression_type == kCompressionTypeBrotli) {
+    size_t compressed_len = *result_len;
+    // BrotliCompressBuffer() returns 0 on failure (e.g. when the output
+    // buffer is too small), so propagate that as a failed conversion.
+    if (!brotli::BrotliCompressBuffer(len, data, &compressed_len, result)) {
+      return false;
+    }
+    *result_len = compressed_len;
+    return true;
+  }
+  return false;
+}
+
+bool Woff2Uncompress(uint8_t* dst_buf, size_t dst_size,
+    const uint8_t* src_buf, size_t src_size, uint32_t compression_type) {
+  if (compression_type == kCompressionTypeBrotli) {
+    size_t uncompressed_size = dst_size;
+    int ok = BrotliDecompressBuffer(src_size, src_buf,
+                                    &uncompressed_size, dst_buf);
+    if (!ok || uncompressed_size != dst_size) {
+      return OTS_FAILURE();
+    }
+    return true;
+  }
+  // Unknown compression type
+  return OTS_FAILURE();
+}
+
+bool ReadLongDirectory(ots::Buffer* file, std::vector<Table>* tables,
+    size_t num_tables) {
+  for (size_t i = 0; i < num_tables; ++i) {
+    Table* table = &(*tables)[i];
+    if (!file->ReadU32(&table->tag) ||
+        !file->ReadU32(&table->flags) ||
+        !file->ReadU32(&table->src_length) ||
+        !file->ReadU32(&table->transform_length) ||
+        !file->ReadU32(&table->dst_length)) {
+      return OTS_FAILURE();
+    }
+  }
+  return true;
+}
+
+const uint32_t known_tags[29] = {
+  TAG('c', 'm', 'a', 'p'),  // 0
+  TAG('h', 'e', 'a', 'd'),  // 1
+  TAG('h', 'h', 'e', 'a'),  // 2
+  TAG('h', 'm', 't', 'x'),  // 3
+  TAG('m', 'a', 'x', 'p'),  // 4
+  TAG('n', 'a', 'm', 'e'),  // 5
+  TAG('O', 'S', '/', '2'),  // 6
+  TAG('p', 'o', 's', 't'),  // 7
+  TAG('c', 'v', 't', ' '),  // 8
+  TAG('f', 'p', 'g', 'm'),  // 9
+  TAG('g', 'l', 'y', 'f'),  // 10
+  TAG('l', 'o', 'c', 'a'),  // 11
+  TAG('p', 'r', 'e', 'p'),  // 12
+  TAG('C', 'F', 'F', ' '),  // 13
+  TAG('V', 'O', 'R', 'G'),  // 14
+  TAG('E', 'B', 'D', 'T'),  // 15
+  TAG('E', 'B', 'L', 'C'),  // 16
+  TAG('g', 'a', 's', 'p'),  // 17
+  TAG('h', 'd', 'm', 'x'),  // 18
+  TAG('k', 'e', 'r', 'n'),  // 19
+  TAG('L', 'T', 'S', 'H'),  // 20
+  TAG('P', 'C', 'L', 'T'),  // 21
+  TAG('V', 'D', 'M', 'X'),  // 22
+  TAG('v', 'h', 'e', 'a'),  // 23
+  TAG('v', 'm', 't', 'x'),  // 24
+  TAG('B', 'A', 'S', 'E'),  // 25
+  TAG('G', 'D', 'E', 'F'),  // 26
+  TAG('G', 'P', 'O', 'S'),  // 27
+  TAG('G', 'S', 'U', 'B'),  // 28
+};
+
+int KnownTableIndex(uint32_t tag) {
+  for (int i = 0; i < 29; ++i) {
+    if (tag == known_tags[i]) return i;
+  }
+  return 31;
+}
+
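+// Short-format directory entries begin with a flag byte: the low five bits
+// select a known tag (0x1f means a full 4-byte tag follows), bit 5 marks a
+// transformed table, and the top two bits hold the compression type or the
+// value 3 ("continue the previous stream"). For example, a transformed,
+// gzip-compressed glyf table yields the flag byte 10 | 0x20 | (1 << 6), i.e.
+// 0x6a.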
+bool ReadShortDirectory(ots::Buffer* file, std::vector<Table>* tables,
+    size_t num_tables) {
+  uint32_t last_compression_type = 0;
+  for (size_t i = 0; i < num_tables; ++i) {
+    Table* table = &(*tables)[i];
+    uint8_t flag_byte;
+    if (!file->ReadU8(&flag_byte)) {
+      return OTS_FAILURE();
+    }
+    uint32_t tag;
+    if ((flag_byte & 0x1f) == 0x1f) {
+      if (!file->ReadU32(&tag)) {
+        return OTS_FAILURE();
+      }
+    } else {
+      if ((flag_byte & 0x1f) >= (sizeof(known_tags) / sizeof(known_tags[0]))) {
+        return OTS_FAILURE();
+      }
+      tag = known_tags[flag_byte & 0x1f];
+    }
+    uint32_t flags = flag_byte >> 6;
+    if (flags == kShortFlagsContinue) {
+      flags = last_compression_type | kWoff2FlagsContinueStream;
+    } else {
+      if (flags == kCompressionTypeNone ||
+          flags == kCompressionTypeGzip ||
+          flags == kCompressionTypeLzma) {
+        last_compression_type = flags;
+      } else {
+        return OTS_FAILURE();
+      }
+    }
+    if ((flag_byte & 0x20) != 0) {
+      flags |= kWoff2FlagsTransform;
+    }
+    uint32_t dst_length;
+    if (!ReadBase128(file, &dst_length)) {
+      return OTS_FAILURE();
+    }
+    uint32_t transform_length = dst_length;
+    if ((flags & kWoff2FlagsTransform) != 0) {
+      if (!ReadBase128(file, &transform_length)) {
+        return OTS_FAILURE();
+      }
+    }
+    uint32_t src_length = transform_length;
+    if ((flag_byte >> 6) == 1 || (flag_byte >> 6) == 2) {
+      if (!ReadBase128(file, &src_length)) {
+        return OTS_FAILURE();
+      }
+    } else if ((flag_byte >> 6) == kShortFlagsContinue) {
+      // The compressed data for this table is in a previous table, so we set
+      // the src_length to zero.
+      src_length = 0;
+    }
+    table->tag = tag;
+    table->flags = flags;
+    table->src_length = src_length;
+    table->transform_length = transform_length;
+    table->dst_length = dst_length;
+  }
+  return true;
+}
+
+}  // namespace
+
+size_t ComputeWOFF2FinalSize(const uint8_t* data, size_t length) {
+  ots::Buffer file(data, length);
+  uint32_t total_length;
+
+  if (!file.Skip(16) ||
+      !file.ReadU32(&total_length)) {
+    return 0;
+  }
+  return total_length;
+}
+
+bool ConvertWOFF2ToTTF(uint8_t* result, size_t result_length,
+                       const uint8_t* data, size_t length) {
+  ots::Buffer file(data, length);
+
+  uint32_t signature;
+  uint32_t flavor;
+  if (!file.ReadU32(&signature) || signature != kWoff2Signature ||
+      !file.ReadU32(&flavor)) {
+    return OTS_FAILURE();
+  }
+
+  // TODO(user): Should call IsValidVersionTag() here.
+
+  uint32_t reported_length;
+  if (!file.ReadU32(&reported_length) || length != reported_length) {
+    return OTS_FAILURE();
+  }
+  uint16_t num_tables;
+  if (!file.ReadU16(&num_tables) || !num_tables) {
+    return OTS_FAILURE();
+  }
+  // These reserved bits will always be zero in the final format, but they
+  // temporarily indicate the use of brotli, so that we can evaluate gzip, lzma
+  // and brotli side-by-side.
+  uint16_t reserved;
+  if (!file.ReadU16(&reserved)) {
+    return OTS_FAILURE();
+  }
+  // We don't care about these fields of the header:
+  //   uint32_t total_sfnt_size
+  //   uint16_t major_version, minor_version
+  //   uint32_t meta_offset, meta_length, meta_orig_length
+  //   uint32_t priv_offset, priv_length
+  if (!file.Skip(28)) {
+    return OTS_FAILURE();
+  }
+  std::vector<Table> tables(num_tables);
+  // Note: change below to ReadLongDirectory to enable long format.
+  if (!ReadShortDirectory(&file, &tables, num_tables)) {
+    return OTS_FAILURE();
+  }
+  uint64_t src_offset = file.offset();
+  uint64_t dst_offset = kSfntHeaderSize +
+      kSfntEntrySize * static_cast<uint64_t>(num_tables);
+  uint64_t uncompressed_sum = 0;
+  for (uint16_t i = 0; i < num_tables; ++i) {
+    Table* table = &tables[i];
+    table->src_offset = src_offset;
+    src_offset += table->src_length;
+    if (src_offset > std::numeric_limits<uint32_t>::max()) {
+      return OTS_FAILURE();
+    }
+    src_offset = Round4(src_offset);  // TODO: reconsider
+    table->dst_offset = dst_offset;
+    dst_offset += table->dst_length;
+    if (dst_offset > std::numeric_limits<uint32_t>::max()) {
+      return OTS_FAILURE();
+    }
+    dst_offset = Round4(dst_offset);
+    if ((table->flags & kCompressionTypeMask) != kCompressionTypeNone) {
+      uncompressed_sum += table->src_length;
+      if (uncompressed_sum > std::numeric_limits<uint32_t>::max()) {
+        return OTS_FAILURE();
+      }
+    }
+  }
+  // Enforce same 30M limit on uncompressed tables as OTS
+  if (uncompressed_sum > 30 * 1024 * 1024) {
+    return OTS_FAILURE();
+  }
+  if (src_offset > length || dst_offset > result_length) {
+    return OTS_FAILURE();
+  }
+
+  const uint32_t sfnt_header_and_table_directory_size = 12 + 16 * num_tables;
+  if (sfnt_header_and_table_directory_size > result_length) {
+    return OTS_FAILURE();
+  }
+
+  // Start building the font
+  size_t offset = 0;
+  offset = StoreU32(result, offset, flavor);
+  offset = Store16(result, offset, num_tables);
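+  // Standard sfnt binary-search fields; e.g. with 12 tables max_pow2 is 3,
+  // searchRange is (1 << 3) << 4 == 128 and rangeShift is 12 * 16 - 128 == 64.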
+  unsigned max_pow2 = 0;
+  while (1u << (max_pow2 + 1) <= num_tables) {
+    max_pow2++;
+  }
+  const uint16_t output_search_range = (1u << max_pow2) << 4;
+  offset = Store16(result, offset, output_search_range);
+  offset = Store16(result, offset, max_pow2);
+  offset = Store16(result, offset, (num_tables << 4) - output_search_range);
+  for (uint16_t i = 0; i < num_tables; ++i) {
+    const Table* table = &tables[i];
+    offset = StoreU32(result, offset, table->tag);
+    offset = StoreU32(result, offset, 0);  // checksum, to fill in later
+    offset = StoreU32(result, offset, table->dst_offset);
+    offset = StoreU32(result, offset, table->dst_length);
+  }
+  std::vector<uint8_t> uncompressed_buf;
+  bool continue_valid = false;
+  const uint8_t* transform_buf = NULL;
+  for (uint16_t i = 0; i < num_tables; ++i) {
+    const Table* table = &tables[i];
+    uint32_t flags = table->flags;
+    const uint8_t* src_buf = data + table->src_offset;
+    uint32_t compression_type = flags & kCompressionTypeMask;
+    if (compression_type == kCompressionTypeLzma && reserved > 0) {
+      compression_type = kCompressionTypeLzma + reserved;
+    }
+    size_t transform_length = table->transform_length;
+    if ((flags & kWoff2FlagsContinueStream) != 0) {
+      if (!continue_valid) {
+        return OTS_FAILURE();
+      }
+    } else if (compression_type == kCompressionTypeNone) {
+      if (transform_length != table->src_length) {
+        return OTS_FAILURE();
+      }
+      transform_buf = src_buf;
+      continue_valid = false;
+    } else if ((flags & kWoff2FlagsContinueStream) == 0) {
+      uint64_t total_size = transform_length;
+      for (uint16_t j = i + 1; j < num_tables; ++j) {
+        if ((tables[j].flags & kWoff2FlagsContinueStream) == 0) {
+          break;
+        }
+        total_size += tables[j].transform_length;
+        if (total_size > std::numeric_limits<uint32_t>::max()) {
+          return OTS_FAILURE();
+        }
+      }
+      uncompressed_buf.resize(total_size);
+      if (!Woff2Uncompress(&uncompressed_buf[0], total_size,
+          src_buf, table->src_length, compression_type)) {
+        return OTS_FAILURE();
+      }
+      transform_buf = &uncompressed_buf[0];
+      continue_valid = true;
+    } else {
+      return OTS_FAILURE();
+    }
+
+    if ((flags & kWoff2FlagsTransform) == 0) {
+      if (transform_length != table->dst_length) {
+        return OTS_FAILURE();
+      }
+      if (static_cast<uint64_t>(table->dst_offset + transform_length) >
+          result_length) {
+        return OTS_FAILURE();
+      }
+      std::memcpy(result + table->dst_offset, transform_buf,
+          transform_length);
+    } else {
+      if (!ReconstructTransformed(tables, table->tag,
+            transform_buf, transform_length, result, result_length)) {
+        return OTS_FAILURE();
+      }
+    }
+    if (continue_valid) {
+      transform_buf += transform_length;
+      if (transform_buf > uncompressed_buf.data() + uncompressed_buf.size()) {
+        return OTS_FAILURE();
+      }
+    }
+  }
+
+  return FixChecksums(tables, result);
+}
+
+void StoreTableEntry(const Table& table, size_t* offset, uint8_t* dst) {
+  uint8_t flag_byte = KnownTableIndex(table.tag);
+  if ((table.flags & kWoff2FlagsTransform) != 0) {
+    flag_byte |= 0x20;
+  }
+  if ((table.flags & kWoff2FlagsContinueStream) != 0) {
+    flag_byte |= 0xc0;
+  } else {
+    flag_byte |= ((table.flags & 3) << 6);
+  }
+  dst[(*offset)++] = flag_byte;
+  if ((flag_byte & 0x1f) == 0x1f) {
+    StoreU32(table.tag, offset, dst);
+  }
+  StoreBase128(table.src_length, offset, dst);
+  if ((flag_byte & 0x20) != 0) {
+    StoreBase128(table.transform_length, offset, dst);
+  }
+  if ((flag_byte & 0xc0) == 0x40 || (flag_byte & 0xc0) == 0x80) {
+    StoreBase128(table.dst_length, offset, dst);
+  }
+}
+
+size_t TableEntrySize(const Table& table) {
+  size_t size = KnownTableIndex(table.tag) < 31 ? 1 : 5;
+  size += Base128Size(table.src_length);
+  if ((table.flags & kWoff2FlagsTransform) != 0) {
+    size += Base128Size(table.transform_length);
+  }
+  if ((table.flags & kWoff2FlagsContinueStream) == 0 &&
+      ((table.flags & 3) == kCompressionTypeGzip ||
+       (table.flags & 3) == kCompressionTypeLzma)) {
+    size += Base128Size(table.dst_length);
+  }
+  return size;
+}
+
+size_t ComputeWoff2Length(const std::vector<Table>& tables) {
+  size_t size = 44;  // header size
+  for (const auto& table : tables) {
+    size += TableEntrySize(table);
+  }
+  for (const auto& table : tables) {
+    size += table.dst_length;
+    size = Round4(size);
+  }
+  return size;
+}
+
+size_t ComputeTTFLength(const std::vector<Table>& tables) {
+  size_t size = 12 + 16 * tables.size();  // sfnt header
+  for (const auto& table : tables) {
+    size += Round4(table.src_length);
+  }
+  return size;
+}
+
+size_t ComputeTotalTransformLength(const Font& font) {
+  size_t total = 0;
+  for (const auto& i : font.tables) {
+    const Font::Table& table = i.second;
+    if (table.tag & 0x80808080 || !font.FindTable(table.tag ^ 0x80808080)) {
+      // Count transformed tables and non-transformed tables that do not have
+      // transformed versions.
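+      // (A transformed table is stored under the original tag with the top
+      // bit of each tag byte flipped, e.g. 'glyf' ^ 0x80808080.)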
+      total += table.length;
+    }
+  }
+  return total;
+}
+
+struct Woff2ConvertOptions {
+  uint32_t compression_type;
+  bool continue_streams;
+  bool keep_dsig;
+  bool transform_glyf;
+
+  Woff2ConvertOptions()
+      : compression_type(kCompressionTypeBrotli),
+        continue_streams(true),
+        keep_dsig(true),
+        transform_glyf(true) {}
+};
+
+size_t MaxWOFF2CompressedSize(const uint8_t* data, size_t length) {
+  // Except for the header size, which is 32 bytes larger in woff2 format,
+  // all other parts should be smaller (table header in short format,
+  // transformations and compression). Just to be sure, we will give some
+  // headroom anyway.
+  return length + 1024;
+}
+
+bool ConvertTTFToWOFF2(const uint8_t *data, size_t length,
+                       uint8_t *result, size_t *result_length) {
+
+  Woff2ConvertOptions options;
+
+  Font font;
+  if (!ReadFont(data, length, &font)) {
+    fprintf(stderr, "Parsing of the input font failed.\n");
+    return false;
+  }
+
+  if (!NormalizeFont(&font)) {
+    fprintf(stderr, "Font normalization failed.\n");
+    return false;
+  }
+
+  if (!options.keep_dsig) {
+    font.tables.erase(TAG('D', 'S', 'I', 'G'));
+  }
+
+  if (options.transform_glyf &&
+      !TransformGlyfAndLocaTables(&font)) {
+    fprintf(stderr, "Font transformation failed.\n");
+    return false;
+  }
+
+  const Font::Table* head_table = font.FindTable(kHeadTableTag);
+  if (head_table == NULL) {
+    fprintf(stderr, "Missing head table.\n");
+    return false;
+  }
+
+  // Although the compressed size of each table in the final woff2 file won't
+  // be larger than its transform_length, we have to allocate a large enough
+  // buffer for the compressor, since the compressor can potentially increase
+  // the size. If the compressor overflows this, it should return false and
+  // then this function will also return false.
+  size_t total_transform_length = ComputeTotalTransformLength(font);
+  size_t compression_buffer_size = 1.2 * total_transform_length + 10240;
+  std::vector<uint8_t> compression_buf(compression_buffer_size);
+  size_t compression_buf_offset = 0;
+  uint32_t total_compressed_length = compression_buffer_size;
+
+  if (options.continue_streams) {
+    // Collect all transformed data into one place.
+    std::vector<uint8_t> transform_buf(total_transform_length);
+    size_t transform_offset = 0;
+    for (const auto& i : font.tables) {
+      if (i.second.tag & 0x80808080) continue;
+      const Font::Table* table = font.FindTable(i.second.tag ^ 0x80808080);
+      if (table == NULL) table = &i.second;
+      StoreBytes(table->data, table->length,
+                 &transform_offset, &transform_buf[0]);
+    }
+    // Compress all transformed data in one stream.
+    if (!Woff2Compress(transform_buf.data(), total_transform_length,
+                       options.compression_type,
+                       &compression_buf[0],
+                       &total_compressed_length)) {
+      fprintf(stderr, "Compression of combined table failed.\n");
+      return false;
+    }
+  }
+
+  std::vector<Table> tables;
+  for (const auto& i : font.tables) {
+    const Font::Table& src_table = i.second;
+    if (src_table.tag & 0x80808080) {
+      // This is a transformed table, we will write it together with the
+      // original version.
+      continue;
+    }
+    Table table;
+    table.tag = src_table.tag;
+    table.flags = std::min(options.compression_type, kCompressionTypeLzma);
+    table.src_length = src_table.length;
+    table.transform_length = src_table.length;
+    const uint8_t* transformed_data = src_table.data;
+    const Font::Table* transformed_table =
+        font.FindTable(src_table.tag ^ 0x80808080);
+    if (transformed_table != NULL) {
+      table.flags |= kWoff2FlagsTransform;
+      table.transform_length = transformed_table->length;
+      transformed_data = transformed_table->data;
+    }
+    if (options.continue_streams) {
+      if (tables.empty()) {
+        table.dst_length = total_compressed_length;
+        table.dst_data = &compression_buf[0];
+      } else {
+        table.dst_length = 0;
+        table.dst_data = NULL;
+        table.flags |= kWoff2FlagsContinueStream;
+      }
+    } else {
+      table.dst_length = table.transform_length;
+      table.dst_data = transformed_data;
+      if (options.compression_type != kCompressionTypeNone) {
+        uint32_t compressed_length =
+            compression_buf.size() - compression_buf_offset;
+        if (!Woff2Compress(transformed_data, table.transform_length,
+                           options.compression_type,
+                           &compression_buf[compression_buf_offset],
+                           &compressed_length)) {
+          fprintf(stderr, "Compression of table %x failed.\n", src_table.tag);
+          return false;
+        }
+        if (compressed_length >= table.transform_length) {
+          table.flags &= (~3);  // no compression
+        } else {
+          table.dst_length = compressed_length;
+          table.dst_data = &compression_buf[compression_buf_offset];
+          compression_buf_offset += table.dst_length;
+        }
+      }
+    }
+    tables.push_back(table);
+  }
+
+  size_t woff2_length = ComputeWoff2Length(tables);
+  if (woff2_length > *result_length) {
+    fprintf(stderr, "Result allocation was too small (%zd vs %zd bytes).\n",
+           *result_length, woff2_length);
+    return false;
+  }
+  *result_length = woff2_length;
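+  // Compression types beyond LZMA do not fit in the two flag bits of a table
+  // entry, so the excess is carried in the header's reserved field; the
+  // decoder adds it back (see the kCompressionTypeLzma + reserved handling
+  // in the reconstruction code above).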
+  uint16_t reserved =
+      (options.compression_type > kCompressionTypeLzma) ?
+      options.compression_type - kCompressionTypeLzma : 0;
+
+  size_t offset = 0;
+  StoreU32(kWoff2Signature, &offset, result);
+  StoreU32(font.flavor, &offset, result);
+  StoreU32(woff2_length, &offset, result);
+  Store16(tables.size(), &offset, result);
+  Store16(reserved, &offset, result);
+  StoreU32(ComputeTTFLength(tables), &offset, result);
+  StoreBytes(head_table->data + 4, 4, &offset, result);  // font revision
+  StoreU32(0, &offset, result);  // metaOffset
+  StoreU32(0, &offset, result);  // metaLength
+  StoreU32(0, &offset, result);  // metaOrigLength
+  StoreU32(0, &offset, result);  // privOffset
+  StoreU32(0, &offset, result);  // privLength
+  for (const auto& table : tables) {
+    StoreTableEntry(table, &offset, result);
+  }
+  for (const auto& table : tables) {
+    StoreBytes(table.dst_data, table.dst_length, &offset, result);
+    offset = Round4(offset);
+  }
+  if (*result_length != offset) {
+    fprintf(stderr, "Mismatch between computed and actual length "
+            "(%zd vs %zd)\n", *result_length, offset);
+    return false;
+  }
+  return true;
+}
+
+} // namespace woff2
diff --git a/woff2/woff2.h b/woff2/woff2.h
new file mode 100644
index 0000000..aba5080
--- /dev/null
+++ b/woff2/woff2.h
@@ -0,0 +1,50 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Library for converting WOFF2 format font files to their TTF versions.
+
+#ifndef BROTLI_WOFF2_WOFF2_H_
+#define BROTLI_WOFF2_WOFF2_H_
+
+#include <stddef.h>
+#include <inttypes.h>
+#include <string>
+
+namespace woff2 {
+
+using std::string;
+
+// Compute the size of the final uncompressed font, or 0 on error.
+size_t ComputeWOFF2FinalSize(const uint8_t *data, size_t length);
+
+// Decompresses the font into the target buffer. The result_length should
+// be the same as determined by ComputeWOFF2FinalSize(). Returns true on
+// successful decompression.
+bool ConvertWOFF2ToTTF(uint8_t *result, size_t result_length,
+                       const uint8_t *data, size_t length);
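+
+// A minimal usage sketch (not part of the API); woff2_data/woff2_size name
+// an in-memory WOFF2 file and are illustrative only:
+//
+//   size_t ttf_size = woff2::ComputeWOFF2FinalSize(woff2_data, woff2_size);
+//   std::vector<uint8_t> ttf(ttf_size);
+//   bool ok = ttf_size > 0 &&
+//       woff2::ConvertWOFF2ToTTF(&ttf[0], ttf_size, woff2_data, woff2_size);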
+
+// Returns an upper bound on the size of the compressed file.
+size_t MaxWOFF2CompressedSize(const uint8_t* data, size_t length);
+
+// Compresses the font into the target buffer. *result_length should be at
+// least the value returned by MaxWOFF2CompressedSize(); upon return, it is
+// set to the actual compressed size. Returns true on successful compression.
+bool ConvertTTFToWOFF2(const uint8_t *data, size_t length,
+                       uint8_t *result, size_t *result_length);
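+
+// A minimal usage sketch (not part of the API); ttf_data/ttf_size name an
+// in-memory TTF file and are illustrative only:
+//
+//   size_t woff2_size = woff2::MaxWOFF2CompressedSize(ttf_data, ttf_size);
+//   std::vector<uint8_t> woff2_buf(woff2_size);
+//   bool ok = woff2::ConvertTTFToWOFF2(ttf_data, ttf_size,
+//                                      &woff2_buf[0], &woff2_size);
+//   if (ok) woff2_buf.resize(woff2_size);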
+
+} // namespace woff2
+
+#endif  // BROTLI_WOFF2_WOFF2_H_
diff --git a/woff2/woff2_compress.cc b/woff2/woff2_compress.cc
new file mode 100644
index 0000000..778369b
--- /dev/null
+++ b/woff2/woff2_compress.cc
@@ -0,0 +1,52 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// A commandline tool for compressing ttf format files to woff2.
+
+#include <string>
+
+#include "file.h"
+#include "./woff2.h"
+
+int main(int argc, char **argv) {
+  using std::string;
+
+  if (argc != 2) {
+    fprintf(stderr, "One argument, the input filename, must be provided.\n");
+    return 1;
+  }
+
+  string filename(argv[1]);
+  string outfilename = filename.substr(0, filename.find_last_of(".")) + ".woff2";
+  fprintf(stdout, "Processing %s => %s\n",
+    filename.c_str(), outfilename.c_str());
+  string input = woff2::GetFileContent(filename);
+
+  const uint8_t* input_data = reinterpret_cast<const uint8_t*>(input.data());
+  size_t output_size = woff2::MaxWOFF2CompressedSize(input_data, input.size());
+  string output(output_size, 0);
+  uint8_t* output_data = reinterpret_cast<uint8_t*>(&output[0]);
+
+  if (!woff2::ConvertTTFToWOFF2(input_data, input.size(),
+                                output_data, &output_size)) {
+    fprintf(stderr, "Compression failed.\n");
+    return 1;
+  }
+  output.resize(output_size);
+
+  woff2::SetFileContents(outfilename, output);
+
+  return 0;
+}
diff --git a/woff2/woff2_decompress.cc b/woff2/woff2_decompress.cc
new file mode 100644
index 0000000..c083793
--- /dev/null
+++ b/woff2/woff2_decompress.cc
@@ -0,0 +1,54 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// A very simple commandline tool for decompressing woff2 format files to
+// TrueType font files.
+
+#include <string>
+
+#include "file.h"
+#include "./woff2.h"
+
+int main(int argc, char **argv) {
+  using std::string;
+
+  if (argc != 2) {
+    fprintf(stderr, "One argument, the input filename, must be provided.\n");
+    return 1;
+  }
+
+  string filename(argv[1]);
+  string outfilename = filename.substr(0, filename.find_last_of(".")) + ".ttf";
+  fprintf(stdout, "Processing %s => %s\n",
+    filename.c_str(), outfilename.c_str());
+  string input = woff2::GetFileContent(filename);
+
+  size_t decompressed_size = woff2::ComputeWOFF2FinalSize(
+      reinterpret_cast<const uint8_t*>(input.data()), input.size());
+  string output(decompressed_size, 0);
+  const bool ok = woff2::ConvertWOFF2ToTTF(
+      reinterpret_cast<uint8_t*>(&output[0]), decompressed_size,
+      reinterpret_cast<const uint8_t*>(input.data()), input.size());
+
+  if (!ok) {
+    fprintf(stderr, "Decompression failed\n");
+    return 1;
+  }
+
+  woff2::SetFileContents(outfilename, output);
+
+  return 0;
+}
+
diff --git a/woff2_header_dump.py b/woff2_header_dump.py
new file mode 100644
index 0000000..b352d50
--- /dev/null
+++ b/woff2_header_dump.py
@@ -0,0 +1,38 @@
+# Copyright (c) 2012 Google Inc. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# This is a simple utility for dumping out the header of a compressed file,
+# and is suitable for doing spot checks of compressed files. However, it
+# only implements the "long" form of the table directory.
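+#
+# Example invocation (assuming a font produced by woff2_compress):
+#   python woff2_header_dump.py myfont.woff2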
+
+import struct
+import sys
+
+def dump_woff2_header(header):
+  header_values = struct.unpack('>IIIHHIHHIIIII', header[:44])
+  for i, key in enumerate([
+    'signature',
+    'flavor',
+    'length',
+    'numTables',
+    'reserved',
+    'totalSfntSize',
+    'majorVersion',
+    'minorVersion',
+    'metaOffset',
+    'metaLength',
+    'metaOrigLength',
+    'privOffset',
+    'privLength']):
+    print key, header_values[i]
+  numTables = header_values[3]
+  for i in range(numTables):
+    entry = struct.unpack('>IIIII', header[44+20*i:44+20*(i+1)])
+    print '%08x %d %d %d %d' % entry
+
+def main():
+  header = open(sys.argv[1], 'rb').read()
+  dump_woff2_header(header)
+
+if __name__ == '__main__':
+  main()
+