diff --git a/.circleci/config.yml b/.circleci/config.yml
new file mode 100644
index 0000000..4c08cb2
--- /dev/null
+++ b/.circleci/config.yml
@@ -0,0 +1,108 @@
+# This configuration was automatically generated from a CircleCI 1.0 config.
+# It should include any build commands you had along with commands that CircleCI
+# inferred from your project structure. We strongly recommend you read all the
+# comments in this file to understand the structure of CircleCI 2.0, as the idiom
+# for configuration has changed substantially in 2.0 to allow arbitrary jobs rather
+# than the prescribed lifecycle of 1.0. In general, we recommend using this generated
+# configuration as a reference rather than using it in production, though in most
+# cases it should duplicate the execution of your original 1.0 config.
+version: 2
+jobs:
+  build:
+    working_directory: ~/lz4/lz4
+    parallelism: 1
+    shell: /bin/bash --login
+    # CircleCI 2.0 does not support environment variables that refer to each other the same way as 1.0 did.
+    # If any of these refer to each other, rewrite them so that they don't or see https://circleci.com/docs/2.0/env-vars/#interpolating-environment-variables-to-set-other-environment-variables .
+    environment:
+      CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
+      CIRCLE_TEST_REPORTS: /tmp/circleci-test-results
+    # In CircleCI 1.0 we used a pre-configured image with a large number of languages and other packages.
+    # In CircleCI 2.0 you can now specify your own image, or use one of our pre-configured images.
+    # The following configuration line tells CircleCI to use the specified docker image as the runtime environment for your job.
+    # We have selected a pre-built image that mirrors the build environment we use on
+    # the 1.0 platform, but we recommend you choose an image more tailored to the needs
+    # of each job. For more information on choosing an image (or alternatively using a
+    # VM instead of a container) see https://circleci.com/docs/2.0/executor-types/
+    # To see the list of pre-built images that CircleCI provides for most common languages see
+    # https://circleci.com/docs/2.0/circleci-images/
+    docker:
+    - image: circleci/build-image:ubuntu-14.04-XXL-upstart-1189-5614f37
+      command: /sbin/init
+    steps:
+    # Machine Setup
+    #   If you break your build into multiple jobs with workflows, you will probably want to do the parts of this that are relevant in each
+    # The following `checkout` command checks out your code to your working directory. In 1.0 we did this implicitly. In 2.0 you can choose where in the course of a job your code should be checked out.
+    - checkout
+    # Prepare for artifact and test results collection, equivalent to how it was done on 1.0.
+    # In many cases you can simplify this from what is generated here.
+    # See the docs on artifact collection here: https://circleci.com/docs/2.0/artifacts/
+    - run: mkdir -p $CIRCLE_ARTIFACTS $CIRCLE_TEST_REPORTS
+    # Dependencies
+    #   This would typically go in either a build or a build-and-test job when using workflows
+    # Restore the dependency cache
+    - restore_cache:
+        keys:
+        # This branch if available
+        - v1-dep-{{ .Branch }}-
+        # Default branch if not
+        - v1-dep-dev-
+        # Any branch if there are none on the default branch - this should be unnecessary if you have your default branch configured correctly
+        - v1-dep-
+    # This is based on your 1.0 configuration file or project settings
+    - run: sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test; sudo apt-get -y -qq update
+    - run: sudo apt-get -y install qemu-system-ppc qemu-user-static gcc-powerpc-linux-gnu
+    - run: sudo apt-get -y install qemu-system-arm gcc-arm-linux-gnueabi libc6-dev-armel-cross gcc-aarch64-linux-gnu libc6-dev-arm64-cross
+    - run: sudo apt-get -y install libc6-dev-i386 clang gcc-5 gcc-5-multilib gcc-6 valgrind
+    # Save dependency cache
+    - save_cache:
+        key: v1-dep-{{ .Branch }}-{{ epoch }}
+        paths:
+        # This is a broad list of cache paths to include many possible development environments
+        # You can probably delete some of these entries
+        - vendor/bundle
+        - ~/virtualenvs
+        - ~/.m2
+        - ~/.ivy2
+        - ~/.bundle
+        - ~/.go_workspace
+        - ~/.gradle
+        - ~/.cache/bower
+    # Test
+    #   This would typically be a build job when using workflows, possibly combined with build
+    # This is based on your 1.0 configuration file or project settings
+    - run: clang -v; make clangtest && make clean
+    - run: g++ -v; make gpptest     && make clean
+    - run: gcc -v; make c_standards && make clean
+    - run: gcc -v; g++ -v; make ctocpptest && make clean
+    - run: gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -Werror" make check && make clean
+    - run: gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean
+    - run: gcc-6 -v; CC=gcc-6 make c_standards && make clean
+    - run: gcc-6 -v; CC=gcc-6 MOREFLAGS="-O2 -Werror" make check  && make clean
+    - run: make cmake               && make clean
+    - run: make -C tests test-lz4
+    - run: make -C tests test-lz4c
+    - run: make -C tests test-frametest
+    - run: make -C tests test-fullbench
+    - run: make -C tests test-fuzzer && make clean
+    - run: make -C lib all          && make clean
+    - run: pyenv global 3.4.4; make versionsTest MOREFLAGS=-I/usr/include/x86_64-linux-gnu && make clean
+    - run: make travis-install      && make clean
+    - run: gcc -v; CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean
+    - run: make usan                && make clean
+    - run: clang -v; make staticAnalyze && make clean
+    - run: make -C tests test-mem && make clean
+    - run: make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static                  && make clean
+    - run: make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static MOREFLAGS=-m64 && make clean
+    - run: make platformTest CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static                  && make clean
+    - run: make platformTest CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static              && make clean
+    # Teardown
+    #   If you break your build into multiple jobs with workflows, you will probably want to do the parts of this that are relevant in each
+    # Save test results
+    - store_test_results:
+        path: /tmp/circleci-test-results
+    # Save artifacts
+    - store_artifacts:
+        path: /tmp/circleci-artifacts
+    - store_artifacts:
+        path: /tmp/circleci-test-results
diff --git a/.gitignore b/.gitignore
index 117b02d..829270b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,3 +29,7 @@
 # Mac
 .DS_Store
 *.dSYM
+
+# Windows / Msys
+nul
+ld.exe*
diff --git a/.travis.yml b/.travis.yml
index dc61505..de6875b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -11,15 +11,15 @@
     # Container-based 12.04 LTS Server Edition 64 bit (doesn't support 32-bit includes)
     - os: linux
       sudo: false
-      env: Ubu=12.04cont Cmd='make -C tests test-lz4 test-lz4c test-fasttest test-fullbench' COMPILER=cc
+      env: Ubu=12.04cont Cmd='make -C tests test-lz4 test-lz4c test-fullbench' COMPILER=cc
+
+    - os: linux
+      sudo: required
+      env: Ubu=12.04cont Cmd='sudo sysctl -w vm.mmap_min_addr="4096" && make -C tests test-frametest test-fuzzer' COMPILER=cc
 
     - os: linux
       sudo: false
-      env: Ubu=12.04cont Cmd='make -C tests test-frametest test-fuzzer' COMPILER=cc
-
-    - os: linux
-      sudo: false
-      env: Ubu=12.04cont Cmd="make gpptest && make clean examples && make clean cmake && make clean travis-install && make clean clangtest" COMPILER=cc
+      env: Ubu=12.04cont Cmd="make gpptest && make clean && make examples && make clean cmake && make clean travis-install && make clean clangtest" COMPILER=cc
 
 
     # 14.04 LTS Server Edition 64 bit
@@ -32,7 +32,8 @@
             - libc6-dev-i386
             - gcc-multilib
 
-    - env: Ubu=14.04 Cmd='make usan' COMPILER=clang
+    # presume clang >= v3.9.0 (needed for -Wcomma); MOREFLAGS is quoted so -Werror reaches the compiler instead of being parsed by make as its own -W option
+    - env: Ubu=14.04 Cmd='make usan MOREFLAGS="-Wcomma -Werror"' COMPILER=clang
       dist: trusty
       sudo: required
       addons:
@@ -48,6 +49,10 @@
           packages:
             - valgrind
 
+    - env: Ubu=14.04 Cmd='make ctocpptest' COMPILER=cc
+      dist: trusty
+      sudo: false
+
     - env: Ubu=14.04 Cmd='make -C tests test-lz4c32 test-fullbench32 versionsTest' COMPILER=cc
       dist: trusty
       sudo: required
@@ -58,7 +63,7 @@
             - libc6-dev-i386
             - gcc-multilib
 
-    - env: Ubu=14.04 Cmd='make -C tests test-frametest32 test-fuzzer32' COMPILER=cc
+    - env: Ubu=14.04 Cmd='sudo sysctl -w vm.mmap_min_addr="4096" && make -C tests test-frametest32 test-fuzzer32' COMPILER=cc
       dist: trusty
       sudo: required
       addons:
@@ -144,7 +149,15 @@
             - gcc-multilib
             - gcc-4.4
 
+    # tag-specific test
+    - if: tag =~ ^v[0-9]\.[0-9]
+      os: linux
+      sudo: false
+      env: Cmd="make -C tests checkTag && tests/checkTag $TRAVIS_BRANCH " COMPILER=cc
+
+
 script:
+  - uname -a
   - echo Cmd=$Cmd
   - $COMPILER -v
   - sh -c "$Cmd"
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 0000000..6aab067
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,16 @@
+Installation
+=============
+
+```
+make
+make install     # this command may require root access
+```
+
+LZ4's `Makefile` supports standard [Makefile conventions],
+including [staged installs], [redirection], or [command redefinition].
+It is compatible with parallel builds (`-j#`).
+
+[Makefile conventions]: https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html
+[staged installs]: https://www.gnu.org/prep/standards/html_node/DESTDIR.html
+[redirection]: https://www.gnu.org/prep/standards/html_node/Directory-Variables.html
+[command redefinition]: https://www.gnu.org/prep/standards/html_node/Utilities-in-Makefiles.html
diff --git a/Makefile b/Makefile
index 1432e6b..69a34b7 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 # ################################################################
 # LZ4 - Makefile
-# Copyright (C) Yann Collet 2011-2016
+# Copyright (C) Yann Collet 2011-present
 # All rights reserved.
 #
 # BSD license
@@ -26,16 +26,10 @@
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #
 # You can contact the author at :
-#  - LZ4 source repository : https://github.com/Cyan4973/lz4
+#  - LZ4 source repository : https://github.com/lz4/lz4
 #  - LZ4 forum froup : https://groups.google.com/forum/#!forum/lz4c
 # ################################################################
 
-DESTDIR ?=
-PREFIX  ?= /usr/local
-VOID    := /dev/null
-
-LIBDIR ?= $(PREFIX)/lib
-INCLUDEDIR=$(PREFIX)/include
 LZ4DIR  = lib
 PRGDIR  = programs
 TESTDIR = tests
@@ -44,102 +38,70 @@
 
 # Define nul output
 ifneq (,$(filter Windows%,$(OS)))
-EXT = .exe
+EXT  = .exe
+VOID = nul
 else
-EXT =
+EXT  =
+VOID = /dev/null
 endif
 
 
-.PHONY: default all lib lz4 clean test versionsTest examples
+.PHONY: default
+default: lib-release lz4-release
 
-default:
-	@$(MAKE) -C $(LZ4DIR)
-	@$(MAKE) -C $(PRGDIR)
-	@cp $(PRGDIR)/lz4$(EXT) .
+.PHONY: all
+all: allmost manuals
 
-all:
+.PHONY: allmost
+allmost: lib lz4 examples
+
+.PHONY: lib lib-release liblz4.a
+lib: liblz4.a
+lib lib-release liblz4.a:
 	@$(MAKE) -C $(LZ4DIR) $@
-	@$(MAKE) -C $(PRGDIR) $@
-	@$(MAKE) -C $(TESTDIR) $@
-	@$(MAKE) -C $(EXDIR) $@
 
-lib:
-	@$(MAKE) -C $(LZ4DIR)
-
-lz4:
+.PHONY: lz4 lz4-release
+lz4 : liblz4.a
+lz4-release : lib-release
+lz4 lz4-release :
 	@$(MAKE) -C $(PRGDIR) $@
 	@cp $(PRGDIR)/lz4$(EXT) .
 
-lz4-release:
-	@$(MAKE) -C $(PRGDIR)
-	@cp $(PRGDIR)/lz4$(EXT) .
+.PHONY: examples
+examples: liblz4.a
+	$(MAKE) -C $(EXDIR) all
 
+.PHONY: manuals
+manuals:
+	@$(MAKE) -C contrib/gen_manual $@
+
+.PHONY: clean
 clean:
+	@$(MAKE) -C $(LZ4DIR) $@ > $(VOID)
 	@$(MAKE) -C $(PRGDIR) $@ > $(VOID)
 	@$(MAKE) -C $(TESTDIR) $@ > $(VOID)
-	@$(MAKE) -C $(LZ4DIR) $@ > $(VOID)
 	@$(MAKE) -C $(EXDIR) $@ > $(VOID)
-	@$(MAKE) -C examples $@ > $(VOID)
+	@$(MAKE) -C contrib/gen_manual $@ > $(VOID)
 	@$(RM) lz4$(EXT)
 	@echo Cleaning completed
 
 
-#------------------------------------------------------------------------
-#make install is validated only for Linux, OSX, kFreeBSD, Hurd and
-#FreeBSD targets
-ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU FreeBSD))
+#-----------------------------------------------------------------------------
+# make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets
+#-----------------------------------------------------------------------------
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD))
 HOST_OS = POSIX
 
-install:
-	@$(MAKE) -C $(LZ4DIR) $@
-	@$(MAKE) -C $(PRGDIR) $@
-
-uninstall:
+.PHONY: install uninstall
+install uninstall:
 	@$(MAKE) -C $(LZ4DIR) $@
 	@$(MAKE) -C $(PRGDIR) $@
 
 travis-install:
-	$(MAKE) -j1 install PREFIX=~/install_test_dir
+	$(MAKE) -j1 install DESTDIR=~/install_test_dir
 
-test:
-	$(MAKE) -C $(TESTDIR) $@
-
-clangtest: clean
-	clang -v
-	@CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" $(MAKE) -C $(LZ4DIR)  all CC=clang
-	@CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" $(MAKE) -C $(PRGDIR)  all CC=clang
-	@CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" $(MAKE) -C $(TESTDIR) all CC=clang
-
-clangtest-native: clean
-	clang -v
-	@CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" $(MAKE) -C $(LZ4DIR)  all    CC=clang
-	@CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" $(MAKE) -C $(PRGDIR)  native CC=clang
-	@CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" $(MAKE) -C $(TESTDIR) native CC=clang
-
-usan: clean
-	CC=clang CFLAGS="-O3 -g -fsanitize=undefined" $(MAKE) test FUZZER_TIME="-T1mn" NB_LOOPS=-i1
-
-usan32: clean
-	CFLAGS="-m32 -O3 -g -fsanitize=undefined" $(MAKE) test FUZZER_TIME="-T1mn" NB_LOOPS=-i1
-
-staticAnalyze: clean
-	CFLAGS=-g scan-build --status-bugs -v $(MAKE) all
-
-platformTest: clean
-	@echo "\n ---- test lz4 with $(CC) compiler ----"
-	@$(CC) -v
-	CFLAGS="-O3 -Werror"         $(MAKE) -C $(LZ4DIR) all
-	CFLAGS="-O3 -Werror -static" $(MAKE) -C $(PRGDIR) all
-	CFLAGS="-O3 -Werror -static" $(MAKE) -C $(TESTDIR) all
-	$(MAKE) -C $(TESTDIR) test-platform
-
-versionsTest: clean
-	$(MAKE) -C $(TESTDIR) $@
-
-examples:
-	$(MAKE) -C $(LZ4DIR)
-	$(MAKE) -C $(PRGDIR) lz4
-	$(MAKE) -C examples test
+cmake: # configure and build the unofficial CMake project; '&&' stops at the first failing step so $(MAKE) never runs in the wrong directory
+	@cd contrib/cmake_unofficial && cmake $(CMAKE_PARAMS) CMakeLists.txt && $(MAKE)
 
 endif
 
@@ -155,29 +117,74 @@
 #------------------------------------------------------------------------
 ifneq (,$(filter $(HOST_OS),MSYS POSIX))
 
-cmake:
-	@cd contrib/cmake_unofficial; cmake $(CMAKE_PARAMS) CMakeLists.txt; $(MAKE)
+.PHONY: list
+list:
+	@$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs
 
-gpptest: clean
-	g++ -v
-	CC=g++ $(MAKE) -C $(LZ4DIR)  all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
-	CC=g++ $(MAKE) -C $(PRGDIR)  all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
-	CC=g++ $(MAKE) -C $(TESTDIR) all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
+.PHONY: check
+check:
+	$(MAKE) -C $(TESTDIR) test-lz4-essentials
 
-gpptest32: clean
-	g++ -v
-	CC=g++ $(MAKE) -C $(LZ4DIR)  all    CFLAGS="-m32 -O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
-	CC=g++ $(MAKE) -C $(PRGDIR)  native CFLAGS="-m32 -O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
-	CC=g++ $(MAKE) -C $(TESTDIR) native CFLAGS="-m32 -O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
+.PHONY: test
+test:
+	$(MAKE) -C $(TESTDIR) $@
+	$(MAKE) -C $(EXDIR) $@
+
+clangtest: clean
+	clang -v
+	@CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" $(MAKE) -C $(LZ4DIR)  all CC=clang
+	@CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" $(MAKE) -C $(PRGDIR)  all CC=clang
+	@CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" $(MAKE) -C $(TESTDIR) all CC=clang
+
+clangtest-native: clean
+	clang -v
+	@CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" $(MAKE) -C $(LZ4DIR)  all    CC=clang
+	@CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" $(MAKE) -C $(PRGDIR)  native CC=clang
+	@CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" $(MAKE) -C $(TESTDIR) native CC=clang
+
+usan: clean
+	CC=clang CFLAGS="-O3 -g -fsanitize=undefined" $(MAKE) test FUZZER_TIME="-T30s" NB_LOOPS=-i1
+
+usan32: clean
+	CFLAGS="-m32 -O3 -g -fsanitize=undefined" $(MAKE) test FUZZER_TIME="-T30s" NB_LOOPS=-i1
+
+staticAnalyze: clean
+	CFLAGS=-g scan-build --status-bugs -v $(MAKE) all
+
+platformTest: clean
+	@echo "\n ---- test lz4 with $(CC) compiler ----"
+	@$(CC) -v
+	CFLAGS="-O3 -Werror"         $(MAKE) -C $(LZ4DIR) all
+	CFLAGS="-O3 -Werror -static" $(MAKE) -C $(PRGDIR) all
+	CFLAGS="-O3 -Werror -static" $(MAKE) -C $(TESTDIR) all
+	$(MAKE) -C $(TESTDIR) test-platform
+
+.PHONY: versionsTest
+versionsTest: clean
+	$(MAKE) -C $(TESTDIR) $@
+
+gpptest gpptest32: CC = "$(CXX) -Wno-deprecated"
+gpptest gpptest32: CFLAGS = -O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror
+gpptest32: CFLAGS += -m32
+gpptest gpptest32: clean
+	$(CXX) -v
+	CC=$(CC) $(MAKE) -C $(LZ4DIR)  all CFLAGS="$(CFLAGS)"
+	CC=$(CC) $(MAKE) -C $(PRGDIR)  all CFLAGS="$(CFLAGS)"
+	CC=$(CC) $(MAKE) -C $(TESTDIR) all CFLAGS="$(CFLAGS)"
+
+ctocpptest: LIBCC="$(CC)"
+ctocpptest: TESTCC="$(CXX)"
+ctocpptest: CFLAGS=""
+ctocpptest: clean
+	CC=$(LIBCC)  $(MAKE) -C $(LZ4DIR)  CFLAGS="$(CFLAGS)" all
+	CC=$(LIBCC)  $(MAKE) -C $(TESTDIR) CFLAGS="$(CFLAGS)" lz4.o lz4hc.o lz4frame.o
+	CC=$(TESTCC) $(MAKE) -C $(TESTDIR) CFLAGS="$(CFLAGS)" all
 
 c_standards: clean
-	$(MAKE) all MOREFLAGS="-std=gnu90 -Werror"
-	$(MAKE) clean
-	$(MAKE) all MOREFLAGS="-std=c99 -Werror"
-	$(MAKE) clean
-	$(MAKE) all MOREFLAGS="-std=gnu99 -Werror"
-	$(MAKE) clean
-	$(MAKE) all MOREFLAGS="-std=c11 -Werror"
-	$(MAKE) clean
+	CFLAGS="-std=c90   -Werror" $(MAKE) clean allmost
+	CFLAGS="-std=gnu90 -Werror" $(MAKE) clean allmost
+	CFLAGS="-std=c99   -Werror" $(MAKE) clean allmost
+	CFLAGS="-std=gnu99 -Werror" $(MAKE) clean allmost
+	CFLAGS="-std=c11   -Werror" $(MAKE) clean allmost
 
 endif
diff --git a/NEWS b/NEWS
index 1d42954..13a9a1c 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,51 @@
+v1.8.3
+perf: minor decompression speed improvement (~+2%) with gcc
+fix : corruption in v1.8.2 at level 9 for files > 64KB under rare conditions (#560)
+cli : new command --fast, by @jennifermliu
+api : LZ4_decompress_safe_partial() now decodes exactly the nb of bytes requested (feature request #566)
+build : added Haiku target, by @fbrosson, and MidnightBSD, by @laffer1
+doc : updated documentation regarding dictionary compression
+
+v1.8.2
+perf: *much* faster dictionary compression on small files, by @felixhandte
+perf: improved decompression speed and binary size, by Alexey Tourbin (@svpv)
+perf: slightly faster HC compression and decompression speed
+perf: very small compression ratio improvement
+fix : compression compatible with low memory addresses (< 0xFFFF)
+fix : decompression segfault when provided with NULL input, by @terrelln
+cli : new command --favor-decSpeed
+cli : benchmark mode more accurate for small inputs
+fullbench : can bench _destSize() variants, by @felixhandte
+doc : clarified block format parsing restrictions, by Alexey Tourbin (@svpv)
+
+v1.8.1
+perf : faster and stronger ultra modes (levels 10+)
+perf : slightly faster compression and decompression speed
+perf : fix bad degenerative case, reported by @c-morgenstern
+fix : decompression failed when using a combination of extDict + low memory address (#397), reported and fixed by Julian Scheid (@jscheid)
+cli : support for dictionary compression (`-D`), by Felix Handte @felixhandte
+cli : fix : `lz4 -d --rm` preserves timestamp (#441)
+cli : fix : do not modify /dev/null permission as root, by @aliceatlas
+api : `_destSize()` variant supported for all compression levels
+build  : `make` and `make test` compatible with `-jX`, reported by @mwgamera
+build  : can control LZ4LIB_VISIBILITY macro, by @mikir
+install: fix man page directory (#387), reported by Stuart Cardall (@itoffshore)
+
+v1.8.0
+cli : fix : do not modify /dev/null permissions, reported by @Maokaman1
+cli : added GNU separator -- specifying that all following arguments are files
+API : added LZ4_compress_HC_destSize(), by Oleg (@remittor)
+API : added LZ4F_resetDecompressionContext()
+API : lz4frame : negative compression levels trigger fast acceleration, request by Lawrence Chan
+API : lz4frame : can control block checksum and dictionary ID
+API : fix : expose obsolete decoding functions, reported by Chen Yufei
+API : experimental : lz4frame_static : new dictionary compression API
+build : fix : static lib installation, by Ido Rosen
+build : dragonFlyBSD, OpenBSD, NetBSD supported
+build : LZ4_MEMORY_USAGE can be modified at compile time, through external define
+doc : Updated LZ4 Frame format to v1.6.0, restoring Dictionary-ID field
+doc : lz4 api manual, by Przemyslaw Skibinski
+
 v1.7.5
 lz4hc : new high compression mode : levels 10-12 compress more and slower, by Przemyslaw Skibinski
 lz4cat : fix : works with relative path (#284) and stdin (#285) (reported by @beiDei8z)
diff --git a/README.md b/README.md
index 04ba0db..e64020d 100644
--- a/README.md
+++ b/README.md
@@ -2,18 +2,23 @@
 ================================
 
 LZ4 is lossless compression algorithm,
-providing compression speed at 400 MB/s per core,
+providing compression speed > 500 MB/s per core,
 scalable with multi-cores CPU.
 It features an extremely fast decoder,
 with speed in multiple GB/s per core,
 typically reaching RAM speed limits on multi-core systems.
 
 Speed can be tuned dynamically, selecting an "acceleration" factor
-which trades compression ratio for more speed up.
+which trades compression ratio for faster speed.
 On the other end, a high compression derivative, LZ4_HC, is also provided,
 trading CPU time for improved compression ratio.
 All versions feature the same decompression speed.
 
+LZ4 is also compatible with [dictionary compression](https://github.com/facebook/zstd#the-case-for-small-data-compression),
+and can ingest any input file as dictionary,
+including those created by [Zstandard Dictionary Builder](https://github.com/facebook/zstd/blob/v1.3.5/programs/zstd.1.md#dictionary-builder).
+(note: only the final 64KB are used).
+
 LZ4 library is provided as open-source software using BSD 2-Clause license.
 
 
@@ -25,14 +30,13 @@
 [travisMasterBadge]: https://travis-ci.org/lz4/lz4.svg?branch=master "Continuous Integration test suite"
 [travisDevBadge]: https://travis-ci.org/lz4/lz4.svg?branch=dev "Continuous Integration test suite"
 [travisLink]: https://travis-ci.org/lz4/lz4
-[AppveyorMasterBadge]: https://ci.appveyor.com/api/projects/status/jc2yhgwyc7qqtsko/branch/master?svg=true "Windows test suite"
-[AppveyorDevBadge]: https://ci.appveyor.com/api/projects/status/jc2yhgwyc7qqtsko/branch/dev?svg=true "Windows test suite"
+[AppveyorMasterBadge]: https://ci.appveyor.com/api/projects/status/github/lz4/lz4?branch=master&svg=true "Windows test suite"
+[AppveyorDevBadge]: https://ci.appveyor.com/api/projects/status/github/lz4/lz4?branch=dev&svg=true "Windows test suite"
 [AppveyorLink]: https://ci.appveyor.com/project/YannCollet/lz4-1lndh
 [coverBadge]: https://scan.coverity.com/projects/4735/badge.svg "Static code analysis of Master branch"
 [coverlink]: https://scan.coverity.com/projects/4735
 
 > **Branch Policy:**
-
 > - The "master" branch is considered stable, at all times.
 > - The "dev" branch is the one where all contributions must be merged
     before being promoted to master.
@@ -44,33 +48,50 @@
 -------------------------
 
 The benchmark uses [lzbench], from @inikep
-compiled with GCC v6.2.0 on Linux 64-bits.
-The reference system uses a Core i7-3930K CPU @ 4.5GHz.
+compiled with GCC v7.3.0 on Linux 64-bits (Debian 4.15.17-1).
+The reference system uses a Core i7-6700K CPU @ 4.0GHz.
 Benchmark evaluates the compression of reference [Silesia Corpus]
 in single-thread mode.
 
 [lzbench]: https://github.com/inikep/lzbench
 [Silesia Corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
 
-|  Compressor            | Ratio   | Compression | Decompression |
-|  ----------            | -----   | ----------- | ------------- |
-|  memcpy                |  1.000  | 7300 MB/s   |   7300 MB/s   |
-|**LZ4 fast 8  (v1.7.3)**|  1.799  |**911 MB/s** | **3360 MB/s** |
-|**LZ4 default (v1.7.3)**|**2.101**|**625 MB/s** | **3220 MB/s** |
-|  LZO 2.09              |  2.108  |  620 MB/s   |    845 MB/s   |
-|  QuickLZ 1.5.0         |  2.238  |  510 MB/s   |    600 MB/s   |
-|  Snappy 1.1.3          |  2.091  |  450 MB/s   |   1550 MB/s   |
-|  LZF v3.6              |  2.073  |  365 MB/s   |    820 MB/s   |
-|  [Zstandard] 1.1.1 -1  |  2.876  |  330 MB/s   |    930 MB/s   |
-|  [Zstandard] 1.1.1 -3  |  3.164  |  200 MB/s   |    810 MB/s   |
-| [zlib] deflate 1.2.8 -1|  2.730  |  100 MB/s   |    370 MB/s   |
-|**LZ4 HC -9 (v1.7.3)**  |**2.720**|   34 MB/s   | **3240 MB/s** |
-| [zlib] deflate 1.2.8 -6|  3.099  |   33 MB/s   |    390 MB/s   |
+|  Compressor             | Ratio   | Compression | Decompression |
+|  ----------             | -----   | ----------- | ------------- |
+|  memcpy                 |  1.000  |13100 MB/s   |  13100 MB/s   |
+|**LZ4 default (v1.8.2)** |**2.101**|**730 MB/s** | **3900 MB/s** |
+|  LZO 2.09               |  2.108  |  630 MB/s   |    800 MB/s   |
+|  QuickLZ 1.5.0          |  2.238  |  530 MB/s   |    720 MB/s   |
+|  Snappy 1.1.4           |  2.091  |  525 MB/s   |   1750 MB/s   |
+|  [Zstandard] 1.3.4 -1   |  2.877  |  470 MB/s   |   1380 MB/s   |
+|  LZF v3.6               |  2.073  |  380 MB/s   |    840 MB/s   |
+| [zlib] deflate 1.2.11 -1|  2.730  |  100 MB/s   |    380 MB/s   |
+|**LZ4 HC -9 (v1.8.2)**   |**2.721**|   40 MB/s   | **3920 MB/s** |
+| [zlib] deflate 1.2.11 -6|  3.099  |   34 MB/s   |    410 MB/s   |
 
 [zlib]: http://www.zlib.net/
 [Zstandard]: http://www.zstd.net/
 
-LZ4 is also compatible and well optimized for x32 mode, for which it provides +10% speed performance.
+LZ4 is also compatible and optimized for x32 mode,
+for which it provides additional speed performance.
+
+
+Installation
+-------------------------
+
+```
+make
+make install     # this command may require root permissions
+```
+
+LZ4's `Makefile` supports standard [Makefile conventions],
+including [staged installs], [redirection], or [command redefinition].
+It is compatible with parallel builds (`-j#`).
+
+[Makefile conventions]: https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html
+[staged installs]: https://www.gnu.org/prep/standards/html_node/DESTDIR.html
+[redirection]: https://www.gnu.org/prep/standards/html_node/Directory-Variables.html
+[command redefinition]: https://www.gnu.org/prep/standards/html_node/Utilities-in-Makefiles.html
 
 
 Documentation
@@ -78,10 +99,10 @@
 
 The raw LZ4 block compression format is detailed within [lz4_Block_format].
 
-To compress an arbitrarily long file or data stream, multiple blocks are required.
-Organizing these blocks and providing a common header format to handle their content
-is the purpose of the Frame format, defined into [lz4_Frame_format].
-Interoperable versions of LZ4 must respect this frame format.
+Arbitrarily long files or data streams are compressed using multiple blocks,
+for streaming requirements. These blocks are organized into a frame,
+defined in [lz4_Frame_format].
+Interoperable versions of LZ4 must also respect the frame format.
 
 [lz4_Block_format]: doc/lz4_Block_format.md
 [lz4_Frame_format]: doc/lz4_Frame_format.md
diff --git a/appveyor.yml b/appveyor.yml
index 93c1101..056719a 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -82,7 +82,7 @@
       ECHO *** &&
       ECHO *** Building Visual Studio 2010 %PLATFORM%\%CONFIGURATION% &&
       ECHO *** &&
-      msbuild "visual\VS2010\lz4.sln" %ADDITIONALPARAM% /m /verbosity:minimal /property:PlatformToolset=v100 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
+      msbuild "visual\VS2010\lz4.sln" %ADDITIONALPARAM% /m /verbosity:minimal /property:PlatformToolset=v100 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /p:EnableWholeProgramOptimization=true /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
       ECHO *** &&
       ECHO *** Building Visual Studio 2012 %PLATFORM%\%CONFIGURATION% &&
       ECHO *** &&
diff --git a/contrib/cmake_unofficial/CMakeLists.txt b/contrib/cmake_unofficial/CMakeLists.txt
index 9a0983d..b09c4fb 100644
--- a/contrib/cmake_unofficial/CMakeLists.txt
+++ b/contrib/cmake_unofficial/CMakeLists.txt
@@ -12,6 +12,8 @@
 
 set(LZ4_TOP_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../..")
 
+option(LZ4_BUILD_LEGACY_LZ4C "Build lz4c program with legacy argument support" ON)
+
 # Parse version information
 file(STRINGS "${LZ4_TOP_SOURCE_DIR}/lib/lz4.h" LZ4_VERSION_MAJOR REGEX "^#define LZ4_VERSION_MAJOR +([0-9]+) +.*$")
 string(REGEX REPLACE "^#define LZ4_VERSION_MAJOR +([0-9]+) +.*$" "\\1" LZ4_VERSION_MAJOR "${LZ4_VERSION_MAJOR}")
@@ -122,14 +124,18 @@
 endif()
 
 # lz4
+set(LZ4_PROGRAMS_BUILT lz4cli)
 add_executable(lz4cli ${LZ4_CLI_SOURCES})
 set_target_properties(lz4cli PROPERTIES OUTPUT_NAME lz4)
 target_link_libraries(lz4cli ${LZ4_LINK_LIBRARY})
 
 # lz4c
-add_executable(lz4c ${LZ4_CLI_SOURCES})
-set_target_properties(lz4c PROPERTIES COMPILE_DEFINITIONS "ENABLE_LZ4C_LEGACY_OPTIONS")
-target_link_libraries(lz4c ${LZ4_LINK_LIBRARY})
+if (LZ4_BUILD_LEGACY_LZ4C)
+  list(APPEND LZ4_PROGRAMS_BUILT lz4c)
+  add_executable(lz4c ${LZ4_CLI_SOURCES})
+  set_target_properties(lz4c PROPERTIES COMPILE_DEFINITIONS "ENABLE_LZ4C_LEGACY_OPTIONS")
+  target_link_libraries(lz4c ${LZ4_LINK_LIBRARY})
+endif()
 
 # Extra warning flags
 include (CheckCCompilerFlag)
@@ -165,11 +171,12 @@
 if(NOT LZ4_BUNDLED_MODE)
   include(GNUInstallDirs)
 
-  install(TARGETS lz4cli lz4c
+  install(TARGETS ${LZ4_PROGRAMS_BUILT}
     RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")
   install(TARGETS ${LZ4_LIBRARIES_BUILT}
     LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
-    ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")
+    ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+    RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")
   install(FILES
     "${LZ4_LIB_SOURCE_DIR}/lz4.h"
     "${LZ4_LIB_SOURCE_DIR}/lz4frame.h"
@@ -215,4 +222,6 @@
   set(INCLUDEDIR "${CMAKE_INSTALL_FULL_INCLUDEDIR}")
 endif()
 
+# for liblz4.pc substitution
+set(VERSION ${LZ4_VERSION_STRING})
 configure_file(${LZ4_LIB_SOURCE_DIR}/liblz4.pc.in liblz4.pc @ONLY)
diff --git a/contrib/gen_manual/.gitignore b/contrib/gen_manual/.gitignore
new file mode 100644
index 0000000..6ea967f
--- /dev/null
+++ b/contrib/gen_manual/.gitignore
@@ -0,0 +1,2 @@
+# build artefact
+gen_manual
diff --git a/contrib/gen_manual/Makefile b/contrib/gen_manual/Makefile
index adbcca2..95abe2e 100644
--- a/contrib/gen_manual/Makefile
+++ b/contrib/gen_manual/Makefile
@@ -30,12 +30,20 @@
 # ################################################################
 
 
-CFLAGS ?= -O3
-CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wno-comment
-CFLAGS += $(MOREFLAGS)
-FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
+CXXFLAGS ?= -O3
+CXXFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wno-comment
+CXXFLAGS += $(MOREFLAGS)
+FLAGS   = $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS)
 
-
+LZ4API = ../../lib/lz4.h
+LZ4MANUAL = ../../doc/lz4_manual.html
+LZ4FAPI = ../../lib/lz4frame.h
+LZ4FMANUAL = ../../doc/lz4frame_manual.html
+LIBVER_MAJOR_SCRIPT:=`sed -n '/define LZ4_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LZ4API)`
+LIBVER_MINOR_SCRIPT:=`sed -n '/define LZ4_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LZ4API)`
+LIBVER_PATCH_SCRIPT:=`sed -n '/define LZ4_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LZ4API)`
+LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT)
+LZ4VER := $(shell echo $(LIBVER_SCRIPT))
 
 # Define *.exe as extension for Windows systems
 ifneq (,$(filter Windows%,$(OS)))
@@ -45,14 +53,24 @@
 endif
 
 
-.PHONY: default gen_manual
-
+.PHONY: default
 default: gen_manual
 
 gen_manual: gen_manual.cpp
-	$(CXX)      $(FLAGS) $^ -o $@$(EXT)
+	$(CXX) $(FLAGS) $^ -o $@$(EXT)
 
+$(LZ4MANUAL) : gen_manual $(LZ4API)
+	echo "Update lz4 manual in /doc"
+	./gen_manual $(LZ4VER) $(LZ4API) $@
 
+$(LZ4FMANUAL) : gen_manual $(LZ4FAPI)
+	echo "Update lz4frame manual in /doc"
+	./gen_manual $(LZ4VER) $(LZ4FAPI) $@
+
+.PHONY: manuals
+manuals: gen_manual $(LZ4MANUAL) $(LZ4FMANUAL)
+
+.PHONY: clean
 clean:
 	@$(RM) gen_manual$(EXT)
 	@echo Cleaning completed
diff --git a/contrib/gen_manual/gen-lz4-manual.sh b/contrib/gen_manual/gen-lz4-manual.sh
index 55d31a4..73a7214 100644
--- a/contrib/gen_manual/gen-lz4-manual.sh
+++ b/contrib/gen_manual/gen-lz4-manual.sh
@@ -6,4 +6,5 @@
 LIBVER_SCRIPT=$LIBVER_MAJOR_SCRIPT.$LIBVER_MINOR_SCRIPT.$LIBVER_PATCH_SCRIPT
 
 echo LZ4_VERSION=$LIBVER_SCRIPT
-./gen_manual $LIBVER_SCRIPT ../../lib/lz4.h ./lz4_manual.html
+./gen_manual "lz4 $LIBVER_SCRIPT" ../../lib/lz4.h ./lz4_manual.html
+./gen_manual "lz4frame $LIBVER_SCRIPT" ../../lib/lz4frame.h ./lz4frame_manual.html
diff --git a/contrib/gen_manual/gen_manual.cpp b/contrib/gen_manual/gen_manual.cpp
index 2df081d..65abd3a 100644
--- a/contrib/gen_manual/gen_manual.cpp
+++ b/contrib/gen_manual/gen_manual.cpp
@@ -89,11 +89,13 @@
 /* print line with LZ4LIB_API removed and C++ comments not bold */
 void print_line(stringstream &sout, string line)
 {
-    size_t spos;
+    size_t spos, epos;
 
     if (line.substr(0,11) == "LZ4LIB_API ") line = line.substr(11);
+    if (line.substr(0,12) == "LZ4FLIB_API ") line = line.substr(12);
     spos = line.find("/*");
-    if (spos!=string::npos) {
+    epos = line.find("*/");
+    if (spos!=string::npos && epos!=string::npos) {
         sout << line.substr(0, spos);
         sout << "</b>" << line.substr(spos) << "<b>" << endl;
     } else {
@@ -118,7 +120,7 @@
         return 1;
     }
 
-    version = "lz4 " + string(argv[1]) + " Manual";
+    version = string(argv[1]) + " Manual";
 
     istream.open(argv[2], ifstream::in);
     if (!istream.is_open()) {
@@ -158,36 +160,28 @@
             continue;
         }
 
-        /* comments of type /*= and /**= mean: use a <H3> header and show also all functions until first empty line */
-        if ((line.substr(0,3) == "/*=" || line.substr(0,4) == "/**=") && line.find("*/")!=string::npos) {
-            trim_comments(line);
-            trim(line, "= ");
-            sout << "<h3>" << line << "</h3><pre><b>";
-            lines = get_lines(input, ++linenum, "");
-            for (l=0; l<lines.size(); l++) {
-                print_line(sout, lines[l]);
-            }
-            sout << "</b></pre><BR>" << endl;
-            continue;
+        spos = line.find("/**=");
+        if (spos==string::npos) {
+            spos = line.find("/*!");
+            if (spos==string::npos)
+                spos = line.find("/**");
+            if (spos==string::npos)
+                spos = line.find("/*-");
+            if (spos==string::npos)
+                spos = line.find("/*=");
+            if (spos==string::npos)
+                continue;
+            exclam = line[spos+2];
         }
+        else exclam = '=';
 
-        spos = line.find("/*!");
-        if (spos==string::npos)
-            spos = line.find("/**");
-        if (spos==string::npos)
-            spos = line.find("/*-");
-
-        if (spos==string::npos)
-            continue;
-
-        exclam = line[spos+2];
         comments = get_lines(input, linenum, "*/");
         if (!comments.empty()) comments[0] = line.substr(spos+3);
         if (!comments.empty()) comments[comments.size()-1] = comments[comments.size()-1].substr(0, comments[comments.size()-1].find("*/"));
         for (l=0; l<comments.size(); l++) {
             if (comments[l].find(" *")==0) comments[l] = comments[l].substr(2);
             else if (comments[l].find("  *")==0) comments[l] = comments[l].substr(3);
-            trim(comments[l], "*-");
+            trim(comments[l], "*-=");
         }
         while (!comments.empty() && comments[comments.size()-1].empty()) comments.pop_back(); // remove empty line at the end
         while (!comments.empty() && comments[0].empty()) comments.erase(comments.begin()); // remove empty line at the start
@@ -208,6 +202,18 @@
                 print_line(sout, comments[l]);
             }
             sout << "</p></pre><BR>" << endl << endl;
+        } else if (exclam == '=') { /* comments of type /*= and /**= mean: use a <H3> header and show also all functions until first empty line */
+            trim(comments[0], " ");
+            sout << "<h3>" << comments[0] << "</h3><pre>";
+            for (l=1; l<comments.size(); l++) {
+                print_line(sout, comments[l]);
+            }
+            sout << "</pre><b><pre>";
+            lines = get_lines(input, ++linenum, "");
+            for (l=0; l<lines.size(); l++) {
+                print_line(sout, lines[l]);
+            }
+            sout << "</pre></b><BR>" << endl;
         } else { /* comments of type /** and /*- mean: this is a comment; use a <H2> header for the first line */
             if (comments.empty()) continue;
 
@@ -238,4 +244,4 @@
     ostream << "</html>" << endl << "</body>" << endl;
 
     return 0;
-}
+}
\ No newline at end of file
diff --git a/doc/images/usingCDict_1_8_2.png b/doc/images/usingCDict_1_8_2.png
new file mode 100644
index 0000000..9434198
--- /dev/null
+++ b/doc/images/usingCDict_1_8_2.png
Binary files differ
diff --git a/doc/lz4_Block_format.md b/doc/lz4_Block_format.md
index 0f6a5ba..5438730 100644
--- a/doc/lz4_Block_format.md
+++ b/doc/lz4_Block_format.md
@@ -1,6 +1,6 @@
 LZ4 Block Format Description
 ============================
-Last revised: 2015-05-07.
+Last revised: 2018-04-25.
 Author : Yann Collet
 
 
@@ -29,8 +29,8 @@
 A sequence is a suite of literals (not-compressed bytes),
 followed by a match copy.
 
-Each sequence starts with a token.
-The token is a one byte value, separated into two 4-bits fields.
+Each sequence starts with a `token`.
+The `token` is a one byte value, separated into two 4-bits fields.
 Therefore each field ranges from 0 to 15.
 
 
@@ -42,46 +42,46 @@
 Each additional byte then represent a value from 0 to 255,
 which is added to the previous value to produce a total length.
 When the byte value is 255, another byte is output.
-There can be any number of bytes following the token. There is no "size limit".
+There can be any number of bytes following `token`. There is no "size limit".
 (Side note : this is why a not-compressible input block is expanded by 0.4%).
 
-Example 1 : A length of 48 will be represented as :
+Example 1 : A literal length of 48 will be represented as :
 
   - 15 : value for the 4-bits High field
   - 33 : (=48-15) remaining length to reach 48
 
-Example 2 : A length of 280 will be represented as :
+Example 2 : A literal length of 280 will be represented as :
 
   - 15  : value for the 4-bits High field
   - 255 : following byte is maxed, since 280-15 >= 255
   - 10  : (=280 - 15 - 255) ) remaining length to reach 280
 
-Example 3 : A length of 15 will be represented as :
+Example 3 : A literal length of 15 will be represented as :
 
   - 15 : value for the 4-bits High field
   - 0  : (=15-15) yes, the zero must be output
 
-Following the token and optional length bytes, are the literals themselves.
+Following `token` and optional length bytes, are the literals themselves.
 They are exactly as numerous as previously decoded (length of literals).
 It's possible that there are zero literal.
 
 
 Following the literals is the match copy operation.
 
-It starts by the offset.
+It starts by the `offset`.
 This is a 2 bytes value, in little endian format
 (the 1st byte is the "low" byte, the 2nd one is the "high" byte).
 
-The offset represents the position of the match to be copied from.
+The `offset` represents the position of the match to be copied from.
 1 means "current position - 1 byte".
-The maximum offset value is 65535, 65536 cannot be coded.
+The maximum `offset` value is 65535, 65536 cannot be coded.
 Note that 0 is an invalid value, not used.
 
-Then we need to extract the match length.
+Then we need to extract the `matchlength`.
 For this, we use the second token field, the low 4-bits.
 Value, obviously, ranges from 0 to 15.
 However here, 0 means that the copy operation will be minimal.
-The minimum length of a match, called minmatch, is 4.
+The minimum length of a match, called `minmatch`, is 4.
 As a consequence, a 0 value means 4 bytes, and a value of 15 means 19+ bytes.
 Similar to literal length, on reaching the highest possible value (15),
 we output additional bytes, one at a time, with values ranging from 0 to 255.
@@ -90,10 +90,18 @@
 There is no limit to the number of optional bytes that can be output this way.
 (This points towards a maximum achievable compression ratio of about 250).
 
-With the offset and the matchlength,
-the decoder can now proceed to copy the data from the already decoded buffer.
-On decoding the matchlength, we reach the end of the compressed sequence,
-and therefore start another one.
+Decoding the `matchlength` reaches the end of current sequence.
+Next byte will be the start of another sequence.
+But before moving to next sequence,
+it's time to use the decoded match position and length.
+The decoder copies `matchlength` bytes from match position to current position.
+
+In some cases, `matchlength` is larger than `offset`.
+Therefore, `match_pos + matchlength > current_pos`,
+which means that later bytes to copy are not yet decoded.
+This is called an "overlap match", and must be handled with special care.
+A common case is an offset of 1,
+meaning the last byte is repeated `matchlength` times.
 
 
 Parsing restrictions
@@ -101,15 +109,28 @@
 There are specific parsing rules to respect in order to remain compatible
 with assumptions made by the decoder :
 
-1. The last 5 bytes are always literals
+1. The last 5 bytes are always literals.  In other words, the last five bytes
+   from the uncompressed input (or all bytes, if the input has fewer than five
+   bytes) must be encoded as literals on behalf of the last sequence.
+   The last sequence is incomplete, and stops right after the literals.
 2. The last match must start at least 12 bytes before end of block.
-   Consequently, a block with less than 13 bytes cannot be compressed.
+   The last match is part of the penultimate sequence,
+   since the last sequence stops right after literals.
+   Note that, as a consequence, blocks < 13 bytes cannot be compressed.
 
 These rules are in place to ensure that the decoder
-will never read beyond the input buffer, nor write beyond the output buffer.
+can speculatively execute copy instructions
+without ever reading or writing beyond the provided I/O buffers.
 
-Note that the last sequence is also incomplete,
-and stops right after literals.
+1. To copy literals from a non-last sequence, an 8-byte copy instruction
+   can always be safely issued (without reading past the input),
+   because literals are followed by a 2-byte offset,
+   and last sequence is at least 1+5 bytes long.
+2. Similarly, a match operation can speculatively copy up to 12 bytes
+   while remaining within output buffer boundaries.
+
+Empty inputs can be represented with a zero byte,
+interpreted as a token without literals and without a match.
 
 
 Additional notes
diff --git a/doc/lz4_Frame_format.md b/doc/lz4_Frame_format.md
index 2ea1a86..0c98df1 100644
--- a/doc/lz4_Frame_format.md
+++ b/doc/lz4_Frame_format.md
@@ -1,7 +1,7 @@
 LZ4 Frame Format Description
 ============================
 
-###Notices
+### Notices
 
 Copyright (c) 2013-2015 Yann Collet
 
@@ -14,9 +14,9 @@
 are clearly marked.
 Distribution of this document is unlimited.
 
-###Version
+### Version
 
-1.5.1 (31/03/2015)
+1.6.1 (30/01/2018)
 
 
 Introduction
@@ -63,7 +63,7 @@
 
 | MagicNb | F. Descriptor | Block | (...) | EndMark | C. Checksum |
 |:-------:|:-------------:| ----- | ----- | ------- | ----------- |
-| 4 bytes |  3-11 bytes   |       |       | 4 bytes | 0-4 bytes   |
+| 4 bytes |  3-15 bytes   |       |       | 4 bytes | 0-4 bytes   |
 
 __Magic Number__
 
@@ -72,12 +72,15 @@
 
 __Frame Descriptor__
 
-3 to 11 Bytes, to be detailed in the next part.
-Most important part of the spec.
+3 to 15 Bytes, to be detailed in its own paragraph,
+as it is the most important part of the spec.
+
+The combined __Magic Number__ and __Frame Descriptor__ fields are sometimes
+called ___LZ4 Frame Header___. Its size varies between 7 and 19 bytes.
 
 __Data Blocks__
 
-To be detailed later on.
+To be detailed in its own paragraph.
 That’s where compressed data is stored.
 
 __EndMark__
@@ -98,6 +101,9 @@
 and also that the encoding/decoding process itself generated no distortion.
 Its usage is recommended.
 
+The combined __EndMark__ and __Content Checksum__ fields might sometimes be
+referred to as ___LZ4 Frame Footer___. Its size varies between 4 and 8 bytes.
+
 __Frame Concatenation__
 
 In some circumstances, it may be preferable to append multiple frames,
@@ -118,31 +124,31 @@
 Frame Descriptor
 ----------------
 
-| FLG     | BD      | (Content Size) | HC      |
-| ------- | ------- |:--------------:| ------- |
-| 1 byte  | 1 byte  |  0 - 8 bytes   | 1 byte  |
+| FLG     | BD      | (Content Size) | (Dictionary ID) | HC      |
+| ------- | ------- |:--------------:|:---------------:| ------- |
+| 1 byte  | 1 byte  |  0 - 8 bytes   |   0 - 4 bytes   | 1 byte  |
 
 The descriptor uses a minimum of 3 bytes,
-and up to 11 bytes depending on optional parameters.
+and up to 15 bytes depending on optional parameters.
 
 __FLG byte__
 
-|  BitNb  |   7-6   |    5    |     4     |   3     |     2     |    1-0   |
-| ------- | ------- | ------- | --------- | ------- | --------- | -------- |
-|FieldName| Version | B.Indep | B.Checksum| C.Size  | C.Checksum|*Reserved*|
+|  BitNb  |  7-6  |   5   |    4     |  3   |    2     |    1     |   0  |
+| ------- |-------|-------|----------|------|----------|----------|------|
+|FieldName|Version|B.Indep|B.Checksum|C.Size|C.Checksum|*Reserved*|DictID|
 
 
 __BD byte__
 
-|  BitNb  |     7    |     6-5-4    |  3-2-1-0 |
-| ------- | -------- | ------------ | -------- |
-|FieldName|*Reserved*| Block MaxSize|*Reserved*|
+|  BitNb  |     7    |     6-5-4     |  3-2-1-0 |
+| ------- | -------- | ------------- | -------- |
+|FieldName|*Reserved*| Block MaxSize |*Reserved*|
 
 In the tables, bit 7 is highest bit, while bit 0 is lowest.
 
 __Version Number__
 
-2-bits field, must be set to “01”.
+2-bits field, must be set to `01`.
 Any other value cannot be decoded by this version of the specification.
 Other version numbers will use different flag layouts.
 
@@ -154,7 +160,7 @@
 In such case, it’s necessary to decode all blocks in sequence.
 
 Block dependency improves compression ratio, especially for small blocks.
-On the other hand, it makes direct jumps or multi-threaded decoding impossible.
+On the other hand, it makes random access or multi-threaded decoding impossible.
 
 __Block checksum flag__
 
@@ -172,13 +178,17 @@
 
 __Content checksum flag__
 
-If this flag is set, a content checksum will be appended after the EndMark.
+If this flag is set, a 32-bits content checksum will be appended
+after the EndMark.
 
-Recommended value : “1” (content checksum is present)
+__Dictionary ID flag__
+
+If this flag is set, a 4-bytes Dict-ID field will be present,
+after the descriptor flags and the Content Size.
 
 __Block Maximum Size__
 
-This information is intended to help the decoder allocate memory.
+This information is useful to help the decoder allocate memory.
 Size here refers to the original (uncompressed) data size.
 Block Maximum Size is one value among the following table :
 
@@ -186,17 +196,17 @@
 | --- | --- | --- | --- | ----- | ------ | ---- | ---- |
 | N/A | N/A | N/A | N/A | 64 KB | 256 KB | 1 MB | 4 MB |
 
-The decoder may refuse to allocate block sizes above a (system-specific) size.
+The decoder may refuse to allocate block sizes above any system-specific size.
 Unused values may be used in a future revision of the spec.
-A decoder conformant to the current version of the spec
-is only able to decode blocksizes defined in this spec.
+A decoder conformant with the current version of the spec
+is only able to decode block sizes defined in this spec.
 
 __Reserved bits__
 
 Value of reserved bits **must** be 0 (zero).
 Reserved bit might be used in a future version of the specification,
 typically enabling new optional features.
-If this happens, a decoder respecting the current version of the specification
+When this happens, a decoder respecting the current specification version
 shall not be able to decode such a frame.
 
 __Content Size__
@@ -208,12 +218,32 @@
 This value is informational, typically for display or memory allocation.
 It can be skipped by a decoder, or used to validate content correctness.
 
+__Dictionary ID__
+
+Dict-ID is only present if the associated flag is set.
+It's an unsigned 32-bits value, stored using little-endian convention.
+A dictionary is useful to compress short input sequences.
+The compressor can take advantage of the dictionary context
+to encode the input in a more compact manner.
+It works as a kind of “known prefix” which is used by
+both the compressor and the decompressor to “warm-up” reference tables.
+
+The decompressor can use Dict-ID identifier to determine
+which dictionary must be used to correctly decode data.
+The compressor and the decompressor must use exactly the same dictionary.
+It's presumed that the 32-bits dictID uniquely identifies a dictionary.
+
+Within a single frame, a single dictionary can be defined.
+When the frame descriptor defines independent blocks,
+each block will be initialized with the same dictionary.
+If the frame descriptor defines linked blocks,
+the dictionary will only be used once, at the beginning of the frame.
+
 __Header Checksum__
 
 One-byte checksum of combined descriptor fields, including optional ones.
-The value is the second byte of xxh32() : ` (xxh32()>>8) & 0xFF `
-using zero as a seed,
-and the full Frame Descriptor as an input
+The value is the second byte of `xxh32()` : ` (xxh32()>>8) & 0xFF `
+using zero as a seed, and the full Frame Descriptor as an input
 (including optional fields when they are present).
 A wrong checksum indicates an error in the descriptor.
 Header checksum is informational and can be skipped.
@@ -347,7 +377,7 @@
 
 Alternatively, if the frame is followed by a valid Frame Magic Number,
 it is considered completed.
-It makes legacy frames compatible with frame concatenation.
+This policy makes it possible to concatenate legacy frames.
 
 Any other value will be interpreted as a block size,
 and trigger an error if it does not fit within acceptable range.
@@ -356,7 +386,11 @@
 Version changes
 ---------------
 
-1.5.1 : changed format to MarkDown compatible
+1.6.1 : introduced terms "LZ4 Frame Header" and "LZ4 Frame Footer"
+
+1.6.0 : restored Dictionary ID field in Frame header
+
+1.5.1 : changed document format to MarkDown
 
 1.5 : removed Dictionary ID from specification
 
diff --git a/doc/lz4_manual.html b/doc/lz4_manual.html
index bc46645..6ebf8d2 100644
--- a/doc/lz4_manual.html
+++ b/doc/lz4_manual.html
@@ -1,20 +1,23 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>lz4 1.7.2 Manual</title>
+<title>1.8.3 Manual</title>
 </head>
 <body>
-<h1>lz4 1.7.2 Manual</h1>
+<h1>1.8.3 Manual</h1>
 <hr>
 <a name="Contents"></a><h2>Contents</h2>
 <ol>
 <li><a href="#Chapter1">Introduction</a></li>
-<li><a href="#Chapter2">Tuning parameter</a></li>
-<li><a href="#Chapter3">Private definitions</a></li>
+<li><a href="#Chapter2">Version</a></li>
+<li><a href="#Chapter3">Tuning parameter</a></li>
 <li><a href="#Chapter4">Simple Functions</a></li>
 <li><a href="#Chapter5">Advanced Functions</a></li>
 <li><a href="#Chapter6">Streaming Compression Functions</a></li>
 <li><a href="#Chapter7">Streaming Decompression Functions</a></li>
+<li><a href="#Chapter8">Unstable declarations</a></li>
+<li><a href="#Chapter9">Private definitions</a></li>
+<li><a href="#Chapter10">Obsolete Functions</a></li>
 </ol>
 <hr>
 <a name="Chapter1"></a><h2>Introduction</h2><pre>
@@ -29,89 +32,57 @@
     - unbounded multiple steps (described as Streaming compression)
 
   lz4.h provides block compression functions. It gives full buffer control to user.
-  Block compression functions are not-enough to send information,
-  since it's still necessary to provide metadata (such as compressed size),
-  and each application can do it in whichever way it wants.
-  For interoperability, there is LZ4 frame specification (doc/lz4_Frame_format.md).
+  Decompressing an lz4-compressed block also requires metadata (such as compressed size).
+  Each application is free to encode such metadata in whichever way it wants.
+
+  An additional format, called LZ4 frame specification (doc/lz4_Frame_format.md),
+  takes care of encoding standard metadata alongside LZ4-compressed blocks.
+  If your application requires interoperability, it's recommended to use it.
   A library is provided to take care of it, see lz4frame.h.
 <BR></pre>
 
-<h3>Version</h3><pre><b>int LZ4_versionNumber (void);
-const char* LZ4_versionString (void);
-</b></pre><BR>
-<a name="Chapter2"></a><h2>Tuning parameter</h2><pre></pre>
+<a name="Chapter2"></a><h2>Version</h2><pre></pre>
 
-<pre><b>#define LZ4_MEMORY_USAGE 14
+<pre><b>int LZ4_versionNumber (void);  </b>/**< library version number; useful to check dll version */<b>
+</b></pre><BR>
+<pre><b>const char* LZ4_versionString (void);   </b>/**< library version string; useful to check dll version */<b>
+</b></pre><BR>
+<a name="Chapter3"></a><h2>Tuning parameter</h2><pre></pre>
+
+<pre><b>#ifndef LZ4_MEMORY_USAGE
+# define LZ4_MEMORY_USAGE 14
+#endif
 </b><p> Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
  Increasing memory usage improves compression ratio
- Reduced memory usage can improve speed, due to cache effect
+ Reduced memory usage may improve speed, thanks to cache effect
  Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
  
 </p></pre><BR>
 
-<a name="Chapter3"></a><h2>Private definitions</h2><pre>
- Do not use these definitions.
- They are exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
- If you use these definitions in your code, it will break when you upgrade LZ4 to a new version.
-<BR></pre>
-
-<pre><b>typedef struct {
-    uint32_t hashTable[LZ4_HASH_SIZE_U32];
-    uint32_t currentOffset;
-    uint32_t initCheck;
-    const uint8_t* dictionary;
-    uint8_t* bufferStart;   </b>/* obsolete, used for slideInputBuffer */<b>
-    uint32_t dictSize;
-} LZ4_stream_t_internal;
-</b></pre><BR>
-<pre><b>typedef struct {
-    const uint8_t* externalDict;
-    size_t extDictSize;
-    const uint8_t* prefixEnd;
-    size_t prefixSize;
-} LZ4_streamDecode_t_internal;
-</b></pre><BR>
-<pre><b>typedef struct {
-    unsigned int hashTable[LZ4_HASH_SIZE_U32];
-    unsigned int currentOffset;
-    unsigned int initCheck;
-    const unsigned char* dictionary;
-    unsigned char* bufferStart;   </b>/* obsolete, used for slideInputBuffer */<b>
-    unsigned int dictSize;
-} LZ4_stream_t_internal;
-</b></pre><BR>
-<pre><b>typedef struct {
-    const unsigned char* externalDict;
-    size_t extDictSize;
-    const unsigned char* prefixEnd;
-    size_t prefixSize;
-} LZ4_streamDecode_t_internal;
-</b></pre><BR>
 <a name="Chapter4"></a><h2>Simple Functions</h2><pre></pre>
 
-<pre><b>int LZ4_compress_default(const char* source, char* dest, int sourceSize, int maxDestSize);
-</b><p>    Compresses 'sourceSize' bytes from buffer 'source'
-    into already allocated 'dest' buffer of size 'maxDestSize'.
-    Compression is guaranteed to succeed if 'maxDestSize' >= LZ4_compressBound(sourceSize).
+<pre><b>int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
+</b><p>    Compresses 'srcSize' bytes from buffer 'src'
+    into already allocated 'dst' buffer of size 'dstCapacity'.
+    Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
     It also runs faster, so it's a recommended setting.
-    If the function cannot compress 'source' into a more limited 'dest' budget,
+    If the function cannot compress 'src' into a more limited 'dst' budget,
     compression stops *immediately*, and the function result is zero.
-    As a consequence, 'dest' content is not valid.
-    This function never writes outside 'dest' buffer, nor read outside 'source' buffer.
-        sourceSize  : Max supported value is LZ4_MAX_INPUT_VALUE
-        maxDestSize : full or partial size of buffer 'dest' (which must be already allocated)
-        return : the number of bytes written into buffer 'dest' (necessarily <= maxOutputSize)
-              or 0 if compression fails 
+    Note : as a consequence, 'dst' content is not valid.
+    Note 2 : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor reads outside 'src' buffer).
+        srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
+        dstCapacity : size of buffer 'dst' (which must be already allocated)
+        return  : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
+                  or 0 if compression fails 
 </p></pre><BR>
 
-<pre><b>int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize);
-</b><p>    compressedSize : is the precise full size of the compressed block.
-    maxDecompressedSize : is the size of destination buffer, which must be already allocated.
-    return : the number of bytes decompressed into destination buffer (necessarily <= maxDecompressedSize)
-             If destination buffer is not large enough, decoding will stop and output an error code (<0).
+<pre><b>int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
+</b><p>    compressedSize : is the exact complete size of the compressed block.
+    dstCapacity : is the size of destination buffer, which must be already allocated.
+    return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
+             If destination buffer is not large enough, decoding will stop and output an error code (negative value).
              If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             This function is protected against buffer overflow exploits, including malicious data packets.
-             It never writes outside output buffer, nor reads outside input buffer.
+             This function is protected against malicious data packets.
 </p></pre><BR>
 
 <a name="Chapter5"></a><h2>Advanced Functions</h2><pre></pre>
@@ -120,168 +91,367 @@
 </b><p>    Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
     This function is primarily useful for memory allocation purposes (destination buffer size).
     Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
-    Note that LZ4_compress_default() compress faster when dest buffer size is >= LZ4_compressBound(srcSize)
+    Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize)
         inputSize  : max supported value is LZ4_MAX_INPUT_SIZE
         return : maximum output size in a "worst case" scenario
-              or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
+              or 0, if input size is incorrect (too large or negative)
 </p></pre><BR>
 
-<pre><b>int LZ4_compress_fast (const char* source, char* dest, int sourceSize, int maxDestSize, int acceleration);
-</b><p>    Same as LZ4_compress_default(), but allows to select an "acceleration" factor.
+<pre><b>int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+</b><p>    Same as LZ4_compress_default(), but allows selection of "acceleration" factor.
     The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
     It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
     An acceleration value of "1" is the same as regular LZ4_compress_default()
-    Values <= 0 will be replaced by ACCELERATION_DEFAULT (see lz4.c), which is 1.
+    Values <= 0 will be replaced by ACCELERATION_DEFAULT (currently == 1, see lz4.c).
 </p></pre><BR>
 
 <pre><b>int LZ4_sizeofState(void);
-int LZ4_compress_fast_extState (void* state, const char* source, char* dest, int inputSize, int maxDestSize, int acceleration);
+int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
 </b><p>    Same compression function, just using an externally allocated memory space to store compression state.
     Use LZ4_sizeofState() to know how much memory must be allocated,
     and allocate it on 8-bytes boundaries (using malloc() typically).
-    Then, provide it as 'void* state' to compression function.
+    Then, provide this buffer as 'void* state' to compression function.
 </p></pre><BR>
 
-<pre><b>int LZ4_compress_destSize (const char* source, char* dest, int* sourceSizePtr, int targetDestSize);
-</b><p>    Reverse the logic, by compressing as much data as possible from 'source' buffer
-    into already allocated buffer 'dest' of size 'targetDestSize'.
-    This function either compresses the entire 'source' content into 'dest' if it's large enough,
-    or fill 'dest' buffer completely with as much data as possible from 'source'.
-        *sourceSizePtr : will be modified to indicate how many bytes where read from 'source' to fill 'dest'.
-                         New value is necessarily <= old value.
-        return : Nb bytes written into 'dest' (necessarily <= targetDestSize)
-              or 0 if compression fails
+<pre><b>int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);
+</b><p>  Reverse the logic : compresses as much data as possible from 'src' buffer
+  into already allocated buffer 'dst', of size >= 'targetDstSize'.
+  This function either compresses the entire 'src' content into 'dst' if it's large enough,
+  or fills 'dst' buffer completely with as much data as possible from 'src'.
+  note: acceleration parameter is fixed to "default".
+
+ *srcSizePtr : will be modified to indicate how many bytes were read from 'src' to fill 'dst'.
+               New value is necessarily <= input value.
+ @return : Nb bytes written into 'dst' (necessarily <= targetDstSize)
+           or 0 if compression fails.
 </p></pre><BR>
 
-<pre><b>int LZ4_decompress_fast (const char* source, char* dest, int originalSize);
-</b><p>    originalSize : is the original and therefore uncompressed size
-    return : the number of bytes read from the source buffer (in other words, the compressed size)
-             If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             Destination buffer must be already allocated. Its size must be a minimum of 'originalSize' bytes.
-    note : This function fully respect memory boundaries for properly formed compressed data.
-           It is a bit faster than LZ4_decompress_safe().
-           However, it does not provide any protection against intentionally modified data stream (malicious input).
-           Use this function in trusted environment only (data to decode comes from a trusted source).
+<pre><b>int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
+</b><p>  This function used to be a bit faster than LZ4_decompress_safe(),
+  though situation has changed in recent versions,
+  and now `LZ4_decompress_safe()` can be as fast and sometimes faster than `LZ4_decompress_fast()`.
+  Moreover, LZ4_decompress_fast() is not protected vs malformed input, as it doesn't perform full validation of compressed data.
+  As a consequence, this function is no longer recommended, and may be deprecated in future versions.
+  Its only remaining specificity is that it can decompress data without knowing its compressed size.
+
+  originalSize : is the uncompressed size to regenerate.
+                 `dst` must be already allocated, its size must be >= 'originalSize' bytes.
+ @return : number of bytes read from source buffer (== compressed size).
+           If the source stream is detected malformed, the function stops decoding and returns a negative result.
+  note : This function requires uncompressed originalSize to be known in advance.
+         The function never writes past the output buffer.
+         However, since it doesn't know its 'src' size, it may read past the intended input.
+         Also, because match offsets are not validated during decoding,
+         reads from 'src' may underflow.
+         Use this function in trusted environment **only**.
+ 
 </p></pre><BR>
 
-<pre><b>int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize);
-</b><p>    This function decompress a compressed block of size 'compressedSize' at position 'source'
-    into destination buffer 'dest' of size 'maxDecompressedSize'.
-    The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached,
-    reducing decompression time.
-    return : the number of bytes decoded in the destination buffer (necessarily <= maxDecompressedSize)
-       Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller.
-             Always control how many bytes were decoded.
-             If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets
+<pre><b>int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
+</b><p>  Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
+  into destination buffer 'dst' of size 'dstCapacity'.
+  Up to 'targetOutputSize' bytes will be decoded.
+  The function stops decoding on reaching this objective,
+  which can boost performance when only the beginning of a block is required.
+
+ @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity)
+           If source stream is detected malformed, function returns a negative result.
+
+  Note : @return can be < targetOutputSize, if compressed block contains less data.
+
+  Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity,
+           and expects targetOutputSize <= dstCapacity.
+           It effectively stops decoding on reaching targetOutputSize,
+           so dstCapacity is kind of redundant.
+           This is because in a previous version of this function,
+           decoding operation would not "break" a sequence in the middle.
+           As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize,
+           it could write more bytes, though only up to dstCapacity.
+           Some "margin" used to be required for this operation to work properly.
+           This is no longer necessary.
+           The function nonetheless keeps its signature, in an effort to not break API.
+ 
 </p></pre><BR>
 
 <a name="Chapter6"></a><h2>Streaming Compression Functions</h2><pre></pre>
 
-<pre><b>typedef struct {
-  union {
-    long long table[LZ4_STREAMSIZE_U64];
-    LZ4_stream_t_internal internal_donotuse;
-  };
-} LZ4_stream_t;
-</b><p> information structure to track an LZ4 stream.
- important : init this structure content before first use !
- note : only allocated directly the structure if you are statically linking LZ4
-        If you are using liblz4 as a DLL, please use below construction methods instead.
- 
-</p></pre><BR>
-
-<pre><b>void LZ4_resetStream (LZ4_stream_t* streamPtr);
-</b><p>  Use this function to init an allocated `LZ4_stream_t` structure
- 
-</p></pre><BR>
-
 <pre><b>LZ4_stream_t* LZ4_createStream(void);
 int           LZ4_freeStream (LZ4_stream_t* streamPtr);
 </b><p>  LZ4_createStream() will allocate and initialize an `LZ4_stream_t` structure.
   LZ4_freeStream() releases its memory.
-  In the context of a DLL (liblz4), please use these methods rather than the static struct.
-  They are more future proof, in case of a change of `LZ4_stream_t` size.
+ 
+</p></pre><BR>
+
+<pre><b>void LZ4_resetStream (LZ4_stream_t* streamPtr);
+</b><p>  An LZ4_stream_t structure can be allocated once and re-used multiple times.
+  Use this function to start compressing a new stream.
  
 </p></pre><BR>
 
 <pre><b>int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
-</b><p>  Use this function to load a static dictionary into LZ4_stream.
+</b><p>  Use this function to load a static dictionary into LZ4_stream_t.
   Any previous data will be forgotten, only 'dictionary' will remain in memory.
-  Loading a size of 0 is allowed.
-  Return : dictionary size, in bytes (necessarily <= 64 KB)
+  Loading a size of 0 is allowed, and is the same as reset.
+ @return : dictionary size, in bytes (necessarily <= 64 KB)
  
 </p></pre><BR>
 
-<pre><b>int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int maxDstSize, int acceleration);
-</b><p>  Compress buffer content 'src', using data from previously compressed blocks as dictionary to improve compression ratio.
-  Important : Previous data blocks are assumed to still be present and unmodified !
+<pre><b>int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+</b><p>  Compress 'src' content using data from previously compressed blocks, for better compression ratio.
   'dst' buffer must be already allocated.
-  If maxDstSize >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
-  If not, and if compressed data cannot fit into 'dst' buffer size, compression stops, and function returns a zero.
+  If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
+
+ @return : size of compressed block
+           or 0 if there is an error (typically, cannot fit into 'dst').
+
+  Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block.
+           Each block has precise boundaries.
+           It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together.
+           Each block must be decompressed separately, calling LZ4_decompress_*() with associated metadata.
+
+  Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory!
+
+  Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB.
+           Make sure that buffers are separated, by at least one byte.
+           This construction ensures that each block only depends on previous block.
+
+  Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
+
+  Note 5 : After an error, the stream status is invalid, it can only be reset or freed.
  
 </p></pre><BR>
 
-<pre><b>int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize);
-</b><p>  If previously compressed data block is not guaranteed to remain available at its memory location,
+<pre><b>int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize);
+</b><p>  If last 64KB data cannot be guaranteed to remain available at its current memory location,
   save it into a safer place (char* safeBuffer).
-  Note : you don't need to call LZ4_loadDict() afterwards,
-         dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue().
-  Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
+  This is schematically equivalent to a memcpy() followed by LZ4_loadDict(),
+  but is much faster, because LZ4_saveDict() doesn't need to rebuild tables.
+ @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error.
  
 </p></pre><BR>
 
-<a name="Chapter7"></a><h2>Streaming Decompression Functions</h2><pre></pre>
+<a name="Chapter7"></a><h2>Streaming Decompression Functions</h2><pre>  Bufferless synchronous API
+<BR></pre>
 
-<pre><b>typedef struct {
-  union {
-    unsigned long long table[LZ4_STREAMDECODESIZE_U64];
-    LZ4_streamDecode_t_internal internal_donotuse;
-  };
-</b></pre><BR>
 <pre><b>LZ4_streamDecode_t* LZ4_createStreamDecode(void);
 int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
-</b><p> information structure to track an LZ4 stream.
- init this structure content using LZ4_setStreamDecode or memset() before first use !
-
- In the context of a DLL (liblz4) please prefer usage of construction methods below.
- They are more future proof, in case of a change of LZ4_streamDecode_t size in the future.
- LZ4_createStreamDecode will allocate and initialize an LZ4_streamDecode_t structure
- LZ4_freeStreamDecode releases its memory.
+</b><p>  creation / destruction of streaming decompression tracking context.
+  A tracking context can be re-used multiple times.
  
 </p></pre><BR>
 
 <pre><b>int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
-</b><p>  Use this function to instruct where to find the dictionary.
-  Setting a size of 0 is allowed (same effect as reset).
-  @return : 1 if OK, 0 if error
+</b><p>  An LZ4_streamDecode_t context can be allocated once and re-used multiple times.
+  Use this function to start decompression of a new stream of blocks.
+  A dictionary can optionally be set. Use NULL or size 0 for a reset order.
+  Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
+ @return : 1 if OK, 0 if error
  
 </p></pre><BR>
 
-<pre><b>int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize);
-int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize);
-</b><p>    These decoding functions allow decompression of multiple blocks in "streaming" mode.
-    Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB)
-    In the case of a ring buffers, decoding buffer must be either :
-    - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions)
-      In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB).
-    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
-      maxBlockSize is implementation dependent. It's the maximum size you intend to compress into a single block.
-      In which case, encoding and decoding buffers do not need to be synchronized,
-      and encoding ring buffer can have any size, including small ones ( < 64 KB).
-    - _At least_ 64 KB + 8 bytes + maxBlockSize.
-      In which case, encoding and decoding buffers do not need to be synchronized,
-      and encoding ring buffer can have any size, including larger than decoding buffer.
-    Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer,
-    and indicate where it is saved using LZ4_setStreamDecode()
+<pre><b>int LZ4_decoderRingBufferSize(int maxBlockSize);
+#define LZ4_DECODER_RING_BUFFER_SIZE(mbs) (65536 + 14 + (mbs))  </b>/* for static allocation; mbs presumed valid */<b>
+</b><p>  Note : in a ring buffer scenario (optional),
+  blocks are presumed decompressed next to each other
+  up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize),
+  at which stage it resumes from beginning of ring buffer.
+  When setting such a ring buffer for streaming decompression,
+  this function provides the minimum size of this ring buffer
+  to be compatible with any source respecting maxBlockSize condition.
+ @return : minimum ring buffer size,
+           or 0 if there is an error (invalid maxBlockSize).
+ 
 </p></pre><BR>
 
-<pre><b>int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize);
-int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize);
-</b><p>Advanced decoding functions :
-    These decoding functions work the same as
-    a combination of LZ4_setStreamDecode() followed by LZ4_decompress_x_continue()
-    They are stand-alone. They don't need nor update an LZ4_streamDecode_t structure.
+<pre><b>int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity);
+int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
+</b><p>  These decoding functions allow decompression of consecutive blocks in "streaming" mode.
+  A block is an unsplittable entity, it must be presented entirely to a decompression function.
+  Decompression functions only accept one block at a time.
+  The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded.
+  If less than 64KB of data has been decoded, all the data must be present.
+
+  Special : if decompression side sets a ring buffer, it must respect one of the following conditions :
+  - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize).
+    maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes.
+    In which case, encoding and decoding buffers do not need to be synchronized.
+    Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize.
+  - Synchronized mode :
+    Decompression buffer size is _exactly_ the same as compression buffer size,
+    and follows exactly same update rule (block boundaries at same positions),
+    and decoding function is provided with exact decompressed size of each block (exception for last block of the stream),
+    _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB).
+  - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+    In which case, encoding and decoding buffers do not need to be synchronized,
+    and encoding ring buffer can have any size, including small ones ( < 64 KB).
+
+  Whenever these conditions are not possible,
+  save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression,
+  then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block.
+</p></pre><BR>
+
+<pre><b>int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapacity, const char* dictStart, int dictSize);
+int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);
+</b><p>  These decoding functions work the same as
+  a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
+  They are stand-alone, and don't need an LZ4_streamDecode_t structure.
+  Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
+ 
+</p></pre><BR>
+
+<a name="Chapter8"></a><h2>Unstable declarations</h2><pre>
+ Declarations in this section should be considered unstable.
+ Use at your own peril, etc., etc.
+ They may be removed in the future.
+ Their signatures may change.
+<BR></pre>
+
+<pre><b>void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
+</b><p>  Use this, like LZ4_resetStream(), to prepare a context for a new chain of
+  calls to a streaming API (e.g., LZ4_compress_fast_continue()).
+
+  Note:
+  Using this in advance of a non-streaming-compression function is redundant,
+  and potentially bad for performance, since they all perform their own custom
+  reset internally.
+
+  Differences from LZ4_resetStream():
+  When an LZ4_stream_t is known to be in an internally coherent state,
+  it can often be prepared for a new compression with almost no work, only
+  sometimes falling back to the full, expensive reset that is always required
+  when the stream is in an indeterminate state (i.e., the reset performed by
+  LZ4_resetStream()).
+
+  LZ4_streams are guaranteed to be in a valid state when:
+  - returned from LZ4_createStream()
+  - reset by LZ4_resetStream()
+  - memset(stream, 0, sizeof(LZ4_stream_t)), though this is discouraged
+  - the stream was in a valid state and was reset by LZ4_resetStream_fast()
+  - the stream was in a valid state and was then used in any compression call
+    that returned success
+  - the stream was in an indeterminate state and was used in a compression
+    call that fully reset the state (e.g., LZ4_compress_fast_extState()) and
+    that returned success
+
+  When a stream isn't known to be in a valid state, it is not safe to pass to
+  any fastReset or streaming function. It must first be cleansed by the full
+  LZ4_resetStream().
+ 
+</p></pre><BR>
+
+<pre><b>int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+</b><p>  A variant of LZ4_compress_fast_extState().
+
+  Using this variant avoids an expensive initialization step. It is only safe
+  to call if the state buffer is known to be correctly initialized already
+  (see above comment on LZ4_resetStream_fast() for a definition of "correctly
+  initialized"). From a high level, the difference is that this function
+  initializes the provided state with a call to something like
+  LZ4_resetStream_fast() while LZ4_compress_fast_extState() starts with a
+  call to LZ4_resetStream().
+ 
+</p></pre><BR>
+
+<pre><b>void LZ4_attach_dictionary(LZ4_stream_t *working_stream, const LZ4_stream_t *dictionary_stream);
+</b><p>  This is an experimental API that allows for the efficient use of a
+  static dictionary many times.
+
+  Rather than re-loading the dictionary buffer into a working context before
+  each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
+  working LZ4_stream_t, this function introduces a no-copy setup mechanism,
+  in which the working stream references the dictionary stream in-place.
+
+  Several assumptions are made about the state of the dictionary stream.
+  Currently, only streams which have been prepared by LZ4_loadDict() should
+  be expected to work.
+
+  Alternatively, the provided dictionary stream pointer may be NULL, in which
+  case any existing dictionary stream is unset.
+
+  If a dictionary is provided, it replaces any pre-existing stream history.
+  The dictionary contents are the only history that can be referenced and
+  logically immediately precede the data compressed in the first subsequent
+  compression call.
+
+  The dictionary will only remain attached to the working stream through the
+  first compression call, at the end of which it is cleared. The dictionary
+  stream (and source buffer) must remain in-place / accessible / unchanged
+  through the completion of the first compression call on the stream.
+ 
+</p></pre><BR>
+
+<a name="Chapter9"></a><h2>Private definitions</h2><pre>
+ Do not use these definitions.
+ They are exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
+ Using these definitions will expose code to API and/or ABI break in future versions of the library.
+<BR></pre>
+
+<pre><b>typedef struct {
+    const uint8_t* externalDict;
+    size_t extDictSize;
+    const uint8_t* prefixEnd;
+    size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+</b></pre><BR>
+<pre><b>typedef struct {
+    const unsigned char* externalDict;
+    size_t extDictSize;
+    const unsigned char* prefixEnd;
+    size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+</b></pre><BR>
+<pre><b>#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
+union LZ4_stream_u {
+    unsigned long long table[LZ4_STREAMSIZE_U64];
+    LZ4_stream_t_internal internal_donotuse;
+} ;  </b>/* previously typedef'd to LZ4_stream_t */<b>
+</b><p> information structure to track an LZ4 stream.
+ init this structure before first use.
+ note : only use in association with static linking !
+        this definition is not API/ABI safe,
+        it may change in a future version !
+ 
+</p></pre><BR>
+
+<pre><b>#define LZ4_STREAMDECODESIZE_U64  4
+#define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
+union LZ4_streamDecode_u {
+    unsigned long long table[LZ4_STREAMDECODESIZE_U64];
+    LZ4_streamDecode_t_internal internal_donotuse;
+} ;   </b>/* previously typedef'd to LZ4_streamDecode_t */<b>
+</b><p> information structure to track an LZ4 stream during decompression.
+ init this structure using LZ4_setStreamDecode (or memset()) before first use
+ note : only use in association with static linking !
+        this definition is not API/ABI safe,
+        and may change in a future version !
+ 
+</p></pre><BR>
+
+<a name="Chapter10"></a><h2>Obsolete Functions</h2><pre></pre>
+
+<pre><b>#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
+#  define LZ4_DEPRECATED(message)   </b>/* disable deprecation warnings */<b>
+#else
+#  define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  if defined (__cplusplus) && (__cplusplus >= 201402) </b>/* C++14 or greater */<b>
+#    define LZ4_DEPRECATED(message) [[deprecated(message)]]
+#  elif (LZ4_GCC_VERSION >= 405) || defined(__clang__)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
+#  elif (LZ4_GCC_VERSION >= 301)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
+#  elif defined(_MSC_VER)
+#    define LZ4_DEPRECATED(message) __declspec(deprecated(message))
+#  else
+#    pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
+#    define LZ4_DEPRECATED(message)
+#  endif
+#endif </b>/* LZ4_DISABLE_DEPRECATE_WARNINGS */<b>
+</b><p>   Should deprecation warnings be a problem,
+   it is generally possible to disable them,
+   typically with -Wno-deprecated-declarations for gcc
+   or _CRT_SECURE_NO_WARNINGS in Visual.
+   Otherwise, it's also possible to define LZ4_DISABLE_DEPRECATE_WARNINGS 
 </p></pre><BR>
 
 </html>
diff --git a/doc/lz4frame_manual.html b/doc/lz4frame_manual.html
new file mode 100644
index 0000000..fb8e0ce
--- /dev/null
+++ b/doc/lz4frame_manual.html
@@ -0,0 +1,352 @@
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
+<title>1.8.3 Manual</title>
+</head>
+<body>
+<h1>1.8.3 Manual</h1>
+<hr>
+<a name="Contents"></a><h2>Contents</h2>
+<ol>
+<li><a href="#Chapter1">Introduction</a></li>
+<li><a href="#Chapter2">Compiler specifics</a></li>
+<li><a href="#Chapter3">Error management</a></li>
+<li><a href="#Chapter4">Frame compression types</a></li>
+<li><a href="#Chapter5">Simple compression function</a></li>
+<li><a href="#Chapter6">Advanced compression functions</a></li>
+<li><a href="#Chapter7">Resource Management</a></li>
+<li><a href="#Chapter8">Compression</a></li>
+<li><a href="#Chapter9">Decompression functions</a></li>
+<li><a href="#Chapter10">Streaming decompression functions</a></li>
+<li><a href="#Chapter11">Bulk processing dictionary API</a></li>
+</ol>
+<hr>
+<a name="Chapter1"></a><h2>Introduction</h2><pre>
+  lz4frame.h implements LZ4 frame specification (doc/lz4_Frame_format.md).
+  lz4frame.h provides frame compression functions that take care
+  of encoding standard metadata alongside LZ4-compressed blocks.
+<BR></pre>
+
+<a name="Chapter2"></a><h2>Compiler specifics</h2><pre></pre>
+
+<a name="Chapter3"></a><h2>Error management</h2><pre></pre>
+
+<pre><b>unsigned    LZ4F_isError(LZ4F_errorCode_t code);   </b>/**< tells when a function result is an error code */<b>
+</b></pre><BR>
+<pre><b>const char* LZ4F_getErrorName(LZ4F_errorCode_t code);   </b>/**< return error code string; for debugging */<b>
+</b></pre><BR>
+<a name="Chapter4"></a><h2>Frame compression types</h2><pre></pre>
+
+<pre><b>typedef enum {
+    LZ4F_default=0,
+    LZ4F_max64KB=4,
+    LZ4F_max256KB=5,
+    LZ4F_max1MB=6,
+    LZ4F_max4MB=7
+    LZ4F_OBSOLETE_ENUM(max64KB)
+    LZ4F_OBSOLETE_ENUM(max256KB)
+    LZ4F_OBSOLETE_ENUM(max1MB)
+    LZ4F_OBSOLETE_ENUM(max4MB)
+} LZ4F_blockSizeID_t;
+</b></pre><BR>
+<pre><b>typedef enum {
+    LZ4F_blockLinked=0,
+    LZ4F_blockIndependent
+    LZ4F_OBSOLETE_ENUM(blockLinked)
+    LZ4F_OBSOLETE_ENUM(blockIndependent)
+} LZ4F_blockMode_t;
+</b></pre><BR>
+<pre><b>typedef enum {
+    LZ4F_noContentChecksum=0,
+    LZ4F_contentChecksumEnabled
+    LZ4F_OBSOLETE_ENUM(noContentChecksum)
+    LZ4F_OBSOLETE_ENUM(contentChecksumEnabled)
+} LZ4F_contentChecksum_t;
+</b></pre><BR>
+<pre><b>typedef enum {
+    LZ4F_noBlockChecksum=0,
+    LZ4F_blockChecksumEnabled
+} LZ4F_blockChecksum_t;
+</b></pre><BR>
+<pre><b>typedef enum {
+    LZ4F_frame=0,
+    LZ4F_skippableFrame
+    LZ4F_OBSOLETE_ENUM(skippableFrame)
+} LZ4F_frameType_t;
+</b></pre><BR>
+<pre><b>typedef struct {
+  LZ4F_blockSizeID_t     blockSizeID;         </b>/* max64KB, max256KB, max1MB, max4MB; 0 == default */<b>
+  LZ4F_blockMode_t       blockMode;           </b>/* LZ4F_blockLinked, LZ4F_blockIndependent; 0 == default */<b>
+  LZ4F_contentChecksum_t contentChecksumFlag; </b>/* 1: frame terminated with 32-bit checksum of decompressed data; 0: disabled (default) */<b>
+  LZ4F_frameType_t       frameType;           </b>/* read-only field : LZ4F_frame or LZ4F_skippableFrame */<b>
+  unsigned long long     contentSize;         </b>/* Size of uncompressed content ; 0 == unknown */<b>
+  unsigned               dictID;              </b>/* Dictionary ID, sent by compressor to help decoder select correct dictionary; 0 == no dictID provided */<b>
+  LZ4F_blockChecksum_t   blockChecksumFlag;   </b>/* 1: each block followed by a checksum of block's compressed data; 0: disabled (default) */<b>
+} LZ4F_frameInfo_t;
+</b><p>  makes it possible to set or read frame parameters.
+  It's not required to set all fields, as long as the structure was initially memset() to zero.
+  For all fields, 0 sets it to default value 
+</p></pre><BR>
+
+<pre><b>typedef struct {
+  LZ4F_frameInfo_t frameInfo;
+  int      compressionLevel;    </b>/* 0: default (fast mode); values > LZ4HC_CLEVEL_MAX count as LZ4HC_CLEVEL_MAX; values < 0 trigger "fast acceleration" */<b>
+  unsigned autoFlush;           </b>/* 1: always flush, to reduce usage of internal buffers */<b>
+  unsigned favorDecSpeed;       </b>/* 1: parser favors decompression speed vs compression ratio. Only works for high compression modes (>= LZ4HC_CLEVEL_OPT_MIN) */  /* >= v1.8.2 */<b>
+  unsigned reserved[3];         </b>/* must be zero for forward compatibility */<b>
+} LZ4F_preferences_t;
+</b><p>  makes it possible to supply detailed compression parameters to the stream interface.
+  Structure is presumed initially memset() to zero, representing default settings.
+  All reserved fields must be set to zero. 
+</p></pre><BR>
+
+<a name="Chapter5"></a><h2>Simple compression function</h2><pre></pre>
+
+<pre><b>size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr);
+</b><p>  Returns the maximum possible compressed size with LZ4F_compressFrame() given srcSize and preferences.
+ `preferencesPtr` is optional. It can be replaced by NULL, in which case, the function will assume default preferences.
+  Note : this result is only usable with LZ4F_compressFrame().
+         It may also be used with LZ4F_compressUpdate() _if no flush() operation_ is performed.
+ 
+</p></pre><BR>
+
+<pre><b>size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity,
+                                const void* srcBuffer, size_t srcSize,
+                                const LZ4F_preferences_t* preferencesPtr);
+</b><p>  Compress an entire srcBuffer into a valid LZ4 frame.
+  dstCapacity MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+  The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default.
+ @return : number of bytes written into dstBuffer.
+           or an error code if it fails (can be tested using LZ4F_isError())
+ 
+</p></pre><BR>
+
+<a name="Chapter6"></a><h2>Advanced compression functions</h2><pre></pre>
+
+<pre><b>typedef struct {
+  unsigned stableSrc;    </b>/* 1 == src content will remain present on future calls to LZ4F_compressUpdate(); skip copying src content within tmp buffer */<b>
+  unsigned reserved[3];
+} LZ4F_compressOptions_t;
+</b></pre><BR>
+<a name="Chapter7"></a><h2>Resource Management</h2><pre></pre>
+
+<pre><b>LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_cctx** cctxPtr, unsigned version);
+LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx);
+</b><p> The first thing to do is to create a compressionContext object, which will be used in all compression operations.
+ This is achieved using LZ4F_createCompressionContext(), which takes as argument a version.
+ The version provided MUST be LZ4F_VERSION. It is intended to track potential version mismatch, notably when using DLL.
+ The function will provide a pointer to a fully allocated LZ4F_cctx object.
+ If @return != zero, there was an error during context creation.
+ Object can release its memory using LZ4F_freeCompressionContext();
+ 
+</p></pre><BR>
+
+<a name="Chapter8"></a><h2>Compression</h2><pre></pre>
+
+<pre><b>size_t LZ4F_compressBegin(LZ4F_cctx* cctx,
+                                      void* dstBuffer, size_t dstCapacity,
+                                      const LZ4F_preferences_t* prefsPtr);
+</b><p>  will write the frame header into dstBuffer.
+  dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ `prefsPtr` is optional : you can provide NULL as argument, all preferences will then be set to default.
+ @return : number of bytes written into dstBuffer for the header
+           or an error code (which can be tested using LZ4F_isError())
+ 
+</p></pre><BR>
+
+<pre><b>size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* prefsPtr);
+</b><p>  Provides minimum dstCapacity required to guarantee compression success
+  given a srcSize and preferences, covering worst case scenario.
+  prefsPtr is optional : when NULL is provided, preferences will be set to cover worst case scenario.
+  Estimation is valid for either LZ4F_compressUpdate(), LZ4F_flush() or LZ4F_compressEnd().
+  Estimation includes the possibility that internal buffer might already be filled by up to (blockSize-1) bytes.
+  It also includes frame footer (ending + checksum), which would have to be generated by LZ4F_compressEnd().
+  Estimation doesn't include frame header, as it was already generated by LZ4F_compressBegin().
+  Result is always the same for a srcSize and prefsPtr, so it can be trusted to size reusable buffers.
+  When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() operations.
+ 
+</p></pre><BR>
+
+<pre><b>size_t LZ4F_compressUpdate(LZ4F_cctx* cctx,
+                                       void* dstBuffer, size_t dstCapacity,
+                                 const void* srcBuffer, size_t srcSize,
+                                 const LZ4F_compressOptions_t* cOptPtr);
+</b><p>  LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary.
+  Important rule: dstCapacity MUST be large enough to ensure operation success even in worst case situations.
+  This value is provided by LZ4F_compressBound().
+  If this condition is not respected, LZ4F_compressUpdate() will fail (result is an errorCode).
+  LZ4F_compressUpdate() doesn't guarantee error recovery.
+  When an error occurs, compression context must be freed or resized.
+ `cOptPtr` is optional : NULL can be provided, in which case all options are set to default.
+ @return : number of bytes written into `dstBuffer` (it can be zero, meaning input data was just buffered).
+           or an error code if it fails (which can be tested using LZ4F_isError())
+ 
+</p></pre><BR>
+
+<pre><b>size_t LZ4F_flush(LZ4F_cctx* cctx,
+                              void* dstBuffer, size_t dstCapacity,
+                        const LZ4F_compressOptions_t* cOptPtr);
+</b><p>  When data must be generated and sent immediately, without waiting for a block to be completely filled,
+  it's possible to call LZ4F_flush(). It will immediately compress any data buffered within cctx.
+ `dstCapacity` must be large enough to ensure the operation will be successful.
+ `cOptPtr` is optional : it's possible to provide NULL, all options will be set to default.
+ @return : nb of bytes written into dstBuffer (can be zero, when there is no data stored within cctx)
+           or an error code if it fails (which can be tested using LZ4F_isError())
+ 
+</p></pre><BR>
+
+<pre><b>size_t LZ4F_compressEnd(LZ4F_cctx* cctx,
+                                    void* dstBuffer, size_t dstCapacity,
+                              const LZ4F_compressOptions_t* cOptPtr);
+</b><p>  To properly finish an LZ4 frame, invoke LZ4F_compressEnd().
+  It will flush whatever data remained within `cctx` (like LZ4F_flush())
+  and properly finalize the frame, with an endMark and a checksum.
+ `cOptPtr` is optional : NULL can be provided, in which case all options will be set to default.
+ @return : nb of bytes written into dstBuffer, necessarily >= 4 (endMark),
+           or an error code if it fails (which can be tested using LZ4F_isError())
+  A successful call to LZ4F_compressEnd() makes `cctx` available again for another compression task.
+ 
+</p></pre><BR>
+
+<a name="Chapter9"></a><h2>Decompression functions</h2><pre></pre>
+
+<pre><b>typedef struct {
+  unsigned stableDst;    </b>/* pledges that last 64KB decompressed data will remain available unmodified. This optimization skips storage operations in tmp buffers. */<b>
+  unsigned reserved[3];  </b>/* must be set to zero for forward compatibility */<b>
+} LZ4F_decompressOptions_t;
+</b></pre><BR>
+<pre><b>LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_dctx** dctxPtr, unsigned version);
+LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* dctx);
+</b><p>  Create an LZ4F_dctx object, to track all decompression operations.
+  The version provided MUST be LZ4F_VERSION.
+  The function provides a pointer to an allocated and initialized LZ4F_dctx object.
+  The result is an errorCode, which can be tested using LZ4F_isError().
+  dctx memory can be released using LZ4F_freeDecompressionContext();
+  Result of LZ4F_freeDecompressionContext() indicates current state of decompressionContext when being released.
+  That is, it should be == 0 if decompression has been completed fully and correctly.
+ 
+</p></pre><BR>
+
+<a name="Chapter10"></a><h2>Streaming decompression functions</h2><pre></pre>
+
+<pre><b>size_t LZ4F_getFrameInfo(LZ4F_dctx* dctx,
+                                     LZ4F_frameInfo_t* frameInfoPtr,
+                                     const void* srcBuffer, size_t* srcSizePtr);
+</b><p>  This function extracts frame parameters (max blockSize, dictID, etc.).
+  Its usage is optional.
+  Extracted information is typically useful for allocation and dictionary.
+  This function works in 2 situations :
+   - At the beginning of a new frame, in which case
+     it will decode information from `srcBuffer`, starting the decoding process.
+     Input size must be large enough to successfully decode the entire frame header.
+     Frame header size is variable, but is guaranteed to be <= LZ4F_HEADER_SIZE_MAX bytes.
+     It's allowed to provide more input data than this minimum.
+   - After decoding has been started.
+     In which case, no input is read, frame parameters are extracted from dctx.
+   - If decoding has barely started, but not yet extracted information from header,
+     LZ4F_getFrameInfo() will fail.
+  The number of bytes consumed from srcBuffer will be updated within *srcSizePtr (necessarily <= original value).
+  Decompression must resume from (srcBuffer + *srcSizePtr).
+ @return : a hint about how many srcSize bytes LZ4F_decompress() expects for next call,
+           or an error code which can be tested using LZ4F_isError().
+  note 1 : in case of error, dctx is not modified. Decoding operation can resume from beginning safely.
+  note 2 : frame parameters are *copied into* an already allocated LZ4F_frameInfo_t structure.
+ 
+</p></pre><BR>
+
+<pre><b>size_t LZ4F_decompress(LZ4F_dctx* dctx,
+                                   void* dstBuffer, size_t* dstSizePtr,
+                                   const void* srcBuffer, size_t* srcSizePtr,
+                                   const LZ4F_decompressOptions_t* dOptPtr);
+</b><p>  Call this function repetitively to regenerate compressed data from `srcBuffer`.
+  The function will read up to *srcSizePtr bytes from srcBuffer,
+  and decompress data into dstBuffer, of capacity *dstSizePtr.
+
+  The nb of bytes consumed from srcBuffer will be written into *srcSizePtr (necessarily <= original value).
+  The nb of bytes decompressed into dstBuffer will be written into *dstSizePtr (necessarily <= original value).
+
+  The function does not necessarily read all input bytes, so always check value in *srcSizePtr.
+  Unconsumed source data must be presented again in subsequent invocations.
+
+ `dstBuffer` can freely change between each consecutive function invocation.
+ `dstBuffer` content will be overwritten.
+
+ @return : a hint of how many `srcSize` bytes LZ4F_decompress() expects for next call.
+  Schematically, it's the size of the current (or remaining) compressed block + header of next block.
+  Respecting the hint provides some small speed benefit, because it skips intermediate buffers.
+  This is just a hint though, it's always possible to provide any srcSize.
+
+  When a frame is fully decoded, @return will be 0 (no more data expected).
+  When provided with more bytes than necessary to decode a frame,
+  LZ4F_decompress() will stop reading exactly at end of current frame, and @return 0.
+
+  If decompression failed, @return is an error code, which can be tested using LZ4F_isError().
+  After a decompression error, the `dctx` context is not resumable.
+  Use LZ4F_resetDecompressionContext() to return to clean state.
+
+  After a frame is fully decoded, dctx can be used again to decompress another frame.
+ 
+</p></pre><BR>
+
+<pre><b>void LZ4F_resetDecompressionContext(LZ4F_dctx* dctx);   </b>/* always successful */<b>
+</b><p>  In case of an error, the context is left in "undefined" state.
+  In which case, it's necessary to reset it, before re-using it.
+  This method can also be used to abruptly stop any unfinished decompression,
+  and start a new one using same context resources. 
+</p></pre><BR>
+
+<pre><b>typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM) } LZ4F_errorCodes;
+</b></pre><BR>
+<a name="Chapter11"></a><h2>Bulk processing dictionary API</h2><pre></pre>
+
+<pre><b>LZ4FLIB_STATIC_API LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
+LZ4FLIB_STATIC_API void        LZ4F_freeCDict(LZ4F_CDict* CDict);
+</b><p>  When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
+  LZ4F_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
+  LZ4F_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ `dictBuffer` can be released after LZ4F_CDict creation, since its content is copied within CDict 
+</p></pre><BR>
+
+<pre><b>LZ4FLIB_STATIC_API size_t LZ4F_compressFrame_usingCDict(
+    LZ4F_cctx* cctx,
+    void* dst, size_t dstCapacity,
+    const void* src, size_t srcSize,
+    const LZ4F_CDict* cdict,
+    const LZ4F_preferences_t* preferencesPtr);
+</b><p>  Compress an entire srcBuffer into a valid LZ4 frame using a digested Dictionary.
+  cctx must point to a context created by LZ4F_createCompressionContext().
+  If cdict==NULL, compress without a dictionary.
+  dstCapacity MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+  If this condition is not respected, function will fail (@return an errorCode).
+  The LZ4F_preferences_t structure is optional : you may provide NULL as argument,
+  but it's not recommended, as it's the only way to provide dictID in the frame header.
+ @return : number of bytes written into dstBuffer.
+           or an error code if it fails (can be tested using LZ4F_isError()) 
+</p></pre><BR>
+
+<pre><b>LZ4FLIB_STATIC_API size_t LZ4F_compressBegin_usingCDict(
+    LZ4F_cctx* cctx,
+    void* dstBuffer, size_t dstCapacity,
+    const LZ4F_CDict* cdict,
+    const LZ4F_preferences_t* prefsPtr);
+</b><p>  Inits streaming dictionary compression, and writes the frame header into dstBuffer.
+  dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ `prefsPtr` is optional : you may provide NULL as argument,
+  however, it's the only way to provide dictID in the frame header.
+ @return : number of bytes written into dstBuffer for the header,
+           or an error code (which can be tested using LZ4F_isError()) 
+</p></pre><BR>
+
+<pre><b>LZ4FLIB_STATIC_API size_t LZ4F_decompress_usingDict(
+    LZ4F_dctx* dctxPtr,
+    void* dstBuffer, size_t* dstSizePtr,
+    const void* srcBuffer, size_t* srcSizePtr,
+    const void* dict, size_t dictSize,
+    const LZ4F_decompressOptions_t* decompressOptionsPtr);
+</b><p>  Same as LZ4F_decompress(), using a predefined dictionary.
+  Dictionary is used "in place", without any preprocessing.
+  It must remain accessible throughout the entire frame decoding. 
+</p></pre><BR>
+
+</body>
+</html>
diff --git a/examples/.gitignore b/examples/.gitignore
index 3ceb90d..5abeef6 100644
--- a/examples/.gitignore
+++ b/examples/.gitignore
@@ -6,4 +6,5 @@
 /ringBufferHC
 /lineCompress
 /frameCompress
+/simpleBuffer
 /*.exe
diff --git a/examples/HCStreaming_ringBuffer.c b/examples/HCStreaming_ringBuffer.c
index d49b267..a878577 100644
--- a/examples/HCStreaming_ringBuffer.c
+++ b/examples/HCStreaming_ringBuffer.c
@@ -1,12 +1,12 @@
 // LZ4 HC streaming API example : ring buffer
-// Based on previous work from Takayuki Matsuoka
+// Based on a previous example by Takayuki Matsuoka
 
 
 /**************************************
  * Compiler Options
  **************************************/
-#ifdef _MSC_VER    /* Visual Studio */
-#  define _CRT_SECURE_NO_WARNINGS   /* for MSVC */
+#if defined(_MSC_VER) && (_MSC_VER <= 1800)  /* Visual Studio <= 2013 */
+#  define _CRT_SECURE_NO_WARNINGS
 #  define snprintf sprintf_s
 #endif
 
diff --git a/examples/Makefile b/examples/Makefile
index aad713b..103e7ec 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -27,13 +27,14 @@
 # kindly provided by Takayuki Matsuoka
 # ##########################################################################
 
-CFLAGS ?= -O3
-CFLAGS += -std=gnu99 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wstrict-prototypes
-FLAGS  := -I../lib $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
+CPPFLAGS += -I../lib
+CFLAGS   ?= -O3
+CFLAGS   += -std=gnu99 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wstrict-prototypes
+FLAGS    := $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS)
 
-TESTFILE= Makefile
-LZ4DIR := ../lib
-LZ4 = ../programs/lz4
+TESTFILE  = Makefile
+LZ4DIR   := ../lib
+LZ4       = ../programs/lz4
 
 
 # Define *.exe as extension for Windows systems
@@ -48,42 +49,58 @@
 
 default: all
 
-all: printVersion doubleBuffer dictionaryRandomAccess ringBuffer ringBufferHC lineCompress frameCompress
+all: printVersion doubleBuffer dictionaryRandomAccess ringBuffer ringBufferHC \
+     lineCompress frameCompress simpleBuffer
 
-printVersion: $(LZ4DIR)/lz4.c printVersion.c
-	$(CC)      $(FLAGS) $^ -o $@$(EXT)
+$(LZ4DIR)/liblz4.a: $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c $(LZ4DIR)/lz4frame.c $(LZ4DIR)/lz4.h $(LZ4DIR)/lz4hc.h $(LZ4DIR)/lz4frame.h $(LZ4DIR)/lz4frame_static.h
+	$(MAKE) -C $(LZ4DIR) liblz4.a
 
-doubleBuffer: $(LZ4DIR)/lz4.c blockStreaming_doubleBuffer.c
-	$(CC)      $(FLAGS) $^ -o $@$(EXT)
+printVersion: printVersion.c $(LZ4DIR)/liblz4.a
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
-dictionaryRandomAccess: $(LZ4DIR)/lz4.c dictionaryRandomAccess.c
-	$(CC)      $(FLAGS) $^ -o $@$(EXT)
+doubleBuffer: blockStreaming_doubleBuffer.c $(LZ4DIR)/liblz4.a
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
-ringBuffer  : $(LZ4DIR)/lz4.c blockStreaming_ringBuffer.c
-	$(CC)      $(FLAGS) $^ -o $@$(EXT)
+dictionaryRandomAccess: dictionaryRandomAccess.c $(LZ4DIR)/liblz4.a
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
-ringBufferHC: $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c HCStreaming_ringBuffer.c
-	$(CC)      $(FLAGS) $^ -o $@$(EXT)
+ringBuffer  : blockStreaming_ringBuffer.c $(LZ4DIR)/liblz4.a
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
-lineCompress: $(LZ4DIR)/lz4.c blockStreaming_lineByLine.c
-	$(CC)      $(FLAGS) $^ -o $@$(EXT)
+ringBufferHC: HCStreaming_ringBuffer.c $(LZ4DIR)/liblz4.a
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
-frameCompress: frameCompress.c
-	$(CC)      $(FLAGS) $^ -o $@$(EXT) $(LZ4DIR)/liblz4.a
+lineCompress: blockStreaming_lineByLine.c $(LZ4DIR)/liblz4.a
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
-compressFunctions: $(LZ4DIR)/lz4.c compress_functions.c
-	$(CC)      $(FLAGS) $^ -o $@$(EXT) -lrt
+frameCompress: frameCompress.c $(LZ4DIR)/liblz4.a
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
-simpleBuffer: $(LZ4DIR)/lz4.c simple_buffer.c
-	$(CC)      $(FLAGS) $^ -o $@$(EXT)
+compressFunctions: compress_functions.c $(LZ4DIR)/liblz4.a
+	$(CC) $(FLAGS) $^ -o $@$(EXT) -lrt
 
-test : all
+simpleBuffer: simple_buffer.c $(LZ4DIR)/liblz4.a
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
+
+$(LZ4) :
+	$(MAKE) -C ../programs lz4
+
+test : all $(LZ4)
+	@echo "\n=== Print Version ==="
 	./printVersion$(EXT)
+	@echo "\n=== Simple compression example ==="
+	./simpleBuffer$(EXT)
+	@echo "\n=== Double-buffer ==="
 	./doubleBuffer$(EXT) $(TESTFILE)
-	./dictionaryRandomAccess$(EXT) $(TESTFILE) $(TESTFILE) 1100 1400
+	@echo "\n=== Ring Buffer ==="
 	./ringBuffer$(EXT)   $(TESTFILE)
+	@echo "\n=== Ring Buffer + LZ4 HC ==="
 	./ringBufferHC$(EXT) $(TESTFILE)
+	@echo "\n=== Compress line by line ==="
 	./lineCompress$(EXT) $(TESTFILE)
+	@echo "\n=== Dictionary Random Access ==="
+	./dictionaryRandomAccess$(EXT) $(TESTFILE) $(TESTFILE) 1100 1400
+	@echo "\n=== Frame compression ==="
 	./frameCompress$(EXT) $(TESTFILE)
 	$(LZ4) -vt $(TESTFILE).lz4
 
diff --git a/examples/blockStreaming_doubleBuffer.c b/examples/blockStreaming_doubleBuffer.c
index d02f258..acb3455 100644
--- a/examples/blockStreaming_doubleBuffer.c
+++ b/examples/blockStreaming_doubleBuffer.c
@@ -2,7 +2,7 @@
 // Copyright : Takayuki Matsuoka
 
 
-#ifdef _MSC_VER    /* Visual Studio */
+#if defined(_MSC_VER) && (_MSC_VER <= 1800)  /* Visual Studio <= 2013 */
 #  define _CRT_SECURE_NO_WARNINGS
 #  define snprintf sprintf_s
 #endif
diff --git a/examples/blockStreaming_lineByLine.c b/examples/blockStreaming_lineByLine.c
index f449aa3..677c426 100644
--- a/examples/blockStreaming_lineByLine.c
+++ b/examples/blockStreaming_lineByLine.c
@@ -1,8 +1,8 @@
 // LZ4 streaming API example : line-by-line logfile compression
-// Copyright : Takayuki Matsuoka
+// by Takayuki Matsuoka
 
 
-#ifdef _MSC_VER    /* Visual Studio */
+#if defined(_MSC_VER) && (_MSC_VER <= 1800)  /* Visual Studio <= 2013 */
 #  define _CRT_SECURE_NO_WARNINGS
 #  define snprintf sprintf_s
 #endif
diff --git a/examples/blockStreaming_ringBuffer.c b/examples/blockStreaming_ringBuffer.c
index 697d342..0b6a3ce 100644
--- a/examples/blockStreaming_ringBuffer.c
+++ b/examples/blockStreaming_ringBuffer.c
@@ -1,17 +1,14 @@
-// LZ4 streaming API example : ring buffer
-// Based on sample code from Takayuki Matsuoka
+/* LZ4 streaming API example : ring buffer
+ * Based on sample code from Takayuki Matsuoka */
 
 
 /**************************************
  * Compiler Options
  **************************************/
-#ifdef _MSC_VER    /* Visual Studio */
-#  define _CRT_SECURE_NO_WARNINGS // for MSVC
+#if defined(_MSC_VER) && (_MSC_VER <= 1800)  /* Visual Studio <= 2013 */
+#  define _CRT_SECURE_NO_WARNINGS
 #  define snprintf sprintf_s
 #endif
-#ifdef __GNUC__
-#  pragma GCC diagnostic ignored "-Wmissing-braces"   /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */
-#endif
 
 
 /**************************************
@@ -27,7 +24,7 @@
 enum {
     MESSAGE_MAX_BYTES   = 1024,
     RING_BUFFER_BYTES   = 1024 * 8 + MESSAGE_MAX_BYTES,
-    DECODE_RING_BUFFER  = RING_BUFFER_BYTES + MESSAGE_MAX_BYTES   // Intentionally larger, to test unsynchronized ring buffers
+    DECODE_RING_BUFFER  = RING_BUFFER_BYTES + MESSAGE_MAX_BYTES   /* Intentionally larger, to test unsynchronized ring buffers */
 };
 
 
@@ -50,7 +47,7 @@
 
 void test_compress(FILE* outFp, FILE* inpFp)
 {
-    LZ4_stream_t lz4Stream_body = { 0 };
+    LZ4_stream_t lz4Stream_body = { { 0 } };
     LZ4_stream_t* lz4Stream = &lz4Stream_body;
 
     static char inpBuf[RING_BUFFER_BYTES];
@@ -85,24 +82,22 @@
 void test_decompress(FILE* outFp, FILE* inpFp)
 {
     static char decBuf[DECODE_RING_BUFFER];
-    int   decOffset    = 0;
-    LZ4_streamDecode_t lz4StreamDecode_body = { 0 };
+    int decOffset = 0;
+    LZ4_streamDecode_t lz4StreamDecode_body = { { 0 } };
     LZ4_streamDecode_t* lz4StreamDecode = &lz4StreamDecode_body;
 
     for(;;) {
         int cmpBytes = 0;
         char cmpBuf[CMPBUFSIZE];
 
-        {
-            const size_t r0 = read_int32(inpFp, &cmpBytes);
+        {   const size_t r0 = read_int32(inpFp, &cmpBytes);
             if(r0 != 1 || cmpBytes <= 0) break;
 
             const size_t r1 = read_bin(inpFp, cmpBuf, cmpBytes);
             if(r1 != (size_t) cmpBytes) break;
         }
 
-        {
-            char* const decPtr = &decBuf[decOffset];
+        {   char* const decPtr = &decBuf[decOffset];
             const int decBytes = LZ4_decompress_safe_continue(
                 lz4StreamDecode, cmpBuf, decPtr, cmpBytes, MESSAGE_MAX_BYTES);
             if(decBytes <= 0) break;
@@ -120,7 +115,7 @@
 {
     int result = 0;
 
-    while(0 == result) {
+    while (0 == result) {
         char b0[65536];
         char b1[65536];
         const size_t r0 = fread(b0, 1, sizeof(b0), f0);
@@ -128,12 +123,9 @@
 
         result = (int) r0 - (int) r1;
 
-        if(0 == r0 || 0 == r1) {
-            break;
-        }
-        if(0 == result) {
-            result = memcmp(b0, b1, r0);
-        }
+        if (0 == r0 || 0 == r1) break;
+
+        if (0 == result) result = memcmp(b0, b1, r0);
     }
 
     return result;
@@ -146,7 +138,7 @@
     char lz4Filename[256] = { 0 };
     char decFilename[256] = { 0 };
 
-    if(argc < 2) {
+    if (argc < 2) {
         printf("Please specify input filename\n");
         return 0;
     }
@@ -160,9 +152,8 @@
     printf("dec = [%s]\n", decFilename);
 
     // compress
-    {
-        FILE* inpFp = fopen(inpFilename, "rb");
-        FILE* outFp = fopen(lz4Filename, "wb");
+    {   FILE* const inpFp = fopen(inpFilename, "rb");
+        FILE* const outFp = fopen(lz4Filename, "wb");
 
         test_compress(outFp, inpFp);
 
@@ -171,9 +162,8 @@
     }
 
     // decompress
-    {
-        FILE* inpFp = fopen(lz4Filename, "rb");
-        FILE* outFp = fopen(decFilename, "wb");
+    {   FILE* const inpFp = fopen(lz4Filename, "rb");
+        FILE* const outFp = fopen(decFilename, "wb");
 
         test_decompress(outFp, inpFp);
 
@@ -182,12 +172,11 @@
     }
 
     // verify
-    {
-        FILE* inpFp = fopen(inpFilename, "rb");
-        FILE* decFp = fopen(decFilename, "rb");
+    {   FILE* const inpFp = fopen(inpFilename, "rb");
+        FILE* const decFp = fopen(decFilename, "rb");
 
         const int cmp = compare(inpFp, decFp);
-        if(0 == cmp) {
+        if (0 == cmp) {
             printf("Verify : OK\n");
         } else {
             printf("Verify : NG\n");
diff --git a/examples/dictionaryRandomAccess.c b/examples/dictionaryRandomAccess.c
index 6acf99b..291fd08 100644
--- a/examples/dictionaryRandomAccess.c
+++ b/examples/dictionaryRandomAccess.c
@@ -1,6 +1,6 @@
 // LZ4 API example : Dictionary Random Access
 
-#ifdef _MSC_VER    /* Visual Studio */
+#if defined(_MSC_VER) && (_MSC_VER <= 1800)  /* Visual Studio <= 2013 */
 #  define _CRT_SECURE_NO_WARNINGS
 #  define snprintf sprintf_s
 #endif
diff --git a/examples/frameCompress.c b/examples/frameCompress.c
index 75f1576..a0c5d3d 100644
--- a/examples/frameCompress.c
+++ b/examples/frameCompress.c
@@ -1,311 +1,400 @@
-// LZ4frame API example : compress a file
-// Based on sample code from Zbigniew Jędrzejewski-Szmek
+/* LZ4frame API example : compress a file
+ * Modified from an example code by Zbigniew Jędrzejewski-Szmek
+ *
+ * This example streams an input file into an output file
+ * using a bounded memory budget.
+ * Input is read in chunks of IN_CHUNK_SIZE */
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
+#include <assert.h>
 
 #include <lz4frame.h>
 
-#define BUF_SIZE 16*1024
-#define LZ4_HEADER_SIZE 19
-#define LZ4_FOOTER_SIZE 4
 
-static const LZ4F_preferences_t lz4_preferences = {
-	{ LZ4F_max256KB, LZ4F_blockLinked, LZ4F_noContentChecksum, LZ4F_frame, 0, { 0, 0 } },
-	0,   /* compression level */
-	0,   /* autoflush */
-	{ 0, 0, 0, 0 },  /* reserved, must be set to 0 */
+#define IN_CHUNK_SIZE  (16*1024)
+
+static const LZ4F_preferences_t kPrefs = {
+    { LZ4F_max256KB, LZ4F_blockLinked, LZ4F_noContentChecksum, LZ4F_frame,
+      0 /* unknown content size */, 0 /* no dictID */ , LZ4F_noBlockChecksum },
+    0,   /* compression level; 0 == default */
+    0,   /* autoflush */
+    0,   /* favor decompression speed */
+    { 0, 0, 0 },  /* reserved, must be set to 0 */
 };
 
-static size_t compress_file(FILE *in, FILE *out, size_t *size_in, size_t *size_out) {
-	LZ4F_errorCode_t r;
-	LZ4F_compressionContext_t ctx;
-	char *src, *buf = NULL;
-	size_t size, n, k, count_in = 0, count_out, offset = 0, frame_size;
 
-	r = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
-	if (LZ4F_isError(r)) {
-		printf("Failed to create context: error %zu\n", r);
-		return 1;
-	}
-	r = 1;
-
-	src = malloc(BUF_SIZE);
-	if (!src) {
-		printf("Not enough memory\n");
-		goto cleanup;
-	}
-
-	frame_size = LZ4F_compressBound(BUF_SIZE, &lz4_preferences);
-	size =  frame_size + LZ4_HEADER_SIZE + LZ4_FOOTER_SIZE;
-	buf = malloc(size);
-	if (!buf) {
-		printf("Not enough memory\n");
-		goto cleanup;
-	}
-
-	n = offset = count_out = LZ4F_compressBegin(ctx, buf, size, &lz4_preferences);
-	if (LZ4F_isError(n)) {
-		printf("Failed to start compression: error %zu\n", n);
-		goto cleanup;
-	}
-
-	printf("Buffer size is %zu bytes, header size %zu bytes\n", size, n);
-
-	for (;;) {
-		k = fread(src, 1, BUF_SIZE, in);
-		if (k == 0)
-			break;
-		count_in += k;
-
-		n = LZ4F_compressUpdate(ctx, buf + offset, size - offset, src, k, NULL);
-		if (LZ4F_isError(n)) {
-			printf("Compression failed: error %zu\n", n);
-			goto cleanup;
-		}
-
-		offset += n;
-		count_out += n;
-		if (size - offset < frame_size + LZ4_FOOTER_SIZE) {
-			printf("Writing %zu bytes\n", offset);
-
-			k = fwrite(buf, 1, offset, out);
-			if (k < offset) {
-				if (ferror(out))
-					printf("Write failed\n");
-				else
-					printf("Short write\n");
-				goto cleanup;
-			}
-
-			offset = 0;
-		}
-	}
-
-	n = LZ4F_compressEnd(ctx, buf + offset, size - offset, NULL);
-	if (LZ4F_isError(n)) {
-		printf("Failed to end compression: error %zu\n", n);
-		goto cleanup;
-	}
-
-	offset += n;
-	count_out += n;
-	printf("Writing %zu bytes\n", offset);
-
-	k = fwrite(buf, 1, offset, out);
-	if (k < offset) {
-		if (ferror(out))
-			printf("Write failed\n");
-		else
-			printf("Short write\n");
-		goto cleanup;
-	}
-
-	*size_in = count_in;
-	*size_out = count_out;
-	r = 0;
- cleanup:
-	if (ctx)
-		LZ4F_freeCompressionContext(ctx);
-	free(src);
-	free(buf);
-	return r;
+/* safe_fwrite() :
+ * writes nbElt elements of eltSize bytes from buf into f (nbElt==0 is a valid no-op);
+ * on incomplete write, prints a diagnostic on stderr and calls exit(1) */
+static void safe_fwrite(void* buf, size_t eltSize, size_t nbElt, FILE* f)
+{
+    size_t const nbWritten = fwrite(buf, eltSize, nbElt, f);   /* fwrite() returns a number of elements, not bytes */
+    assert(nbElt == 0 || (eltSize * nbElt) / nbElt == eltSize);   /* check overflow ; guard avoids dividing by 0 */
+    if (eltSize != 0 && nbWritten < nbElt) {   /* fwrite() returns 0 when eltSize==0 : not a failure */
+        if (ferror(f))  /* note : ferror() must follow fwrite */
+            fprintf(stderr, "Write failed \n");
+        else
+            fprintf(stderr, "Short write \n");
+        exit(1);
+    }
 }
 
+
+/* ================================================= */
+/*     Streaming Compression example               */
+/* ================================================= */
+
+typedef struct {
+    int error;
+    unsigned long long size_in;
+    unsigned long long size_out;
+} compressResult_t;
+
+/* compress_file_internal() :
+ * streams f_in into f_out as one LZ4 frame, reading at most inChunkSize bytes at a time into inBuff;
+ * outBuff (outCapacity bytes) receives each piece of compressed output before it is written to f_out.
+ * @return : .error==0 plus byte counts on success ; .error==1 on compression or read failure */
+static compressResult_t
+compress_file_internal(FILE* f_in, FILE* f_out,
+                       LZ4F_compressionContext_t ctx,
+                       void* inBuff,  size_t inChunkSize,
+                       void* outBuff, size_t outCapacity)
+{
+    compressResult_t result = { 1, 0, 0 };  /* result for an error */
+    unsigned long long count_in = 0, count_out;
+
+    assert(f_in != NULL); assert(f_out != NULL);
+    assert(ctx != NULL);
+    assert(outCapacity >= LZ4F_HEADER_SIZE_MAX);
+    assert(outCapacity >= LZ4F_compressBound(inChunkSize, &kPrefs));
+
+    /* write frame header */
+    {   size_t const headerSize = LZ4F_compressBegin(ctx, outBuff, outCapacity, &kPrefs);
+        if (LZ4F_isError(headerSize)) {
+            printf("Failed to start compression: error %zu\n", headerSize);
+            return result;
+        }
+        count_out = headerSize;
+        printf("Buffer size is %zu bytes, header size %zu bytes\n", outCapacity, headerSize);
+        safe_fwrite(outBuff, 1, headerSize, f_out);
+    }
+
+    /* stream file */
+    for (;;) {
+        size_t const readSize = fread(inBuff, 1, inChunkSize, f_in);   /* never read more than inBuff is declared to hold */
+        if (readSize == 0) break; /* nothing left to read from input file, or read error (checked after the loop) */
+        count_in += readSize;
+        size_t const compressedSize = LZ4F_compressUpdate(ctx, outBuff, outCapacity,
+                                                          inBuff, readSize, NULL);
+        if (LZ4F_isError(compressedSize)) {
+            printf("Compression failed: error %zu\n", compressedSize);
+            return result;
+        }
+        printf("Writing %zu bytes\n", compressedSize);
+        if (compressedSize > 0) safe_fwrite(outBuff, 1, compressedSize, f_out);   /* 0 : input was only buffered, nothing to write yet */
+        count_out += compressedSize;
+    }
+    if (ferror(f_in)) {   /* fread() returned 0 because of a read error, not end of file */
+        printf("Compression failed: error reading input file\n");
+        return result;
+    }
+
+    /* flush whatever remains within internal buffers */
+    {   size_t const compressedSize = LZ4F_compressEnd(ctx, outBuff, outCapacity, NULL);
+        if (LZ4F_isError(compressedSize)) {
+            printf("Failed to end compression: error %zu\n", compressedSize);
+            return result;
+        }
+        printf("Writing %zu bytes\n", compressedSize);
+        safe_fwrite(outBuff, 1, compressedSize, f_out);
+        count_out += compressedSize;
+    }
+    result.size_in = count_in;
+    result.size_out = count_out;
+    result.error = 0;
+    return result;
+}
+
+/* compress_file() :
+ * allocates the compression context and both I/O buffers, compresses f_in into f_out, then releases everything.
+ * @return : .error==0 with .size_in / .size_out filled on success, .error==1 otherwise */
+static compressResult_t
+compress_file(FILE* f_in, FILE* f_out)
+{
+    assert(f_in != NULL); assert(f_out != NULL);
+
+    /* resource allocation */
+    LZ4F_compressionContext_t ctx = NULL;   /* initialized so the final free never receives an indeterminate value */
+    size_t const ctxCreation = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
+    void* const src = malloc(IN_CHUNK_SIZE);
+    size_t const outbufCapacity = LZ4F_compressBound(IN_CHUNK_SIZE, &kPrefs);   /* large enough for any input <= IN_CHUNK_SIZE */
+    void* const outbuff = malloc(outbufCapacity);
+
+    compressResult_t result = { 1, 0, 0 };  /* == error (default) */
+    if (!LZ4F_isError(ctxCreation) && src && outbuff) {
+        result = compress_file_internal(f_in, f_out, ctx,
+                                        src, IN_CHUNK_SIZE, outbuff, outbufCapacity);
+    } else {
+        printf("error : ressource allocation failed \n");
+    }
+
+    LZ4F_freeCompressionContext(ctx);   /* supports free on NULL */
+    free(src);
+    free(outbuff);
+    return result;
+}
+
+
+/* ================================================= */
+/*     Streaming decompression example               */
+/* ================================================= */
+
 static size_t get_block_size(const LZ4F_frameInfo_t* info) {
-	switch (info->blockSizeID) {
+    switch (info->blockSizeID) {
         case LZ4F_default:
-		case LZ4F_max64KB:  return 1 << 16;
-		case LZ4F_max256KB: return 1 << 18;
-		case LZ4F_max1MB:   return 1 << 20;
-		case LZ4F_max4MB:   return 1 << 22;
-		default:
-			printf("Impossible unless more block sizes are allowed\n");
-			exit(1);
-	}
+        case LZ4F_max64KB:  return 1 << 16;
+        case LZ4F_max256KB: return 1 << 18;
+        case LZ4F_max1MB:   return 1 << 20;
+        case LZ4F_max4MB:   return 1 << 22;
+        default:
+            printf("Impossible with expected frame specification (<=v1.6.1)\n");
+            exit(1);
+    }
 }
 
-static size_t decompress_file(FILE *in, FILE *out) {
-	void* const src = malloc(BUF_SIZE);
-	void* dst = NULL;
-	size_t dstCapacity = 0;
-	LZ4F_dctx *dctx = NULL;
-	size_t ret;
-
-	/* Initialization */
-    if (!src) { perror("decompress_file(src)"); goto cleanup; }
-	ret = LZ4F_createDecompressionContext(&dctx, 100);
-	if (LZ4F_isError(ret)) {
-		printf("LZ4F_dctx creation error: %s\n", LZ4F_getErrorName(ret));
-		goto cleanup;
-	}
-
-	/* Decompression */
-	ret = 1;
-	while (ret != 0) {
-		/* Load more input */
-		size_t srcSize = fread(src, 1, BUF_SIZE, in);
-		void* srcPtr = src;
-		void* srcEnd = srcPtr + srcSize;
-		if (srcSize == 0 || ferror(in)) {
-			printf("Decompress: not enough input or error reading file\n");
-			goto cleanup;
-		}
-		/* Allocate destination buffer if it isn't already */
-		if (!dst) {
-			LZ4F_frameInfo_t info;
-			ret = LZ4F_getFrameInfo(dctx, &info, src, &srcSize);
-			if (LZ4F_isError(ret)) {
-				printf("LZ4F_getFrameInfo error: %s\n", LZ4F_getErrorName(ret));
-				goto cleanup;
-			}
-			/* Allocating enough space for an entire block isn't necessary for
-			 * correctness, but it allows some memcpy's to be elided.
-			 */
-			dstCapacity = get_block_size(&info);
-			dst = malloc(dstCapacity);
-            if (!dst) { perror("decompress_file(dst)"); goto cleanup; }
-			srcPtr += srcSize;
-			srcSize = srcEnd - srcPtr;
-		}
-		/* Decompress:
-		 * Continue while there is more input to read and the frame isn't over.
-		 * If srcPtr == srcEnd then we know that there is no more output left in the
-		 * internal buffer left to flush.
-		 */
-		while (srcPtr != srcEnd && ret != 0) {
-			/* INVARIANT: Any data left in dst has already been written */
-			size_t dstSize = dstCapacity;
-			ret = LZ4F_decompress(dctx, dst, &dstSize, srcPtr, &srcSize, /* LZ4F_decompressOptions_t */ NULL);
-			if (LZ4F_isError(ret)) {
-				printf("Decompression error: %s\n", LZ4F_getErrorName(ret));
-				goto cleanup;
-			}
-			/* Flush output */
-			if (dstSize != 0){
-				size_t written = fwrite(dst, 1, dstSize, out);
-				printf("Writing %zu bytes\n", dstSize);
-				if (written != dstSize) {
-					printf("Decompress: Failed to write to file\n");
-					goto cleanup;
-				}
-			}
-			/* Update input */
-			srcPtr += srcSize;
-			srcSize = srcEnd - srcPtr;
-		}
-	}
-	/* Check that there isn't trailing input data after the frame.
-	 * It is valid to have multiple frames in the same file, but this example
-	 * doesn't support it.
-	 */
-	ret = fread(src, 1, 1, in);
-	if (ret != 0 || !feof(in)) {
-		printf("Decompress: Trailing data left in file after frame\n");
-		goto cleanup;
-	}
-
-cleanup:
-	free(src);
-	free(dst);
-	return LZ4F_freeDecompressionContext(dctx);   /* note : free works on NULL */
-}
-
-int compare(FILE* fp0, FILE* fp1)
+/* decompress_file_internal() : decodes a single frame from f_in into f_out ; src already holds `filled` unread bytes at offset `alreadyConsumed`.
+ * @return : 1==error, 0==success */
+static int
+decompress_file_internal(FILE* f_in, FILE* f_out,
+                         LZ4F_dctx* dctx,
+                         void* src, size_t srcCapacity, size_t filled, size_t alreadyConsumed,
+                         void* dst, size_t dstCapacity)
 {
-	int result = 0;
+    int firstChunk = 1;
+    size_t ret = 1;
 
-	while(0 == result) {
-		char b0[1024];
-		char b1[1024];
-		const size_t r0 = fread(b0, 1, sizeof(b0), fp0);
-		const size_t r1 = fread(b1, 1, sizeof(b1), fp1);
+    assert(f_in != NULL); assert(f_out != NULL); assert(dctx != NULL);
+    assert(src != NULL); assert(srcCapacity > 0); assert(filled <= srcCapacity); assert(alreadyConsumed <= srcCapacity - filled);   /* preloaded region [alreadyConsumed, alreadyConsumed+filled) must fit in src */
+    assert(dst != NULL); assert(dstCapacity > 0);
 
-		result = (int) r0 - (int) r1;
+    /* Decompression */
+    while (ret != 0) {
+        /* Load more input */
+        size_t readSize = firstChunk ? filled : fread(src, 1, srcCapacity, f_in); firstChunk=0;
+        const char* srcPtr = (const char*)src + alreadyConsumed; alreadyConsumed=0;
+        const char* const srcEnd = srcPtr + readSize;
+        if (readSize == 0 || ferror(f_in)) {
+            printf("Decompress: not enough input or error reading file\n");
+            return 1;
+        }
 
-		if (0 == r0 || 0 == r1) {
-			break;
-		}
-		if (0 == result) {
-			result = memcmp(b0, b1, r0);
-		}
-	}
+        /* Decompress:
+         * Continue while there is more input to read (srcPtr != srcEnd)
+         * and the frame isn't over (ret != 0)
+         */
+        while (srcPtr < srcEnd && ret != 0) {
+            /* Any data within dst has been flushed at this stage */
+            size_t dstSize = dstCapacity;
+            size_t srcSize = (size_t)(srcEnd - srcPtr);
+            ret = LZ4F_decompress(dctx, dst, &dstSize, srcPtr, &srcSize, /* LZ4F_decompressOptions_t */ NULL);
+            if (LZ4F_isError(ret)) {
+                printf("Decompression error: %s\n", LZ4F_getErrorName(ret));
+                return 1;
+            }
+            /* Flush output */
+            if (dstSize != 0) safe_fwrite(dst, 1, dstSize, f_out);
+            /* Update input */
+            srcPtr += srcSize;
+        }
 
-	return result;
+        assert(srcPtr <= srcEnd);
+
+        /* Ensure all input data has been consumed.
+         * It is valid to have multiple frames in the same file,
+         * but this example only supports one frame.
+         */
+        if (srcPtr < srcEnd) {
+            printf("Decompress: Trailing data left in file after frame\n");
+            return 1;
+        }
+    }
+
+    /* Check that there isn't trailing data in the file after the frame.
+     * It is valid to have multiple frames in the same file,
+     * but this example only supports one frame.
+     */
+    {   size_t const readSize = fread(src, 1, 1, f_in);
+        if (readSize != 0 || !feof(f_in)) {
+            printf("Decompress: Trailing data left in file after frame\n");
+            return 1;
+    }   }
+
+    return 0;
 }
 
+
+/* @return : 1==error, 0==completed */
+static int
+decompress_file_allocDst(FILE* f_in, FILE* f_out,
+                        LZ4F_dctx* dctx,
+                        void* src, size_t srcCapacity)
+{
+    assert(f_in != NULL); assert(f_out != NULL);
+    assert(dctx != NULL);
+    assert(src != NULL);
+    assert(srcCapacity >= LZ4F_HEADER_SIZE_MAX);  /* ensure LZ4F_getFrameInfo() can read enough data */
+
+    /* Read Frame header */
+    size_t const readSize = fread(src, 1, srcCapacity, f_in);
+    if (readSize == 0 || ferror(f_in)) {
+        printf("Decompress: not enough input or error reading file\n");
+        return 1;
+    }
+
+    LZ4F_frameInfo_t info;
+    size_t consumedSize = readSize;
+    {   size_t const fires = LZ4F_getFrameInfo(dctx, &info, src, &consumedSize);
+        if (LZ4F_isError(fires)) {
+            printf("LZ4F_getFrameInfo error: %s\n", LZ4F_getErrorName(fires));
+            return 1;
+    }   }
+
+    /* Allocating enough space for an entire block isn't necessary for
+     * correctness, but it allows some memcpy's to be elided.
+     */
+    size_t const dstCapacity = get_block_size(&info);
+    void* const dst = malloc(dstCapacity);
+    if (!dst) { perror("decompress_file(dst)"); return 1; }
+
+    int const decompressionResult = decompress_file_internal(
+                        f_in, f_out,
+                        dctx,
+                        src, srcCapacity, readSize-consumedSize, consumedSize,
+                        dst, dstCapacity);
+
+    free(dst);
+    return decompressionResult;
+}
+
+
+/* decompress_file() :
+ * allocates the input buffer and the decompression context, decodes f_in into f_out, then releases both.
+ * @return : 1==error, 0==success */
+static int decompress_file(FILE* f_in, FILE* f_out)
+{
+    assert(f_in != NULL); assert(f_out != NULL);
+
+    /* Resource allocation */
+    void* const src = malloc(IN_CHUNK_SIZE);
+    if (!src) { perror("decompress_file(src)"); return 1; }
+
+    LZ4F_dctx* dctx = NULL;   /* must be initialized : it is tested and freed below even when creation fails */
+    {   size_t const dctxStatus = LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION);
+        if (LZ4F_isError(dctxStatus)) {
+            printf("LZ4F_dctx creation error: %s\n", LZ4F_getErrorName(dctxStatus));
+    }   }
+    int const result = !dctx ? 1 /* error */ :
+                       decompress_file_allocDst(f_in, f_out, dctx, src, IN_CHUNK_SIZE);
+    free(src);
+    LZ4F_freeDecompressionContext(dctx);   /* note : free works on NULL */
+    return result;
+}
+
+
+int compareFiles(FILE* fp0, FILE* fp1)
+{
+    int result = 0;
+
+    while (result==0) {
+        char b0[1024];
+        char b1[1024];
+        size_t const r0 = fread(b0, 1, sizeof(b0), fp0);
+        size_t const r1 = fread(b1, 1, sizeof(b1), fp1);
+
+        result = (r0 != r1);
+        if (!r0 || !r1) break;
+        if (!result) result = memcmp(b0, b1, r0);
+    }
+
+    return result;
+}
+
+
 int main(int argc, const char **argv) {
-	char inpFilename[256] = { 0 };
-	char lz4Filename[256] = { 0 };
-	char decFilename[256] = { 0 };
+    char inpFilename[256] = { 0 };
+    char lz4Filename[256] = { 0 };
+    char decFilename[256] = { 0 };
 
-	if(argc < 2) {
-		printf("Please specify input filename\n");
-		return 0;
-	}
+    if (argc < 2) {
+        printf("Please specify input filename\n");
+        return 0;
+    }
 
-	snprintf(inpFilename, 256, "%s", argv[1]);
-	snprintf(lz4Filename, 256, "%s.lz4", argv[1]);
-	snprintf(decFilename, 256, "%s.lz4.dec", argv[1]);
+    snprintf(inpFilename, 256, "%s", argv[1]);
+    snprintf(lz4Filename, 256, "%s.lz4", argv[1]);
+    snprintf(decFilename, 256, "%s.lz4.dec", argv[1]);
 
-	printf("inp = [%s]\n", inpFilename);
-	printf("lz4 = [%s]\n", lz4Filename);
-	printf("dec = [%s]\n", decFilename);
+    printf("inp = [%s]\n", inpFilename);
+    printf("lz4 = [%s]\n", lz4Filename);
+    printf("dec = [%s]\n", decFilename);
 
-	/* compress */
-	{   FILE* const inpFp = fopen(inpFilename, "rb");
-		FILE* const outFp = fopen(lz4Filename, "wb");
-		size_t sizeIn = 0;
-		size_t sizeOut = 0;
-		size_t ret;
+    /* compress */
+    {   FILE* const inpFp = fopen(inpFilename, "rb");
+        FILE* const outFp = fopen(lz4Filename, "wb");
 
-		printf("compress : %s -> %s\n", inpFilename, lz4Filename);
-		ret = compress_file(inpFp, outFp, &sizeIn, &sizeOut);
-		if (ret) {
-			printf("compress : failed with code %zu\n", ret);
-			return ret;
-		}
-		printf("%s: %zu → %zu bytes, %.1f%%\n",
-			inpFilename, sizeIn, sizeOut,
-			(double)sizeOut / sizeIn * 100);
-		printf("compress : done\n");
+        printf("compress : %s -> %s\n", inpFilename, lz4Filename);
+        compressResult_t const ret = compress_file(inpFp, outFp);
 
-		fclose(outFp);
-		fclose(inpFp);
-	}
+        fclose(outFp);
+        fclose(inpFp);
 
-	/* decompress */
-	{   FILE* const inpFp = fopen(lz4Filename, "rb");
-		FILE* const outFp = fopen(decFilename, "wb");
-		size_t ret;
+        if (ret.error) {
+            printf("compress : failed with code %i\n", ret.error);
+            return ret.error;
+        }
+        printf("%s: %zu → %zu bytes, %.1f%%\n",
+            inpFilename,
+            (size_t)ret.size_in, (size_t)ret.size_out,  /* might overflow if size_t is 32 bits and size_{in,out} > 4 GB */
+            (double)ret.size_out / ret.size_in * 100);
+        printf("compress : done\n");
+    }
 
-		printf("decompress : %s -> %s\n", lz4Filename, decFilename);
-		ret = decompress_file(inpFp, outFp);
-		if (ret) {
-			printf("decompress : failed with code %zu\n", ret);
-			return ret;
-		}
-		printf("decompress : done\n");
+    /* decompress */
+    {   FILE* const inpFp = fopen(lz4Filename, "rb");
+        FILE* const outFp = fopen(decFilename, "wb");
 
-		fclose(outFp);
-		fclose(inpFp);
-	}
+        printf("decompress : %s -> %s\n", lz4Filename, decFilename);
+        int const ret = decompress_file(inpFp, outFp);
 
-	/* verify */
-	{   FILE* const inpFp = fopen(inpFilename, "rb");
-		FILE* const decFp = fopen(decFilename, "rb");
+        fclose(outFp);
+        fclose(inpFp);
 
-		printf("verify : %s <-> %s\n", inpFilename, decFilename);
-		const int cmp = compare(inpFp, decFp);
-		if(0 == cmp) {
-			printf("verify : OK\n");
-		} else {
-			printf("verify : NG\n");
-		}
+        if (ret) {
+            printf("decompress : failed with code %i\n", ret);
+            return ret;
+        }
+        printf("decompress : done\n");
+    }
 
-		fclose(decFp);
-		fclose(inpFp);
-	}
+    /* verify */
+    {   FILE* const inpFp = fopen(inpFilename, "rb");
+        FILE* const decFp = fopen(decFilename, "rb");
+
+        printf("verify : %s <-> %s\n", inpFilename, decFilename);
+        int const cmp = compareFiles(inpFp, decFp);
+
+        fclose(decFp);
+        fclose(inpFp);
+
+        if (cmp) {
+            printf("corruption detected : decompressed file differs from original\n");
+            return cmp;
+        }
+        printf("verify : OK\n");
+    }
+
+    return 0;
 }
diff --git a/examples/printVersion.c b/examples/printVersion.c
index 8607139..7af318a 100644
--- a/examples/printVersion.c
+++ b/examples/printVersion.c
@@ -1,5 +1,5 @@
 // LZ4 trivial example : print Library version number
-// Copyright : Takayuki Matsuoka & Yann Collet
+// by Takayuki Matsuoka
 
 
 #include <stdio.h>
diff --git a/examples/simple_buffer.c b/examples/simple_buffer.c
index f531d48..403d9e8 100644
--- a/examples/simple_buffer.c
+++ b/examples/simple_buffer.c
@@ -8,10 +8,10 @@
  */
 
 /* Includes, for Power! */
-#include "lz4.h"    // This is all that is required to expose the prototypes for basic compression and decompression.
-#include <stdio.h>  // For printf()
-#include <string.h> // For memcmp()
-#include <stdlib.h> // For exit()
+#include "lz4.h"     // This is all that is required to expose the prototypes for basic compression and decompression.
+#include <stdio.h>   // For printf()
+#include <string.h>  // For memcmp()
+#include <stdlib.h>  // For exit()
 
 /*
  * Easy show-error-and-bail function.
@@ -28,37 +28,39 @@
  */
 int main(void) {
   /* Introduction */
-  // Below we will have a Compression and Decompression section to demonstrate.  There are a few important notes before we start:
-  //   1) The return codes of LZ4_ functions are important.  Read lz4.h if you're unsure what a given code means.
-  //   2) LZ4 uses char* pointers in all LZ4_ functions.  This is baked into the API and probably not going to change.  If your
-  //      program uses pointers that are unsigned char*, void*, or otherwise different you may need to do some casting or set the
-  //      right -W compiler flags to ignore those warnings (e.g.: -Wno-pointer-sign).
+  // Below we will have a Compression and Decompression section to demonstrate.
+  // There are a few important notes before we start:
+  //   1) The return codes of LZ4_ functions are important.
+  //      Read lz4.h if you're unsure what a given code means.
+  //   2) LZ4 uses char* pointers in all LZ4_ functions.
+  //      This is baked into the API and probably not going to change.
+  //      If your program uses pointers that are unsigned char*, void*, or otherwise different,
+  //      you may need to do some casting or set the right -W compiler flags to ignore those warnings (e.g.: -Wno-pointer-sign).
 
   /* Compression */
   // We'll store some text into a variable pointed to by *src to be compressed later.
-  const char *src = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
-  // The compression function needs to know how many bytes of exist.  Since we're using a string, we can use strlen() + 1 (for \0).
-  const size_t src_size = strlen(src) + 1;
+  const char* const src = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
+  // The compression function needs to know how many bytes exist.  Since we're using a string, we can use strlen() + 1 (for \0).
+  const int src_size = (int)(strlen(src) + 1);
   // LZ4 provides a function that will tell you the maximum size of compressed output based on input data via LZ4_compressBound().
-  const size_t max_dst_size = LZ4_compressBound(src_size);
+  const int max_dst_size = LZ4_compressBound(src_size);
   // We will use that size for our destination boundary when allocating space.
-  char *compressed_data = malloc(max_dst_size);
+  char* compressed_data = malloc(max_dst_size);
   if (compressed_data == NULL)
     run_screaming("Failed to allocate memory for *compressed_data.", 1);
-  // That's all the information and preparation LZ4 needs to compress *src into *compressed_data.  Invoke LZ4_compress_default now
-  // with our size values and pointers to our memory locations.  Save the return value for error checking.
-  int return_value = 0;
-  return_value = LZ4_compress_default(src, compressed_data, src_size, max_dst_size);
+  // That's all the information and preparation LZ4 needs to compress *src into *compressed_data.
+  // Invoke LZ4_compress_default now with our size values and pointers to our memory locations.
+  // Save the return value for error checking.
+  const int compressed_data_size = LZ4_compress_default(src, compressed_data, src_size, max_dst_size);
   // Check return_value to determine what happened.
-  if (return_value < 0)
-    run_screaming("A negative result from LZ4_compress_default indicates a failure trying to compress the data.  See exit code (echo $?) for value returned.", return_value);
-  if (return_value == 0)
+  if (compressed_data_size < 0)
+    run_screaming("A negative result from LZ4_compress_default indicates a failure trying to compress the data.  See exit code (echo $?) for value returned.", compressed_data_size);
+  if (compressed_data_size == 0)
     run_screaming("A result of 0 means compression worked, but was stopped because the destination buffer couldn't hold all the information.", 1);
-  if (return_value > 0)
+  if (compressed_data_size > 0)
     printf("We successfully compressed some data!\n");
-  // Not only does a positive return_value mean success, the value returned == the number of bytes required.  You can use this to
-  // realloc() *compress_data to free up memory, if desired.  We'll do so just to demonstrate the concept.
-  const size_t compressed_data_size = return_value;
+  // Not only does a positive return value mean success, the value returned == the number of bytes required.
+  // You can use this to realloc() *compressed_data to free up memory, if desired.  We'll do so just to demonstrate the concept.
   compressed_data = (char *)realloc(compressed_data, compressed_data_size);
   if (compressed_data == NULL)
     run_screaming("Failed to re-alloc memory for compressed_data.  Sad :(", 1);
@@ -66,25 +68,27 @@
   /* Decompression */
   // Now that we've successfully compressed the information from *src to *compressed_data, let's do the opposite!  We'll create a
   // *new_src location of size src_size since we know that value.
-  char *new_src = malloc(src_size);
-  if (new_src == NULL)
-    run_screaming("Failed to allocate memory for *new_src.", 1);
-  // The LZ4_decompress_safe function needs to know where the compressed data is, how many bytes long it is, where the new_src
-  // memory location is, and how large the new_src (uncompressed) output will be.  Again, save the return_value.
-  return_value = LZ4_decompress_safe(compressed_data, new_src, compressed_data_size, src_size);
-  if (return_value < 0)
-    run_screaming("A negative result from LZ4_decompress_fast indicates a failure trying to decompress the data.  See exit code (echo $?) for value returned.", return_value);
-  if (return_value == 0)
+  char* const regen_buffer = malloc(src_size);
+  if (regen_buffer == NULL)
+    run_screaming("Failed to allocate memory for *regen_buffer.", 1);
+  // The LZ4_decompress_safe function needs to know where the compressed data is, how many bytes long it is,
+  // where the regen_buffer memory location is, and how large regen_buffer (uncompressed) output will be.
+  // Again, save the return_value.
+  const int decompressed_size = LZ4_decompress_safe(compressed_data, regen_buffer, compressed_data_size, src_size);
+  free(compressed_data);   /* no longer useful */
+  if (decompressed_size < 0)
+    run_screaming("A negative result from LZ4_decompress_safe indicates a failure trying to decompress the data.  See exit code (echo $?) for value returned.", decompressed_size);
+  if (decompressed_size == 0)
     run_screaming("I'm not sure this function can ever return 0.  Documentation in lz4.h doesn't indicate so.", 1);
-  if (return_value > 0)
+  if (decompressed_size > 0)
     printf("We successfully decompressed some data!\n");
-  // Not only does a positive return value mean success, the value returned == the number of bytes read from the compressed_data
-  // stream.  I'm not sure there's ever a time you'll need to know this in most cases...
+  // Not only does a positive return value mean success,
+  // value returned == number of bytes regenerated from compressed_data stream.
 
   /* Validation */
   // We should be able to compare our original *src with our *new_src and be byte-for-byte identical.
-  if (memcmp(src, new_src, src_size) != 0)
+  if (memcmp(src, regen_buffer, src_size) != 0)
     run_screaming("Validation failed.  *src and *new_src are not identical.", 1);
-  printf("Validation done.  The string we ended up with is:\n%s\n", new_src);
+  printf("Validation done.  The string we ended up with is:\n%s\n", regen_buffer);
   return 0;
 }
diff --git a/lib/Makefile b/lib/Makefile
index 2d9c8f3..88d9b4f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -3,6 +3,8 @@
 # Copyright (C) Yann Collet 2011-2016
 # All rights reserved.
 #
+# This Makefile is validated for Linux, macOS, *BSD, Hurd, Solaris, MSYS2 targets
+#
 # BSD license
 # Redistribution and use in source and binary forms, with or without modification,
 # are permitted provided that the following conditions are met:
@@ -40,25 +42,28 @@
 LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
 LIBVER  := $(shell echo $(LIBVER_SCRIPT))
 
-BUILD_STATIC:= yes
+BUILD_SHARED:=yes
+BUILD_STATIC:=yes
 
+OS ?= $(shell uname)
 CPPFLAGS+= -DXXH_NAMESPACE=LZ4_
 CFLAGS  ?= -O3
-DEBUGFLAGS:=-g -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
-           -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef \
-           -Wpointer-arith -Wstrict-aliasing=1
-CFLAGS  += $(MOREFLAGS)
+DEBUGFLAGS:= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
+             -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes \
+             -Wundef -Wpointer-arith -Wstrict-aliasing=1
+CFLAGS  += $(DEBUGFLAGS) $(MOREFLAGS)
 FLAGS    = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
 
+SRCFILES := $(sort $(wildcard *.c))
 
 
 # OS X linker doesn't support -soname, and use different extension
 # see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html
-ifeq ($(shell uname), Darwin)
+ifeq ($(OS), Darwin)
 	SHARED_EXT = dylib
 	SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT)
 	SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT)
-	SONAME_FLAGS = -install_name $(PREFIX)/lib/liblz4.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER)
+	SONAME_FLAGS = -install_name $(libdir)/liblz4.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER)
 else
 	SONAME_FLAGS = -Wl,-soname=liblz4.$(SHARED_EXT).$(LIBVER_MAJOR)
 	SHARED_EXT = so
@@ -68,103 +73,129 @@
 
 LIBLZ4 = liblz4.$(SHARED_EXT_VER)
 
+.PHONY: default
 default: lib-release
 
-lib-release: liblz4.a liblz4
+lib-release: DEBUGFLAGS :=
+lib-release: lib
 
-lib: CFLAGS += $(DEBUGFLAGS)
-lib: lib-release
+lib: liblz4.a liblz4
 
 all: lib
 
 all32: CFLAGS+=-m32
 all32: all
 
-liblz4.a: *.c
-ifeq ($(BUILD_STATIC),yes)
-	@echo compiling static library
-	@$(CC) $(CPPFLAGS) $(CFLAGS) -c $^
-	@$(AR) rcs $@ *.o
+ifeq ($(V), 1)
+Q =
+else
+Q = @
 endif
 
-$(LIBLZ4): *.c
+liblz4.a: $(SRCFILES)
+ifeq ($(BUILD_STATIC),yes)  # can be disabled on command line
+	@echo compiling static library
+	$(Q)$(CC) $(CPPFLAGS) $(CFLAGS) -c $^
+	$(Q)$(AR) rcs $@ *.o
+endif
+
+$(LIBLZ4): $(SRCFILES)
+ifeq ($(BUILD_SHARED),yes)  # can be disabled on command line
 	@echo compiling dynamic library $(LIBVER)
 ifneq (,$(filter Windows%,$(OS)))
-	@$(CC) $(FLAGS) -DLZ4_DLL_EXPORT=1 -shared $^ -o dll\$@.dll
+	$(Q)$(CC) $(FLAGS) -DLZ4_DLL_EXPORT=1 -shared $^ -o dll\$@.dll
 	dlltool -D dll\liblz4.dll -d dll\liblz4.def -l dll\liblz4.lib
 else
-	@$(CC) $(FLAGS) -shared $^ -fPIC $(SONAME_FLAGS) -o $@
+	$(Q)$(CC) $(FLAGS) -shared $^ -fPIC -fvisibility=hidden $(SONAME_FLAGS) -o $@
 	@echo creating versioned links
-	@ln -sf $@ liblz4.$(SHARED_EXT_MAJOR)
-	@ln -sf $@ liblz4.$(SHARED_EXT)
+	$(Q)ln -sf $@ liblz4.$(SHARED_EXT_MAJOR)
+	$(Q)ln -sf $@ liblz4.$(SHARED_EXT)
+endif
 endif
 
 liblz4: $(LIBLZ4)
 
 clean:
-	@$(RM) core *.o liblz4.pc dll/liblz4.dll dll/liblz4.lib
-	@$(RM) *.a *.$(SHARED_EXT) *.$(SHARED_EXT_MAJOR) *.$(SHARED_EXT_VER)
+	$(Q)$(RM) core *.o liblz4.pc dll/liblz4.dll dll/liblz4.lib
+	$(Q)$(RM) *.a *.$(SHARED_EXT) *.$(SHARED_EXT_MAJOR) *.$(SHARED_EXT_VER)
 	@echo Cleaning library completed
 
 
 #-----------------------------------------------------------------------------
 # make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets
 #-----------------------------------------------------------------------------
-ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS))
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD))
 
-ifneq (,$(filter $(shell uname),SunOS))
+.PHONY: listL120
+listL120:  # list lines longer than 120 characters in *.{c,h} regular files, by Takayuki Matsuoka (note : $$, for Makefile compatibility)
+	find . -type f \( -name '*.c' -o -name '*.h' \) | while read -r filename; do awk 'length > 120 {print FILENAME "(" FNR "): " $$0}' "$$filename"; done
+
+DESTDIR     ?=
+# directory variables : GNU conventions prefer lowercase
+# see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html
+# support both lower and uppercase (BSD), use lower in script
+PREFIX      ?= /usr/local
+prefix      ?= $(PREFIX)
+EXEC_PREFIX ?= $(prefix)
+exec_prefix ?= $(EXEC_PREFIX)
+LIBDIR      ?= $(exec_prefix)/lib
+libdir      ?= $(LIBDIR)
+INCLUDEDIR  ?= $(prefix)/include
+includedir  ?= $(INCLUDEDIR)
+
+ifneq (,$(filter $(OS),OpenBSD FreeBSD NetBSD DragonFly))
+PKGCONFIGDIR ?= $(prefix)/libdata/pkgconfig
+else
+PKGCONFIGDIR ?= $(libdir)/pkgconfig
+endif
+pkgconfigdir ?= $(PKGCONFIGDIR)
+
+ifneq (,$(filter $(OS),SunOS))
 INSTALL ?= ginstall
 else
 INSTALL ?= install
 endif
 
-PREFIX     ?= /usr/local
-DESTDIR    ?=
-LIBDIR     ?= $(PREFIX)/lib
-INCLUDEDIR ?= $(PREFIX)/include
-
-ifneq (,$(filter $(shell uname),OpenBSD FreeBSD NetBSD DragonFly))
-PKGCONFIGDIR ?= $(PREFIX)/libdata/pkgconfig
-else
-PKGCONFIGDIR ?= $(LIBDIR)/pkgconfig
-endif
-
-INSTALL_LIB  ?= $(INSTALL) -m 755
-INSTALL_DATA ?= $(INSTALL) -m 644
+INSTALL_PROGRAM ?= $(INSTALL)
+INSTALL_DATA    ?= $(INSTALL) -m 644
 
 liblz4.pc: liblz4.pc.in Makefile
 	@echo creating pkgconfig
-	@sed -e 's|@PREFIX@|$(PREFIX)|' \
-         -e 's|@LIBDIR@|$(LIBDIR)|' \
-         -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \
+	$(Q)sed -e 's|@PREFIX@|$(prefix)|' \
+         -e 's|@LIBDIR@|$(libdir)|' \
+         -e 's|@INCLUDEDIR@|$(includedir)|' \
          -e 's|@VERSION@|$(LIBVER)|' \
           $< >$@
 
 install: lib liblz4.pc
-	@$(INSTALL) -d -m 755 $(DESTDIR)$(PKGCONFIGDIR)/ $(DESTDIR)$(INCLUDEDIR)/
-	@$(INSTALL_DATA) liblz4.pc $(DESTDIR)$(PKGCONFIGDIR)/
+	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(pkgconfigdir)/ $(DESTDIR)$(includedir)/ $(DESTDIR)$(libdir)/
+	$(Q)$(INSTALL_DATA) liblz4.pc $(DESTDIR)$(pkgconfigdir)/
 	@echo Installing libraries
 ifeq ($(BUILD_STATIC),yes)
-	@$(INSTALL_LIB) liblz4.a $(DESTDIR)$(LIBDIR)/liblz4.a
+	$(Q)$(INSTALL_DATA) liblz4.a $(DESTDIR)$(libdir)/liblz4.a
+	$(Q)$(INSTALL_DATA) lz4frame_static.h $(DESTDIR)$(includedir)/lz4frame_static.h
 endif
-	@$(INSTALL_LIB) liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(LIBDIR)
-	@ln -sf liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_MAJOR)
-	@ln -sf liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT)
-	@echo Installing includes
-	@$(INSTALL_DATA) lz4.h $(DESTDIR)$(INCLUDEDIR)/lz4.h
-	@$(INSTALL_DATA) lz4hc.h $(DESTDIR)$(INCLUDEDIR)/lz4hc.h
-	@$(INSTALL_DATA) lz4frame.h $(DESTDIR)$(INCLUDEDIR)/lz4frame.h
-	@echo lz4 static and shared libraries installed
+ifeq ($(BUILD_SHARED),yes)
+	$(Q)$(INSTALL_PROGRAM) liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(libdir)
+	$(Q)ln -sf liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(libdir)/liblz4.$(SHARED_EXT_MAJOR)
+	$(Q)ln -sf liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(libdir)/liblz4.$(SHARED_EXT)
+endif
+	@echo Installing headers in $(includedir)
+	$(Q)$(INSTALL_DATA) lz4.h $(DESTDIR)$(includedir)/lz4.h
+	$(Q)$(INSTALL_DATA) lz4hc.h $(DESTDIR)$(includedir)/lz4hc.h
+	$(Q)$(INSTALL_DATA) lz4frame.h $(DESTDIR)$(includedir)/lz4frame.h
+	@echo lz4 libraries installed
 
 uninstall:
-	@$(RM) $(DESTDIR)$(LIBDIR)/pkgconfig/liblz4.pc
-	@$(RM) $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT)
-	@$(RM) $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_MAJOR)
-	@$(RM) $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_VER)
-	@$(RM) $(DESTDIR)$(LIBDIR)/liblz4.a
-	@$(RM) $(DESTDIR)$(INCLUDEDIR)/lz4.h
-	@$(RM) $(DESTDIR)$(INCLUDEDIR)/lz4hc.h
-	@$(RM) $(DESTDIR)$(INCLUDEDIR)/lz4frame.h
+	$(Q)$(RM) $(DESTDIR)$(pkgconfigdir)/liblz4.pc
+	$(Q)$(RM) $(DESTDIR)$(libdir)/liblz4.$(SHARED_EXT)
+	$(Q)$(RM) $(DESTDIR)$(libdir)/liblz4.$(SHARED_EXT_MAJOR)
+	$(Q)$(RM) $(DESTDIR)$(libdir)/liblz4.$(SHARED_EXT_VER)
+	$(Q)$(RM) $(DESTDIR)$(libdir)/liblz4.a
+	$(Q)$(RM) $(DESTDIR)$(includedir)/lz4.h
+	$(Q)$(RM) $(DESTDIR)$(includedir)/lz4hc.h
+	$(Q)$(RM) $(DESTDIR)$(includedir)/lz4frame.h
+	$(Q)$(RM) $(DESTDIR)$(includedir)/lz4frame_static.h
 	@echo lz4 libraries successfully uninstalled
 
 endif
diff --git a/lib/README.md b/lib/README.md
index b40442c..7082fe3 100644
--- a/lib/README.md
+++ b/lib/README.md
@@ -1,44 +1,43 @@
 LZ4 - Library Files
 ================================
 
-The directory contains many files, but depending on project's objectives,
+The `/lib` directory contains many files, but depending on project's objectives,
 not all of them are necessary.
 
 #### Minimal LZ4 build
 
 The minimum required is **`lz4.c`** and **`lz4.h`**,
-which will provide the fast compression and decompression algorithm.
+which provide the fast compression and decompression algorithm.
+They generate and decode data using [LZ4 block format].
 
 
-#### The High Compression variant of LZ4
+#### High Compression variant
 
-For more compression at the cost of compression speed,
-the High Compression variant **lz4hc** is available.
-It's necessary to add **`lz4hc.c`** and **`lz4hc.h`**.
-The variant still depends on regular `lz4` source files.
-In particular, the decompression is still provided by `lz4.c`.
+For a higher compression ratio at the cost of compression speed,
+the High Compression variant called **lz4hc** is available.
+Add files **`lz4hc.c`** and **`lz4hc.h`**.
+The variant still depends on regular `lib/lz4.*` source files.
 
 
-#### Compatibility issues
+#### Frame variant, for interoperability
 
-In order to produce files or streams compatible with `lz4` command line utility,
+In order to produce compressed data compatible with `lz4` command line utility,
 it's necessary to encode lz4-compressed blocks using the [official interoperable frame format].
 This format is generated and decoded automatically by the **lz4frame** library.
-In order to work properly, lz4frame needs lz4 and lz4hc, and also **xxhash**,
-which provides error detection.
-(_Advanced stuff_ : It's possible to hide xxhash symbols into a local namespace.
-This is what `liblz4` does, to avoid symbol duplication
-in case a user program would link to several libraries containing xxhash symbols.)
+Its public API is described in `lib/lz4frame.h`.
+In order to work properly, lz4frame needs all other modules present in `/lib`,
+including lz4 and lz4hc, and also **xxhash**.
+So it's necessary to include all `*.c` and `*.h` files present in `/lib`.
 
 
-#### Advanced API 
+#### Advanced / Experimental API
 
-A more complex `lz4frame_static.h` is also provided.
-It contains definitions which are not guaranteed to remain stable within future versions.
-It must be used with static linking ***only***.
+A complex API defined in `lz4frame_static.h` contains definitions
+which are not guaranteed to remain stable in future versions.
+As a consequence, it must be used with static linking ***only***.
 
 
-#### Using MinGW+MSYS to create DLL
+#### Windows : using MinGW+MSYS to create DLL
 
 DLL can be created using MinGW+MSYS with the `make liblz4` command.
 This command creates `dll\liblz4.dll` and the import library `dll\liblz4.lib`.
@@ -51,23 +50,24 @@
 ```
     gcc $(CFLAGS) -Iinclude/ test-dll.c -o test-dll dll\liblz4.dll
 ```
-The compiled executable will require LZ4 DLL which is available at `dll\liblz4.dll`. 
+The compiled executable will require LZ4 DLL which is available at `dll\liblz4.dll`.
 
 
-#### Miscellaneous 
+#### Miscellaneous
 
 Other files present in the directory are not source code. There are :
 
- - LICENSE : contains the BSD license text
- - Makefile : script to compile or install lz4 library (static or dynamic)
- - liblz4.pc.in : for pkg-config (make install)
- - README.md : this file
+ - `LICENSE` : contains the BSD license text
+ - `Makefile` : `make` script to compile and install lz4 library (static and dynamic)
+ - `liblz4.pc.in` : for `pkg-config` (used in `make install`)
+ - `README.md` : this file
 
 [official interoperable frame format]: ../doc/lz4_Frame_format.md
+[LZ4 block format]: ../doc/lz4_Block_format.md
 
 
-#### License 
+#### License
 
 All source material within __lib__ directory are BSD 2-Clause licensed.
 See [LICENSE](LICENSE) for details.
-The license is also repeated at the top of each source file.
+The license is also reproduced at the top of each source file.
diff --git a/lib/lz4.c b/lib/lz4.c
index 143c36e..4046102 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -1,6 +1,6 @@
 /*
    LZ4 - Fast LZ compression algorithm
-   Copyright (C) 2011-2016, Yann Collet.
+   Copyright (C) 2011-present, Yann Collet.
 
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
@@ -37,12 +37,12 @@
 *  Tuning parameters
 **************************************/
 /*
- * HEAPMODE :
+ * LZ4_HEAPMODE :
  * Select how default compression functions will allocate memory for their hash table,
  * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
  */
-#ifndef HEAPMODE
-#  define HEAPMODE 0
+#ifndef LZ4_HEAPMODE
+#  define LZ4_HEAPMODE 0
 #endif
 
 /*
@@ -63,16 +63,17 @@
  * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
  *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
  * Method 2 : direct access. This method is portable but violate C standard.
- *            It can generate buggy code on targets which generate assembly depending on alignment.
+ *            It can generate buggy code on targets whose assembly generation depends on alignment.
  *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
  * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
  * Prefer these methods in priority order (0 > 1 > 2)
  */
-#ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
-#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
+#  if defined(__GNUC__) && \
+  ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
+  || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
 #    define LZ4_FORCE_MEMORY_ACCESS 2
-#  elif defined(__INTEL_COMPILER) || \
-  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
 #    define LZ4_FORCE_MEMORY_ACCESS 1
 #  endif
 #endif
@@ -81,14 +82,17 @@
  * LZ4_FORCE_SW_BITCOUNT
  * Define this parameter if your target system or compiler does not support hardware bit count
  */
-#if defined(_MSC_VER) && defined(_WIN32_WCE)   /* Visual Studio for Windows CE does not support Hardware bit count */
+#if defined(_MSC_VER) && defined(_WIN32_WCE)   /* Visual Studio for WinCE doesn't support Hardware bit count */
 #  define LZ4_FORCE_SW_BITCOUNT
 #endif
 
 
+
 /*-************************************
 *  Dependency
 **************************************/
+#define LZ4_STATIC_LINKING_ONLY
+#define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
 #include "lz4.h"
 /* see also "memory routines" below */
 
@@ -97,38 +101,72 @@
 *  Compiler Options
 **************************************/
 #ifdef _MSC_VER    /* Visual Studio */
-#  define FORCE_INLINE static __forceinline
 #  include <intrin.h>
 #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
 #  pragma warning(disable : 4293)        /* disable: C4293: too large shift (32-bits) */
-#else
-#  if defined(__GNUC__) || defined(__clang__)
-#    define FORCE_INLINE static inline __attribute__((always_inline))
-#  elif defined(__cplusplus) || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-#    define FORCE_INLINE static inline
-#  else
-#    define FORCE_INLINE static
-#  endif
 #endif  /* _MSC_VER */
 
+#ifndef LZ4_FORCE_INLINE
+#  ifdef _MSC_VER    /* Visual Studio */
+#    define LZ4_FORCE_INLINE static __forceinline
+#  else
+#    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#      ifdef __GNUC__
+#        define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
+#      else
+#        define LZ4_FORCE_INLINE static inline
+#      endif
+#    else
+#      define LZ4_FORCE_INLINE static
+#    endif /* __STDC_VERSION__ */
+#  endif  /* _MSC_VER */
+#endif /* LZ4_FORCE_INLINE */
+
+/* LZ4_FORCE_O2_GCC_PPC64LE and LZ4_FORCE_O2_INLINE_GCC_PPC64LE
+ * Gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy,
+ * together with a simple 8-byte copy loop as a fall-back path.
+ * However, this optimization hurts the decompression speed by >30%,
+ * because the execution does not go to the optimized loop
+ * for typical compressible data, and all of the preamble checks
+ * before going to the fall-back path become useless overhead.
+ * This optimization happens only with the -O3 flag, and -O2 generates
+ * a simple 8-byte copy loop.
+ * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy
+ * functions are annotated with __attribute__((optimize("O2"))),
+ * and also LZ4_wildCopy is forcibly inlined, so that the O2 attribute
+ * of LZ4_wildCopy does not affect the compression speed.
+ */
+#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__)
+#  define LZ4_FORCE_O2_GCC_PPC64LE __attribute__((optimize("O2")))
+#  define LZ4_FORCE_O2_INLINE_GCC_PPC64LE __attribute__((optimize("O2"))) LZ4_FORCE_INLINE
+#else
+#  define LZ4_FORCE_O2_GCC_PPC64LE
+#  define LZ4_FORCE_O2_INLINE_GCC_PPC64LE static
+#endif
+
 #if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
 #  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
 #else
 #  define expect(expr,value)    (expr)
 #endif
 
+#ifndef likely
 #define likely(expr)     expect((expr) != 0, 1)
+#endif
+#ifndef unlikely
 #define unlikely(expr)   expect((expr) != 0, 0)
+#endif
 
 
 /*-************************************
 *  Memory routines
 **************************************/
 #include <stdlib.h>   /* malloc, calloc, free */
-#define ALLOCATOR(n,s) calloc(n,s)
-#define FREEMEM        free
+#define ALLOC(s)          malloc(s)
+#define ALLOC_AND_ZERO(s) calloc(1,s)
+#define FREEMEM(p)        free(p)
 #include <string.h>   /* memset, memcpy */
-#define MEM_INIT       memset
+#define MEM_INIT(p,v,s)   memset((p),(v),(s))
 
 
 /*-************************************
@@ -241,19 +279,15 @@
     }
 }
 
-static void LZ4_copy8(void* dst, const void* src)
-{
-    memcpy(dst,src,8);
-}
-
 /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
-static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
+LZ4_FORCE_O2_INLINE_GCC_PPC64LE
+void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
 {
     BYTE* d = (BYTE*)dstPtr;
     const BYTE* s = (const BYTE*)srcPtr;
     BYTE* const e = (BYTE*)dstEnd;
 
-    do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e);
+    do { memcpy(d,s,8); d+=8; s+=8; } while (d<e);
 }
 
 
@@ -263,8 +297,9 @@
 #define MINMATCH 4
 
 #define WILDCOPYLENGTH 8
-#define LASTLITERALS 5
-#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
+#define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
+#define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
+#define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
 static const int LZ4_minLength = (MFLIMIT+1);
 
 #define KB *(1 <<10)
@@ -281,15 +316,36 @@
 
 
 /*-************************************
-*  Common Utils
+*  Error detection
 **************************************/
-#define LZ4_STATIC_ASSERT(c)    { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
+#  include <assert.h>
+#else
+#  ifndef assert
+#    define assert(condition) ((void)0)
+#  endif
+#endif
+
+#define LZ4_STATIC_ASSERT(c)   { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use after variable declarations */
+
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
+#  include <stdio.h>
+static int g_debuglog_enable = 1;
+#  define DEBUGLOG(l, ...) {                                  \
+                if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) {  \
+                    fprintf(stderr, __FILE__ ": ");           \
+                    fprintf(stderr, __VA_ARGS__);             \
+                    fprintf(stderr, " \n");                   \
+            }   }
+#else
+#  define DEBUGLOG(l, ...)      {}    /* disabled */
+#endif
 
 
 /*-************************************
 *  Common functions
 **************************************/
-static unsigned LZ4_NbCommonBytes (register reg_t val)
+static unsigned LZ4_NbCommonBytes (reg_t val)
 {
     if (LZ4_isLittleEndian()) {
         if (sizeof(val)==8) {
@@ -300,7 +356,14 @@
 #       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
             return (__builtin_ctzll((U64)val) >> 3);
 #       else
-            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
+                                                     0, 3, 1, 3, 1, 4, 2, 7,
+                                                     0, 2, 3, 6, 1, 5, 3, 5,
+                                                     1, 3, 4, 4, 2, 5, 6, 7,
+                                                     7, 0, 1, 2, 3, 3, 4, 6,
+                                                     2, 6, 5, 5, 3, 4, 5, 6,
+                                                     7, 1, 2, 4, 6, 4, 4, 5,
+                                                     7, 2, 6, 5, 7, 6, 7, 7 };
             return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
 #       endif
         } else /* 32 bits */ {
@@ -311,12 +374,15 @@
 #       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
             return (__builtin_ctz((U32)val) >> 3);
 #       else
-            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
+                                                     3, 2, 2, 1, 3, 2, 0, 1,
+                                                     3, 3, 1, 2, 2, 2, 2, 0,
+                                                     3, 1, 2, 0, 1, 0, 1, 1 };
             return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
 #       endif
         }
     } else   /* Big Endian CPU */ {
-        if (sizeof(val)==8) {
+        if (sizeof(val)==8) {   /* 64-bits */
 #       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
             unsigned long r = 0;
             _BitScanReverse64( &r, val );
@@ -324,8 +390,11 @@
 #       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
             return (__builtin_clzll((U64)val) >> 3);
 #       else
+            static const U32 by32 = sizeof(val)*4;  /* 32 on 64 bits (goal), 16 on 32 bits.
+                Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
+                Note that this code path is never triggered in 32-bits mode. */
             unsigned r;
-            if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
+            if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
             if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
             r += (!val);
             return r;
@@ -348,11 +417,20 @@
 }
 
 #define STEPSIZE sizeof(reg_t)
-static unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+LZ4_FORCE_INLINE
+unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
 {
     const BYTE* const pStart = pIn;
 
-    while (likely(pIn<pInLimit-(STEPSIZE-1))) {
+    if (likely(pIn < pInLimit-(STEPSIZE-1))) {
+        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+        if (!diff) {
+            pIn+=STEPSIZE; pMatch+=STEPSIZE;
+        } else {
+            return LZ4_NbCommonBytes(diff);
+    }   }
+
+    while (likely(pIn < pInLimit-(STEPSIZE-1))) {
         reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
         if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
         pIn += LZ4_NbCommonBytes(diff);
@@ -377,15 +455,35 @@
 /*-************************************
 *  Local Structures and types
 **************************************/
-typedef enum { notLimited = 0, limitedOutput = 1 } limitedOutput_directive;
-typedef enum { byPtr, byU32, byU16 } tableType_t;
+typedef enum { notLimited = 0, limitedOutput = 1, fillOutput = 2 } limitedOutput_directive;
+typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;
 
-typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+/**
+ * This enum distinguishes several different modes of accessing previous
+ * content in the stream.
+ *
+ * - noDict        : There is no preceding content.
+ * - withPrefix64k : Table entries up to ctx->dictSize before the current blob
+ *                   being compressed are valid and refer to the preceding
+ *                   content (of length ctx->dictSize), which is available
+ *                   contiguously preceding in memory the content currently
+ *                   being compressed.
+ * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
+ *                   else in memory, starting at ctx->dictionary with length
+ *                   ctx->dictSize.
+ * - usingDictCtx  : Like usingExtDict, but everything concerning the preceding
+ *                   content is in a separate context, pointed to by
+ *                   ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table
+ *                   entries in the current context that refer to positions
+ *                   preceding the beginning of the current compression are
+ *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
+ *                   ->dictSize describe the location and size of the preceding
+ *                   content, and matches are found by looking in the ctx
+ *                   ->dictCtx->hashTable.
+ */
+typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
 typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
 
-typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
-typedef enum { full = 0, partial = 1 } earlyEnd_directive;
-
 
 /*-************************************
 *  Local Utils
@@ -396,6 +494,21 @@
 int LZ4_sizeofState() { return LZ4_STREAMSIZE; }
 
 
+/*-************************************
+*  Internal Definitions used in Tests
+**************************************/
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize);
+
+int LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize, const void* dict, size_t dictSize);
+
+#if defined (__cplusplus)
+}
+#endif
+
 /*-******************************
 *  Compression functions
 ********************************/
@@ -418,93 +531,190 @@
         return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
 }
 
-FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
+LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
 {
     if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType);
     return LZ4_hash4(LZ4_read32(p), tableType);
 }
 
-static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t const tableType, const BYTE* srcBase)
+static void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
 {
     switch (tableType)
     {
+    default: /* fallthrough */
+    case clearedTable: /* fallthrough */
+    case byPtr: { /* illegal! */ assert(0); return; }
+    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; }
+    case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; }
+    }
+}
+
+static void LZ4_putPositionOnHash(const BYTE* p, U32 h,
+                                  void* tableBase, tableType_t const tableType,
+                            const BYTE* srcBase)
+{
+    switch (tableType)
+    {
+    case clearedTable: { /* illegal! */ assert(0); return; }
     case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; }
     case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; }
     case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; }
     }
 }
 
-FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
 {
     U32 const h = LZ4_hashPosition(p, tableType);
     LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
 }
 
-static const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+/* LZ4_getIndexOnHash() :
+ * Index of match position registered in hash table.
+ * hash position must be calculated by using base+index, or dictBase+index.
+ * Assumption 1 : only valid if tableType == byU32 or byU16.
+ * Assumption 2 : h is presumed valid (within limits of hash table)
+ */
+static U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
 {
-    if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; }
-    if (tableType == byU32) { const U32* const hashTable = (U32*) tableBase; return hashTable[h] + srcBase; }
-    { const U16* const hashTable = (U16*) tableBase; return hashTable[h] + srcBase; }   /* default, to ensure a return */
+    LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
+    if (tableType == byU32) {
+        const U32* const hashTable = (const U32*) tableBase;
+        assert(h < (1U << (LZ4_MEMORY_USAGE-2)));
+        return hashTable[h];
+    }
+    if (tableType == byU16) {
+        const U16* const hashTable = (const U16*) tableBase;
+        assert(h < (1U << (LZ4_MEMORY_USAGE-1)));
+        return hashTable[h];
+    }
+    assert(0); return 0;  /* forbidden case */
 }
 
-FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    if (tableType == byPtr) { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; }
+    if (tableType == byU32) { const U32* const hashTable = (const U32*) tableBase; return hashTable[h] + srcBase; }
+    { const U16* const hashTable = (const U16*) tableBase; return hashTable[h] + srcBase; }   /* default, to ensure a return */
+}
+
+LZ4_FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p,
+                                             const void* tableBase, tableType_t tableType,
+                                             const BYTE* srcBase)
 {
     U32 const h = LZ4_hashPosition(p, tableType);
     return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
 }
 
+LZ4_FORCE_INLINE void LZ4_prepareTable(
+        LZ4_stream_t_internal* const cctx,   /* context whose hash table and history fields are prepared */
+        const int inputSize,                 /* size of the input about to be compressed */
+        const tableType_t tableType) {       /* table layout the upcoming compression will use */
+    /* If the table hasn't been used (clearedTable), it's guaranteed to be zeroed
+     * out, and is therefore safe to use no matter what mode we're in. Otherwise,
+     * we figure out if it's safe to leave as is or whether it must be reset
+     * (table zeroed, currentOffset back to 0, tableType back to clearedTable). */
+    if (cctx->tableType != clearedTable) {
+        if (cctx->tableType != tableType                                          /* entries were stored with another layout */
+          || (tableType == byU16 && cctx->currentOffset + inputSize >= 0xFFFFU)   /* indexes would no longer fit in 16 bits */
+          || (tableType == byU32 && cctx->currentOffset > 1 GB)                   /* presumably keeps the 32-bit offset far from overflow - TODO confirm */
+          || tableType == byPtr                                                   /* a byPtr table is never re-used */
+          || inputSize >= 4 KB)                                                   /* inputs of 4 KB or more always start from a cleared table */
+        {
+            DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
+            MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
+            cctx->currentOffset = 0;
+            cctx->tableType = clearedTable;
+        } else {
+            DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
+        }
+    }
+
+    /* Adding a 64 KB gap, so all previous entries are > MAX_DISTANCE back, is
+     * faster than compressing without a gap. However, compressing with
+     * currentOffset == 0 is faster still, so we preserve that case.
+     * The gap is only applied to byU32 tables. */
+    if (cctx->currentOffset != 0 && tableType == byU32) {
+        DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset");
+        cctx->currentOffset += 64 KB;
+    }
+
+    /* Finally, clear history : no attached dictCtx, no dictionary, dictSize of 0 */
+    cctx->dictCtx = NULL;
+    cctx->dictionary = NULL;
+    cctx->dictSize = 0;
+}
 
 /** LZ4_compress_generic() :
     inlined, to ensure branches are decided at compilation time */
-FORCE_INLINE int LZ4_compress_generic(
+LZ4_FORCE_INLINE int LZ4_compress_generic(
                  LZ4_stream_t_internal* const cctx,
                  const char* const source,
                  char* const dest,
                  const int inputSize,
+                 int *inputConsumed, /* only written when outputLimited == fillOutput */
                  const int maxOutputSize,
                  const limitedOutput_directive outputLimited,
                  const tableType_t tableType,
-                 const dict_directive dict,
+                 const dict_directive dictDirective,
                  const dictIssue_directive dictIssue,
                  const U32 acceleration)
 {
     const BYTE* ip = (const BYTE*) source;
-    const BYTE* base;
+
+    U32 const startIndex = cctx->currentOffset;
+    const BYTE* base = (const BYTE*) source - startIndex;
     const BYTE* lowLimit;
-    const BYTE* const lowRefLimit = ip - cctx->dictSize;
-    const BYTE* const dictionary = cctx->dictionary;
-    const BYTE* const dictEnd = dictionary + cctx->dictSize;
-    const ptrdiff_t dictDelta = dictEnd - (const BYTE*)source;
+
+    const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx;
+    const BYTE* const dictionary =
+        dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary;
+    const U32 dictSize =
+        dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize;
+    const U32 dictDelta = (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0;   /* make indexes in dictCtx comparable with index in current context */
+
+    int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);
+    U32 const prefixIdxLimit = startIndex - dictSize;   /* used when dictDirective == dictSmall */
+    const BYTE* const dictEnd = dictionary + dictSize;
     const BYTE* anchor = (const BYTE*) source;
     const BYTE* const iend = ip + inputSize;
-    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
     const BYTE* const matchlimit = iend - LASTLITERALS;
 
+    /* the dictCtx currentOffset is indexed on the start of the dictionary,
+     * while a dictionary in the current context precedes the currentOffset */
+    const BYTE* dictBase = (dictDirective == usingDictCtx) ?
+                            dictionary + dictSize - dictCtx->currentOffset :
+                            dictionary + dictSize - startIndex;
+
     BYTE* op = (BYTE*) dest;
     BYTE* const olimit = op + maxOutputSize;
 
+    U32 offset = 0;
     U32 forwardH;
 
+    DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, tableType=%u", inputSize, tableType);
     /* Init conditions */
+    if (outputLimited == fillOutput && maxOutputSize < 1) return 0; /* Impossible to store anything */
     if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;   /* Unsupported inputSize, too large (or negative) */
-    switch(dict)
-    {
-    case noDict:
-    default:
-        base = (const BYTE*)source;
-        lowLimit = (const BYTE*)source;
-        break;
-    case withPrefix64k:
-        base = (const BYTE*)source - cctx->currentOffset;
-        lowLimit = (const BYTE*)source - cctx->dictSize;
-        break;
-    case usingExtDict:
-        base = (const BYTE*)source - cctx->currentOffset;
-        lowLimit = (const BYTE*)source;
-        break;
+    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0;  /* Size too large (not within 64K limit) */
+    if (tableType==byPtr) assert(dictDirective==noDict);      /* only supported use case with byPtr */
+    assert(acceleration >= 1);
+
+    lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0);
+
+    /* Update context state */
+    if (dictDirective == usingDictCtx) {
+        /* Subsequent linked blocks can't use the dictionary. */
+        /* Instead, they use the block we just compressed. */
+        cctx->dictCtx = NULL;
+        cctx->dictSize = (U32)inputSize;
+    } else {
+        cctx->dictSize += (U32)inputSize;
     }
-    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0;   /* Size too large (not within 64K limit) */
-    if (inputSize<LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
+    cctx->currentOffset += (U32)inputSize;
+    cctx->tableType = (U16)tableType;
+
+    if (inputSize<LZ4_minLength) goto _last_literals;        /* Input too small, no compression (all literals) */
 
     /* First Byte */
     LZ4_putPosition(ip, cctx->hashTable, tableType, base);
@@ -512,12 +722,12 @@
 
     /* Main Loop */
     for ( ; ; ) {
-        ptrdiff_t refDelta = 0;
         const BYTE* match;
         BYTE* token;
 
         /* Find a match */
-        {   const BYTE* forwardIp = ip;
+        if (tableType == byPtr) {
+            const BYTE* forwardIp = ip;
             unsigned step = 1;
             unsigned searchMatchNb = acceleration << LZ4_skipTrigger;
             do {
@@ -526,34 +736,89 @@
                 forwardIp += step;
                 step = (searchMatchNb++ >> LZ4_skipTrigger);
 
-                if (unlikely(forwardIp > mflimit)) goto _last_literals;
+                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
+                assert(ip < mflimitPlusOne);
 
                 match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base);
-                if (dict==usingExtDict) {
-                    if (match < (const BYTE*)source) {
-                        refDelta = dictDelta;
-                        lowLimit = dictionary;
-                    } else {
-                        refDelta = 0;
-                        lowLimit = (const BYTE*)source;
-                }   }
                 forwardH = LZ4_hashPosition(forwardIp, tableType);
                 LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base);
 
-            } while ( ((dictIssue==dictSmall) ? (match < lowRefLimit) : 0)
-                || ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip))
-                || (LZ4_read32(match+refDelta) != LZ4_read32(ip)) );
+            } while ( (match+MAX_DISTANCE < ip)
+                   || (LZ4_read32(match) != LZ4_read32(ip)) );
+
+        } else {   /* byU32, byU16 */
+
+            const BYTE* forwardIp = ip;
+            unsigned step = 1;
+            unsigned searchMatchNb = acceleration << LZ4_skipTrigger;
+            do {
+                U32 const h = forwardH;
+                U32 const current = (U32)(forwardIp - base);
+                U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
+                assert(matchIndex <= current);
+                assert(forwardIp - base < (ptrdiff_t)(2 GB - 1));
+                ip = forwardIp;
+                forwardIp += step;
+                step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
+                assert(ip < mflimitPlusOne);
+
+                if (dictDirective == usingDictCtx) {
+                    if (matchIndex < startIndex) {
+                        /* there was no match, try the dictionary */
+                        assert(tableType == byU32);
+                        matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
+                        match = dictBase + matchIndex;
+                        matchIndex += dictDelta;   /* make dictCtx index comparable with current context */
+                        lowLimit = dictionary;
+                    } else {
+                        match = base + matchIndex;
+                        lowLimit = (const BYTE*)source;
+                    }
+                } else if (dictDirective==usingExtDict) {
+                    if (matchIndex < startIndex) {
+                        DEBUGLOG(7, "extDict candidate: matchIndex=%5u  <  startIndex=%5u", matchIndex, startIndex);
+                        assert(startIndex - matchIndex >= MINMATCH);
+                        match = dictBase + matchIndex;
+                        lowLimit = dictionary;
+                    } else {
+                        match = base + matchIndex;
+                        lowLimit = (const BYTE*)source;
+                    }
+                } else {   /* single continuous memory segment */
+                    match = base + matchIndex;
+                }
+                forwardH = LZ4_hashPosition(forwardIp, tableType);
+                LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
+
+                if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) continue;    /* match outside of valid area */
+                assert(matchIndex < current);
+                if ((tableType != byU16) && (matchIndex+MAX_DISTANCE < current)) continue;  /* too far */
+                if (tableType == byU16) assert((current - matchIndex) <= MAX_DISTANCE);     /* too_far presumed impossible with byU16 */
+
+                if (LZ4_read32(match) == LZ4_read32(ip)) {
+                    if (maybe_extMem) offset = current - matchIndex;
+                    break;   /* match found */
+                }
+
+            } while(1);
         }
 
         /* Catch up */
-        while (((ip>anchor) & (match+refDelta > lowLimit)) && (unlikely(ip[-1]==match[refDelta-1]))) { ip--; match--; }
+        while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
 
         /* Encode Literals */
         {   unsigned const litLength = (unsigned)(ip - anchor);
             token = op++;
-            if ((outputLimited) &&  /* Check output buffer overflow */
+            if ((outputLimited == limitedOutput) &&  /* Check output buffer overflow */
                 (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)))
                 return 0;
+            if ((outputLimited == fillOutput) &&
+                (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
+                op--;
+                goto _last_literals;
+            }
             if (litLength >= RUN_MASK) {
                 int len = (int)litLength-RUN_MASK;
                 *token = (RUN_MASK<<ML_BITS);
@@ -565,40 +830,79 @@
             /* Copy Literals */
             LZ4_wildCopy(op, anchor, op+litLength);
             op+=litLength;
+            DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
+                        (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source));
         }
 
 _next_match:
+        /* at this stage, the following variables must be correctly set :
+         * - ip : at start of LZ operation
+         * - match : at start of previous pattern occurrence; can be within current prefix, or within extDict
+         * - offset : must be set when maybe_extMem==1 (constant)
+         * - lowLimit : must be == dictionary to mean "match is within extDict"; must be == source otherwise
+         * - token and *token : position to write 4-bits for match length; higher 4-bits for literal length supposed already written
+         */
+
+        if ((outputLimited == fillOutput) &&
+            (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) {
+            /* the match was too close to the end, rewind and go to last literals */
+            op = token;
+            goto _last_literals;
+        }
+
         /* Encode Offset */
-        LZ4_writeLE16(op, (U16)(ip-match)); op+=2;
+        if (maybe_extMem) {   /* static test */
+            DEBUGLOG(6, "             with offset=%u  (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
+            assert(offset <= MAX_DISTANCE && offset > 0);
+            LZ4_writeLE16(op, (U16)offset); op+=2;
+        } else  {
+            DEBUGLOG(6, "             with offset=%u  (same segment)", (U32)(ip - match));
+            assert(ip-match <= MAX_DISTANCE);
+            LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
+        }
 
         /* Encode MatchLength */
         {   unsigned matchCode;
 
-            if ((dict==usingExtDict) && (lowLimit==dictionary)) {
-                const BYTE* limit;
-                match += refDelta;
-                limit = ip + (dictEnd-match);
+            if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx)
+              && (lowLimit==dictionary) /* match within extDict */ ) {
+                const BYTE* limit = ip + (dictEnd-match);
+                assert(dictEnd > match);
                 if (limit > matchlimit) limit = matchlimit;
                 matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
                 ip += MINMATCH + matchCode;
                 if (ip==limit) {
-                    unsigned const more = LZ4_count(ip, (const BYTE*)source, matchlimit);
+                    unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit);
                     matchCode += more;
                     ip += more;
                 }
+                DEBUGLOG(6, "             with matchLength=%u starting in extDict", matchCode+MINMATCH);
             } else {
                 matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
                 ip += MINMATCH + matchCode;
+                DEBUGLOG(6, "             with matchLength=%u", matchCode+MINMATCH);
             }
 
-            if ( outputLimited &&    /* Check output buffer overflow */
-                (unlikely(op + (1 + LASTLITERALS) + (matchCode>>8) > olimit)) )
-                return 0;
+            if ((outputLimited) &&    /* Check output buffer overflow */
+                (unlikely(op + (1 + LASTLITERALS) + (matchCode>>8) > olimit)) ) {
+                if (outputLimited == limitedOutput)
+                  return 0;
+                if (outputLimited == fillOutput) {
+                    /* Match description too long : reduce it */
+                    U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 2 - 1 - LASTLITERALS) * 255;
+                    ip -= matchCode - newMatchCode;
+                    matchCode = newMatchCode;
+                }
+            }
             if (matchCode >= ML_MASK) {
                 *token += ML_MASK;
                 matchCode -= ML_MASK;
                 LZ4_write32(op, 0xFFFFFFFF);
-                while (matchCode >= 4*255) op+=4, LZ4_write32(op, 0xFFFFFFFF), matchCode -= 4*255;
+                while (matchCode >= 4*255) {
+                    op+=4;
+                    LZ4_write32(op, 0xFFFFFFFF);
+                    matchCode -= 4*255;
+                }
                 op += matchCode / 255;
                 *op++ = (BYTE)(matchCode % 255);
             } else
@@ -608,37 +912,80 @@
         anchor = ip;
 
         /* Test end of chunk */
-        if (ip > mflimit) break;
+        if (ip >= mflimitPlusOne) break;
 
         /* Fill table */
         LZ4_putPosition(ip-2, cctx->hashTable, tableType, base);
 
         /* Test next position */
-        match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
-        if (dict==usingExtDict) {
-            if (match < (const BYTE*)source) {
-                refDelta = dictDelta;
-                lowLimit = dictionary;
-            } else {
-                refDelta = 0;
-                lowLimit = (const BYTE*)source;
-        }   }
-        LZ4_putPosition(ip, cctx->hashTable, tableType, base);
-        if ( ((dictIssue==dictSmall) ? (match>=lowRefLimit) : 1)
-            && (match+MAX_DISTANCE>=ip)
-            && (LZ4_read32(match+refDelta)==LZ4_read32(ip)) )
-        { token=op++; *token=0; goto _next_match; }
+        if (tableType == byPtr) {
+
+            match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
+            LZ4_putPosition(ip, cctx->hashTable, tableType, base);
+            if ( (match+MAX_DISTANCE >= ip)
+              && (LZ4_read32(match) == LZ4_read32(ip)) )
+            { token=op++; *token=0; goto _next_match; }
+
+        } else {   /* byU32, byU16 */
+
+            U32 const h = LZ4_hashPosition(ip, tableType);
+            U32 const current = (U32)(ip-base);
+            U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
+            assert(matchIndex < current);
+            if (dictDirective == usingDictCtx) {
+                if (matchIndex < startIndex) {
+                    /* there was no match, try the dictionary */
+                    matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
+                    match = dictBase + matchIndex;
+                    lowLimit = dictionary;   /* required for match length counter */
+                    matchIndex += dictDelta;
+                } else {
+                    match = base + matchIndex;
+                    lowLimit = (const BYTE*)source;  /* required for match length counter */
+                }
+            } else if (dictDirective==usingExtDict) {
+                if (matchIndex < startIndex) {
+                    match = dictBase + matchIndex;
+                    lowLimit = dictionary;   /* required for match length counter */
+                } else {
+                    match = base + matchIndex;
+                    lowLimit = (const BYTE*)source;   /* required for match length counter */
+                }
+            } else {   /* single memory segment */
+                match = base + matchIndex;
+            }
+            LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
+            assert(matchIndex < current);
+            if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
+              && ((tableType==byU16) ? 1 : (matchIndex+MAX_DISTANCE >= current))
+              && (LZ4_read32(match) == LZ4_read32(ip)) ) {
+                token=op++;
+                *token=0;
+                if (maybe_extMem) offset = current - matchIndex;
+                DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
+                            (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source));
+                goto _next_match;
+            }
+        }
 
         /* Prepare next loop */
         forwardH = LZ4_hashPosition(++ip, tableType);
+
     }
 
 _last_literals:
     /* Encode Last Literals */
-    {   size_t const lastRun = (size_t)(iend - anchor);
+    {   size_t lastRun = (size_t)(iend - anchor);
         if ( (outputLimited) &&  /* Check output buffer overflow */
-            ((op - (BYTE*)dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) )
-            return 0;
+            (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) {
+            if (outputLimited == fillOutput) {
+                /* adapt lastRun to fill 'dst' */
+                lastRun  = (olimit-op) - 1;
+                lastRun -= (lastRun+240)/255;
+            }
+            if (outputLimited == limitedOutput)
+                return 0;
+        }
         if (lastRun >= RUN_MASK) {
             size_t accumulator = lastRun - RUN_MASK;
             *op++ = RUN_MASK << ML_BITS;
@@ -648,46 +995,99 @@
             *op++ = (BYTE)(lastRun<<ML_BITS);
         }
         memcpy(op, anchor, lastRun);
+        ip = anchor + lastRun;
         op += lastRun;
     }
 
-    /* End */
-    return (int) (((char*)op)-dest);
+    if (outputLimited == fillOutput) {
+        *inputConsumed = (int) (((const char*)ip)-source);
+    }
+    DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, (int)(((char*)op) - dest));
+    return (int)(((char*)op) - dest);
 }
 
 
 int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
 {
     LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
+    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;   /* values below 1 fall back to the default */
     LZ4_resetStream((LZ4_stream_t*)state);
+    if (maxOutputSize >= LZ4_compressBound(inputSize)) {   /* dest can hold LZ4_compressBound(inputSize) : compress without output limit checks (notLimited) */
+        if (inputSize < LZ4_64Klimit) {   /* input below the 64K limit : 16-bit table (byU16) */
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
+        } else {
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > MAX_DISTANCE)) ? byPtr : byU32;   /* byPtr only with 4-byte pointers and source located above MAX_DISTANCE */
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
+        }
+    } else {   /* smaller dest : same dispatch, but bounded by maxOutputSize (limitedOutput) */
+        if (inputSize < LZ4_64Klimit) {
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
+        } else {
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > MAX_DISTANCE)) ? byPtr : byU32;
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration);
+        }
+    }
+}
+
+/**
+ * LZ4_compress_fast_extState_fastReset() :
+ * A variant of LZ4_compress_fast_extState() which calls LZ4_prepareTable()
+ * instead of LZ4_resetStream(), thus avoiding an expensive initialization step.
+ * It is only safe to call if the state buffer is known to be correctly
+ * initialized already (see comment in lz4.h on LZ4_resetStream_fast() for a
+ * definition of "correctly initialized").
+ * An acceleration value < 1 is replaced by ACCELERATION_DEFAULT.
+ */
+int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
+{
+    LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
     if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
 
-    if (maxOutputSize >= LZ4_compressBound(inputSize)) {
-        if (inputSize < LZ4_64Klimit)
-            return LZ4_compress_generic(ctx, source, dest, inputSize,             0,    notLimited,                        byU16, noDict, noDictIssue, acceleration);
-        else
-            return LZ4_compress_generic(ctx, source, dest, inputSize,             0,    notLimited, (sizeof(void*)==8) ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+    if (dstCapacity >= LZ4_compressBound(srcSize)) {   /* dst can hold LZ4_compressBound(srcSize) : compress without output limit checks (notLimited) */
+        if (srcSize < LZ4_64Klimit) {
+            const tableType_t tableType = byU16;
+            LZ4_prepareTable(ctx, srcSize, tableType);
+            if (ctx->currentOffset) {   /* table was kept (non-zero offset) : dictSmall makes the match search skip entries indexed below the starting offset */
+                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration);
+            } else {
+                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
+            }
+        } else {
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > MAX_DISTANCE)) ? byPtr : byU32;
+            LZ4_prepareTable(ctx, srcSize, tableType);   /* byPtr : table always reset; byU32 : a kept table gets a 64 KB offset gap */
+            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
+        }
     } else {
-        if (inputSize < LZ4_64Klimit)
-            return LZ4_compress_generic(ctx, source, dest, inputSize, maxOutputSize, limitedOutput,                        byU16, noDict, noDictIssue, acceleration);
-        else
-            return LZ4_compress_generic(ctx, source, dest, inputSize, maxOutputSize, limitedOutput, (sizeof(void*)==8) ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+        if (srcSize < LZ4_64Klimit) {   /* same dispatch as above, but output bounded by dstCapacity (limitedOutput) */
+            const tableType_t tableType = byU16;
+            LZ4_prepareTable(ctx, srcSize, tableType);
+            if (ctx->currentOffset) {
+                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration);
+            } else {
+                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
+            }
+        } else {
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > MAX_DISTANCE)) ? byPtr : byU32;
+            LZ4_prepareTable(ctx, srcSize, tableType);
+            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
+        }
     }
 }
 
 
 int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
 {
-#if (HEAPMODE)
-    void* ctxPtr = ALLOCATOR(1, sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+    int result;
+#if (LZ4_HEAPMODE)
+    LZ4_stream_t* ctxPtr = ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+    if (ctxPtr == NULL) return 0;
 #else
     LZ4_stream_t ctx;
-    void* const ctxPtr = &ctx;
+    LZ4_stream_t* const ctxPtr = &ctx;
 #endif
+    result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration);
 
-    int const result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration);
-
-#if (HEAPMODE)
+#if (LZ4_HEAPMODE)
     FREEMEM(ctxPtr);
 #endif
     return result;
@@ -708,172 +1108,15 @@
     LZ4_resetStream(&ctx);
 
     if (inputSize < LZ4_64Klimit)
-        return LZ4_compress_generic(&ctx.internal_donotuse, source, dest, inputSize, maxOutputSize, limitedOutput, byU16,                        noDict, noDictIssue, acceleration);
+        return LZ4_compress_generic(&ctx.internal_donotuse, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16,                        noDict, noDictIssue, acceleration);
     else
-        return LZ4_compress_generic(&ctx.internal_donotuse, source, dest, inputSize, maxOutputSize, limitedOutput, sizeof(void*)==8 ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+        return LZ4_compress_generic(&ctx.internal_donotuse, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, sizeof(void*)==8 ? byU32 : byPtr, noDict, noDictIssue, acceleration);
 }
 
 
-/*-******************************
-*  *_destSize() variant
-********************************/
-
-static int LZ4_compress_destSize_generic(
-                       LZ4_stream_t_internal* const ctx,
-                 const char* const src,
-                       char* const dst,
-                       int*  const srcSizePtr,
-                 const int targetDstSize,
-                 const tableType_t tableType)
-{
-    const BYTE* ip = (const BYTE*) src;
-    const BYTE* base = (const BYTE*) src;
-    const BYTE* lowLimit = (const BYTE*) src;
-    const BYTE* anchor = ip;
-    const BYTE* const iend = ip + *srcSizePtr;
-    const BYTE* const mflimit = iend - MFLIMIT;
-    const BYTE* const matchlimit = iend - LASTLITERALS;
-
-    BYTE* op = (BYTE*) dst;
-    BYTE* const oend = op + targetDstSize;
-    BYTE* const oMaxLit = op + targetDstSize - 2 /* offset */ - 8 /* because 8+MINMATCH==MFLIMIT */ - 1 /* token */;
-    BYTE* const oMaxMatch = op + targetDstSize - (LASTLITERALS + 1 /* token */);
-    BYTE* const oMaxSeq = oMaxLit - 1 /* token */;
-
-    U32 forwardH;
-
-
-    /* Init conditions */
-    if (targetDstSize < 1) return 0;                                     /* Impossible to store anything */
-    if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0;            /* Unsupported input size, too large (or negative) */
-    if ((tableType == byU16) && (*srcSizePtr>=LZ4_64Klimit)) return 0;   /* Size too large (not within 64K limit) */
-    if (*srcSizePtr<LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
-
-    /* First Byte */
-    *srcSizePtr = 0;
-    LZ4_putPosition(ip, ctx->hashTable, tableType, base);
-    ip++; forwardH = LZ4_hashPosition(ip, tableType);
-
-    /* Main Loop */
-    for ( ; ; ) {
-        const BYTE* match;
-        BYTE* token;
-
-        /* Find a match */
-        {   const BYTE* forwardIp = ip;
-            unsigned step = 1;
-            unsigned searchMatchNb = 1 << LZ4_skipTrigger;
-
-            do {
-                U32 h = forwardH;
-                ip = forwardIp;
-                forwardIp += step;
-                step = (searchMatchNb++ >> LZ4_skipTrigger);
-
-                if (unlikely(forwardIp > mflimit)) goto _last_literals;
-
-                match = LZ4_getPositionOnHash(h, ctx->hashTable, tableType, base);
-                forwardH = LZ4_hashPosition(forwardIp, tableType);
-                LZ4_putPositionOnHash(ip, h, ctx->hashTable, tableType, base);
-
-            } while ( ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip))
-                || (LZ4_read32(match) != LZ4_read32(ip)) );
-        }
-
-        /* Catch up */
-        while ((ip>anchor) && (match > lowLimit) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
-
-        /* Encode Literal length */
-        {   unsigned litLength = (unsigned)(ip - anchor);
-            token = op++;
-            if (op + ((litLength+240)/255) + litLength > oMaxLit) {
-                /* Not enough space for a last match */
-                op--;
-                goto _last_literals;
-            }
-            if (litLength>=RUN_MASK) {
-                unsigned len = litLength - RUN_MASK;
-                *token=(RUN_MASK<<ML_BITS);
-                for(; len >= 255 ; len-=255) *op++ = 255;
-                *op++ = (BYTE)len;
-            }
-            else *token = (BYTE)(litLength<<ML_BITS);
-
-            /* Copy Literals */
-            LZ4_wildCopy(op, anchor, op+litLength);
-            op += litLength;
-        }
-
-_next_match:
-        /* Encode Offset */
-        LZ4_writeLE16(op, (U16)(ip-match)); op+=2;
-
-        /* Encode MatchLength */
-        {   size_t matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
-
-            if (op + ((matchLength+240)/255) > oMaxMatch) {
-                /* Match description too long : reduce it */
-                matchLength = (15-1) + (oMaxMatch-op) * 255;
-            }
-            ip += MINMATCH + matchLength;
-
-            if (matchLength>=ML_MASK) {
-                *token += ML_MASK;
-                matchLength -= ML_MASK;
-                while (matchLength >= 255) { matchLength-=255; *op++ = 255; }
-                *op++ = (BYTE)matchLength;
-            }
-            else *token += (BYTE)(matchLength);
-        }
-
-        anchor = ip;
-
-        /* Test end of block */
-        if (ip > mflimit) break;
-        if (op > oMaxSeq) break;
-
-        /* Fill table */
-        LZ4_putPosition(ip-2, ctx->hashTable, tableType, base);
-
-        /* Test next position */
-        match = LZ4_getPosition(ip, ctx->hashTable, tableType, base);
-        LZ4_putPosition(ip, ctx->hashTable, tableType, base);
-        if ( (match+MAX_DISTANCE>=ip)
-            && (LZ4_read32(match)==LZ4_read32(ip)) )
-        { token=op++; *token=0; goto _next_match; }
-
-        /* Prepare next loop */
-        forwardH = LZ4_hashPosition(++ip, tableType);
-    }
-
-_last_literals:
-    /* Encode Last Literals */
-    {   size_t lastRunSize = (size_t)(iend - anchor);
-        if (op + 1 /* token */ + ((lastRunSize+240)/255) /* litLength */ + lastRunSize /* literals */ > oend) {
-            /* adapt lastRunSize to fill 'dst' */
-            lastRunSize  = (oend-op) - 1;
-            lastRunSize -= (lastRunSize+240)/255;
-        }
-        ip = anchor + lastRunSize;
-
-        if (lastRunSize >= RUN_MASK) {
-            size_t accumulator = lastRunSize - RUN_MASK;
-            *op++ = RUN_MASK << ML_BITS;
-            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
-            *op++ = (BYTE) accumulator;
-        } else {
-            *op++ = (BYTE)(lastRunSize<<ML_BITS);
-        }
-        memcpy(op, anchor, lastRunSize);
-        op += lastRunSize;
-    }
-
-    /* End */
-    *srcSizePtr = (int) (((const char*)ip)-src);
-    return (int) (((char*)op)-dst);
-}
-
-
+/* Note!: This function leaves the stream in an unclean/broken state!
+ * It is not safe to subsequently use the same state with a _fastReset() or
+ * _continue() call without resetting it. */
 static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize)
 {
     LZ4_resetStream(state);
@@ -881,18 +1124,20 @@
     if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {  /* compression success is guaranteed */
         return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1);
     } else {
-        if (*srcSizePtr < LZ4_64Klimit)
-            return LZ4_compress_destSize_generic(&state->internal_donotuse, src, dst, srcSizePtr, targetDstSize, byU16);
-        else
-            return LZ4_compress_destSize_generic(&state->internal_donotuse, src, dst, srcSizePtr, targetDstSize, sizeof(void*)==8 ? byU32 : byPtr);
-    }
+        if (*srcSizePtr < LZ4_64Klimit) {
+            return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1);
+        } else {
+            tableType_t const tableType = ((sizeof(void*)==4) && ((uptrval)src > MAX_DISTANCE)) ? byPtr : byU32;
+            return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, tableType, noDict, noDictIssue, 1);
+    }   }
 }
 
 
 int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
 {
-#if (HEAPMODE)
-    LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOCATOR(1, sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+#if (LZ4_HEAPMODE)
+    LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+    if (ctx == NULL) return 0;
 #else
     LZ4_stream_t ctxBody;
     LZ4_stream_t* ctx = &ctxBody;
@@ -900,7 +1145,7 @@
 
     int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize);
 
-#if (HEAPMODE)
+#if (LZ4_HEAPMODE)
     FREEMEM(ctx);
 #endif
     return result;
@@ -914,19 +1159,28 @@
 
 LZ4_stream_t* LZ4_createStream(void)
 {
-    LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOCATOR(8, LZ4_STREAMSIZE_U64);
+    LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* content is cleared by LZ4_resetStream() below */
     LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal));    /* A compilation error here means LZ4_STREAMSIZE is not large enough */
+    DEBUGLOG(4, "LZ4_createStream %p", lz4s);
+    if (lz4s == NULL) return NULL;   /* allocation failed : report it rather than resetting a NULL pointer */
     LZ4_resetStream(lz4s);
     return lz4s;
 }
 
 void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
 {
+    DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
     MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t));
 }
 
+void LZ4_resetStream_fast(LZ4_stream_t* ctx) {   /* delegates to LZ4_prepareTable(); unlike LZ4_resetStream(), does not itself zero the whole structure */
+    LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32);   /* NOTE(review): 0 is presumably the upcoming input size, byU32 the table type - confirm against LZ4_prepareTable() */
+}
+
 int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
 {
+    if (!LZ4_stream) return 0;   /* support free on NULL */
+    DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream);
     FREEMEM(LZ4_stream);
     return (0);
 }
@@ -936,43 +1190,70 @@
 int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
 {
     LZ4_stream_t_internal* dict = &LZ4_dict->internal_donotuse;
+    const tableType_t tableType = byU32;   /* used for every LZ4_putPosition() below and recorded in dict->tableType */
     const BYTE* p = (const BYTE*)dictionary;
     const BYTE* const dictEnd = p + dictSize;
     const BYTE* base;
 
-    if ((dict->initCheck) || (dict->currentOffset > 1 GB))  /* Uninitialized structure, or reuse overflow */
-        LZ4_resetStream(LZ4_dict);
+    DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict);
+
+    /* It's necessary to reset the context,
+     * and not just continue it with prepareTable()
+     * to avoid any risk of generating overflowing matchIndex
+     * when compressing using this dictionary */
+    LZ4_resetStream(LZ4_dict);   /* zeroes the whole state, so currentOffset == 0 from here on */
+
+    /* We always increment the offset by 64 KB, since, if the dict is longer,
+     * we truncate it to the last 64k, and if it's shorter, we still want to
+     * advance by a whole window length so we can provide the guarantee that
+     * there are only valid offsets in the window, which allows an optimization
+     * in LZ4_compress_fast_continue() where it uses noDictIssue even when the
+     * dictionary isn't a full 64k. */
+
+    if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;   /* keep only the last 64 KB of an oversized dictionary */
+    base = dictEnd - 64 KB - dict->currentOffset;   /* dictEnd - base == currentOffset + 64 KB; presumably positions are stored relative to base - confirm against LZ4_putPosition() */
+    dict->dictionary = p;
+    dict->dictSize = (U32)(dictEnd - p);
+    dict->currentOffset += 64 KB;   /* always a full window, whatever the dictionary size (see comment above) */
+    dict->tableType = tableType;
 
     if (dictSize < (int)HASH_UNIT) {
-        dict->dictionary = NULL;
-        dict->dictSize = 0;
         return 0;
     }
 
-    if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
-    dict->currentOffset += 64 KB;
-    base = p - dict->currentOffset;
-    dict->dictionary = p;
-    dict->dictSize = (U32)(dictEnd - p);
-    dict->currentOffset += dict->dictSize;
-
     while (p <= dictEnd-HASH_UNIT) {
-        LZ4_putPosition(p, dict->hashTable, byU32, base);
+        LZ4_putPosition(p, dict->hashTable, tableType, base);   /* one position out of every 3 is registered (p+=3 below) */
         p+=3;
     }
 
     return dict->dictSize;
 }
 
+void LZ4_attach_dictionary(LZ4_stream_t *working_stream, const LZ4_stream_t *dictionary_stream) {   /* attaches dictionary_stream to working_stream, or detaches when NULL */
+    if (dictionary_stream != NULL) {
+        /* If the current offset is zero, we will never look in the
+         * external dictionary context, since there is no value a table
+         * entry can take that indicates a miss. In that case, we need
+         * to bump the offset to something non-zero.
+         */
+        if (working_stream->internal_donotuse.currentOffset == 0) {
+            working_stream->internal_donotuse.currentOffset = 64 KB;
+        }
+        working_stream->internal_donotuse.dictCtx = &(dictionary_stream->internal_donotuse);   /* only a pointer is stored : nothing is copied here */
+    } else {
+        working_stream->internal_donotuse.dictCtx = NULL;   /* NULL detaches any previously attached dictionary context */
+    }
+}
 
-static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src)
+
+static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
 {
-    if ((LZ4_dict->currentOffset > 0x80000000) ||
-        ((uptrval)LZ4_dict->currentOffset > (uptrval)src)) {   /* address space overflow */
+    if (LZ4_dict->currentOffset + nextSize > 0x80000000) {   /* potential ptrdiff_t overflow (32-bits mode) */
         /* rescale hash table */
         U32 const delta = LZ4_dict->currentOffset - 64 KB;
         const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
         int i;
+        DEBUGLOG(4, "LZ4_renormDictT");
         for (i=0; i<LZ4_HASH_SIZE_U32; i++) {
             if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0;
             else LZ4_dict->hashTable[i] -= delta;
@@ -986,15 +1267,25 @@
 
 int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
 {
+    const tableType_t tableType = byU32;
     LZ4_stream_t_internal* streamPtr = &LZ4_stream->internal_donotuse;
-    const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
+    const BYTE* dictEnd = streamPtr->dictionary + streamPtr->dictSize;
 
-    const BYTE* smallest = (const BYTE*) source;
+    DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize);
+
     if (streamPtr->initCheck) return 0;   /* Uninitialized structure detected */
-    if ((streamPtr->dictSize>0) && (smallest>dictEnd)) smallest = dictEnd;
-    LZ4_renormDictT(streamPtr, smallest);
+    LZ4_renormDictT(streamPtr, inputSize);   /* avoid index overflow */
     if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
 
+    /* invalidate tiny dictionaries */
+    if ( (streamPtr->dictSize-1 < 4)   /* intentional underflow */
+      && (dictEnd != (const BYTE*)source) ) {
+        DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary);
+        streamPtr->dictSize = 0;
+        streamPtr->dictionary = (const BYTE*)source;
+        dictEnd = (const BYTE*)source;
+    }
+
     /* Check overlapping input/dictionary space */
     {   const BYTE* sourceEnd = (const BYTE*) source + inputSize;
         if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) {
@@ -1007,46 +1298,61 @@
 
     /* prefix mode : source data follows dictionary */
     if (dictEnd == (const BYTE*)source) {
-        int result;
         if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
-            result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, dictSmall, acceleration);
+            return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration);
         else
-            result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, noDictIssue, acceleration);
-        streamPtr->dictSize += (U32)inputSize;
-        streamPtr->currentOffset += (U32)inputSize;
-        return result;
+            return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration);
     }
 
     /* external dictionary mode */
     {   int result;
-        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
-            result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, dictSmall, acceleration);
-        else
-            result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, noDictIssue, acceleration);
+        if (streamPtr->dictCtx) {
+            /* We depend here on the fact that dictCtx'es (produced by
+             * LZ4_loadDict) guarantee that their tables contain no references
+             * to offsets between dictCtx->currentOffset - 64 KB and
+             * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe
+             * to use noDictIssue even when the dict isn't a full 64 KB.
+             */
+            if (inputSize > 4 KB) {
+                /* For compressing large blobs, it is faster to pay the setup
+                 * cost to copy the dictionary's tables into the active context,
+                 * so that the compression loop is only looking into one table.
+                 */
+                memcpy(streamPtr, streamPtr->dictCtx, sizeof(LZ4_stream_t));
+                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
+            } else {
+                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration);
+            }
+        } else {
+            if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
+                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration);
+            } else {
+                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
+            }
+        }
         streamPtr->dictionary = (const BYTE*)source;
         streamPtr->dictSize = (U32)inputSize;
-        streamPtr->currentOffset += (U32)inputSize;
         return result;
     }
 }
 
 
-/* Hidden debug function, to force external dictionary mode */
-int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int inputSize)
+/* Hidden debug function, to force-test external dictionary mode : always compresses with usingExtDict, notLimited output, byU32 table */
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize)
 {
     LZ4_stream_t_internal* streamPtr = &LZ4_dict->internal_donotuse;
     int result;
-    const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
 
-    const BYTE* smallest = dictEnd;
-    if (smallest > (const BYTE*) source) smallest = (const BYTE*) source;
-    LZ4_renormDictT(streamPtr, smallest);
+    LZ4_renormDictT(streamPtr, srcSize);   /* rescales the hash table first when currentOffset + srcSize would exceed 0x80000000 */
 
-    result = LZ4_compress_generic(streamPtr, source, dest, inputSize, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
+    if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {   /* same dictSmall test as the external dictionary path of LZ4_compress_fast_continue() */
+        result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1);
+    } else {
+        result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
+    }
 
     streamPtr->dictionary = (const BYTE*)source;
-    streamPtr->dictSize = (U32)inputSize;
-    streamPtr->currentOffset += (U32)inputSize;
+    streamPtr->dictSize = (U32)srcSize;   /* the block just compressed becomes the dictionary for the next call */
 
     return result;
 }
@@ -1077,63 +1383,119 @@
 
 
 
-/*-*****************************
-*  Decompression functions
-*******************************/
+/*-*******************************
+ *  Decompression functions
+ ********************************/
+
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;
+
+#undef MIN
+#define MIN(a,b)    ( (a) < (b) ? (a) : (b) )
+
 /*! LZ4_decompress_generic() :
- *  This generic decompression function cover all use cases.
- *  It shall be instantiated several times, using different sets of directives
- *  Note that it is important this generic function is really inlined,
+ *  This generic decompression function covers all use cases.
+ *  It shall be instantiated several times, using different sets of directives.
+ *  Note that it is important for performance that this function really get inlined,
  *  in order to remove useless branches during compilation optimization.
  */
-FORCE_INLINE int LZ4_decompress_generic(
-                 const char* const source,
-                 char* const dest,
-                 int inputSize,
-                 int outputSize,         /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. */
+LZ4_FORCE_INLINE int
+LZ4_decompress_generic(
+                 const char* const src,
+                 char* const dst,
+                 int srcSize,
+                 int outputSize,         /* If endOnInput==endOnInputSize, this value is `dstCapacity` */
 
-                 int endOnInput,         /* endOnOutputSize, endOnInputSize */
-                 int partialDecoding,    /* full, partial */
-                 int targetOutputSize,   /* only used if partialDecoding==partial */
-                 int dict,               /* noDict, withPrefix64k, usingExtDict */
-                 const BYTE* const lowPrefix,  /* == dest when no prefix */
+                 endCondition_directive endOnInput,   /* endOnOutputSize, endOnInputSize */
+                 earlyEnd_directive partialDecoding,  /* full, partial */
+                 dict_directive dict,                 /* noDict, withPrefix64k, usingExtDict */
+                 const BYTE* const lowPrefix,  /* always <= dst, == dst when no prefix */
                  const BYTE* const dictStart,  /* only if dict==usingExtDict */
                  const size_t dictSize         /* note : = 0 if noDict */
                  )
 {
-    /* Local Variables */
-    const BYTE* ip = (const BYTE*) source;
-    const BYTE* const iend = ip + inputSize;
+    const BYTE* ip = (const BYTE*) src;
+    const BYTE* const iend = ip + srcSize;
 
-    BYTE* op = (BYTE*) dest;
+    BYTE* op = (BYTE*) dst;
     BYTE* const oend = op + outputSize;
     BYTE* cpy;
-    BYTE* oexit = op + targetOutputSize;
-    const BYTE* const lowLimit = lowPrefix - dictSize;
 
     const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
-    const unsigned dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};
-    const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+    const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
+    const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};
 
     const int safeDecode = (endOnInput==endOnInputSize);
     const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
 
+    /* Set up the "end" pointers for the shortcut. */
+    const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
+    const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
+
+    DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
 
     /* Special cases */
-    if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT;                        /* targetOutputSize too high => decode everything */
-    if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 0 : -1;  /* Empty output buffer */
-    if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1);
+    assert(lowPrefix <= op);
+    assert(src != NULL);
+    if ((endOnInput) && (unlikely(outputSize==0))) return ((srcSize==1) && (*ip==0)) ? 0 : -1;  /* Empty output buffer */
+    if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0 ? 1 : -1);
+    if ((endOnInput) && unlikely(srcSize==0)) return -1;
 
     /* Main Loop : decode sequences */
     while (1) {
-        size_t length;
         const BYTE* match;
         size_t offset;
 
-        /* get literal length */
         unsigned const token = *ip++;
-        if ((length=(token>>ML_BITS)) == RUN_MASK) {
+        size_t length = token >> ML_BITS;  /* literal length */
+
+        assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
+
+        /* A two-stage shortcut for the most common case:
+         * 1) If the literal length is 0..14, and there is enough space,
+         * enter the shortcut and copy 16 bytes on behalf of the literals
+         * (in the fast mode, only 8 bytes can be safely copied this way).
+         * 2) Further if the match length is 4..18, copy 18 bytes in a similar
+         * manner; but we ensure that there's enough space in the output for
+         * those 18 bytes earlier, upon entering the shortcut (in other words,
+         * there is a combined check for both stages).
+         */
+        if ( (endOnInput ? length != RUN_MASK : length <= 8)
+            /* strictly "less than" on input, to re-enter the loop with at least one byte */
+          && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
+            /* Copy the literals */
+            memcpy(op, ip, endOnInput ? 16 : 8);
+            op += length; ip += length;
+
+            /* The second stage: prepare for match copying, decode full info.
+             * If it doesn't work out, the info won't be wasted. */
+            length = token & ML_MASK; /* match length */
+            offset = LZ4_readLE16(ip); ip += 2;
+            match = op - offset;
+            assert(match <= op); /* check overflow */
+
+            /* Do not deal with overlapping matches. */
+            if ( (length != ML_MASK)
+              && (offset >= 8)
+              && (dict==withPrefix64k || match >= lowPrefix) ) {
+                /* Copy the match. */
+                memcpy(op + 0, match + 0, 8);
+                memcpy(op + 8, match + 8, 8);
+                memcpy(op +16, match +16, 2);
+                op += length + MINMATCH;
+                /* Both stages worked, load the next token. */
+                continue;
+            }
+
+            /* The second stage didn't work out, but the info is ready.
+             * Propel it right to the point of match copying. */
+            goto _copy_match;
+        }
+
+        /* decode literal length */
+        if (length == RUN_MASK) {
             unsigned s;
+            if (unlikely(endOnInput ? ip >= iend-RUN_MASK : 0)) goto _output_error;   /* overflow detection */
             do {
                 s = *ip++;
                 length += s;
@@ -1144,11 +1506,12 @@
 
         /* copy literals */
         cpy = op+length;
-        if ( ((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) )
-            || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
+        LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
+        if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
+          || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
         {
             if (partialDecoding) {
-                if (cpy > oend) goto _output_error;                           /* Error : write attempt beyond end of output buffer */
+                if (cpy > oend) { cpy = oend; length = oend-op; }             /* Partial decoding : stop in the middle of literal segment */
                 if ((endOnInput) && (ip+length > iend)) goto _output_error;   /* Error : read attempt beyond end of input buffer */
             } else {
                 if ((!endOnInput) && (cpy != oend)) goto _output_error;       /* Error : block decoding must stop exactly there */
@@ -1157,19 +1520,31 @@
             memcpy(op, ip, length);
             ip += length;
             op += length;
-            break;     /* Necessarily EOF, due to parsing restrictions */
+            if (!partialDecoding || (cpy == oend)) {
+                /* Necessarily EOF, due to parsing restrictions */
+                break;
+            }
+
+        } else {
+            LZ4_wildCopy(op, ip, cpy);   /* may overwrite up to WILDCOPYLENGTH beyond cpy */
+            ip += length; op = cpy;
         }
-        LZ4_wildCopy(op, ip, cpy);
-        ip += length; op = cpy;
 
         /* get offset */
         offset = LZ4_readLE16(ip); ip+=2;
         match = op - offset;
-        if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error;   /* Error : offset outside buffers */
-        LZ4_write32(op, (U32)offset);   /* costs ~1%; silence an msan warning when offset==0 */
 
         /* get matchlength */
         length = token & ML_MASK;
+
+_copy_match:
+        if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
+        if (!partialDecoding) {
+            assert(oend > op);
+            assert(oend - op >= 4);
+            LZ4_write32(op, 0);   /* silence an msan warning when offset==0; costs <1%; */
+        }   /* note : when partialDecoding, there is no guarantee that at least 4 bytes remain available in output buffer */
+
         if (length == ML_MASK) {
             unsigned s;
             do {
@@ -1181,21 +1556,24 @@
         }
         length += MINMATCH;
 
-        /* check external dictionary */
+        /* match starting within external dictionary */
         if ((dict==usingExtDict) && (match < lowPrefix)) {
-            if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error;   /* doesn't respect parsing restriction */
+            if (unlikely(op+length > oend-LASTLITERALS)) {
+                if (partialDecoding) length = MIN(length, (size_t)(oend-op));
+                else goto _output_error;   /* doesn't respect parsing restriction */
+            }
 
             if (length <= (size_t)(lowPrefix-match)) {
-                /* match can be copied as a single segment from external dictionary */
+                /* match fits entirely within external dictionary : just copy */
                 memmove(op, dictEnd - (lowPrefix-match), length);
                 op += length;
             } else {
-                /* match encompass external dictionary and current block */
-                size_t const copySize = (size_t)(lowPrefix-match);
+                /* match stretches into both external dictionary and current block */
+                size_t const copySize = (size_t)(lowPrefix - match);
                 size_t const restSize = length - copySize;
                 memcpy(op, dictEnd - copySize, copySize);
                 op += copySize;
-                if (restSize > (size_t)(op-lowPrefix)) {  /* overlap copy */
+                if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
                     BYTE* const endOfMatch = op + restSize;
                     const BYTE* copyFrom = lowPrefix;
                     while (op < endOfMatch) *op++ = *copyFrom++;
@@ -1208,87 +1586,180 @@
 
         /* copy match within block */
         cpy = op + length;
+
+        /* partialDecoding : may not respect endBlock parsing restrictions */
+        assert(op<=oend);
+        if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
+            size_t const mlen = MIN(length, (size_t)(oend-op));
+            const BYTE* const matchEnd = match + mlen;
+            BYTE* const copyEnd = op + mlen;
+            if (matchEnd > op) {   /* overlap copy */
+                while (op < copyEnd) *op++ = *match++;
+            } else {
+                memcpy(op, match, mlen);
+            }
+            op = copyEnd;
+            if (op==oend) break;
+            continue;
+        }
+
         if (unlikely(offset<8)) {
-            const int dec64 = dec64table[offset];
             op[0] = match[0];
             op[1] = match[1];
             op[2] = match[2];
             op[3] = match[3];
-            match += dec32table[offset];
+            match += inc32table[offset];
             memcpy(op+4, match, 4);
-            match -= dec64;
-        } else { LZ4_copy8(op, match); match+=8; }
+            match -= dec64table[offset];
+        } else {
+            memcpy(op, match, 8);
+            match += 8;
+        }
         op += 8;
 
-        if (unlikely(cpy>oend-12)) {
-            BYTE* const oCopyLimit = oend-(WILDCOPYLENGTH-1);
+        if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
+            BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
             if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
             if (op < oCopyLimit) {
                 LZ4_wildCopy(op, match, oCopyLimit);
                 match += oCopyLimit - op;
                 op = oCopyLimit;
             }
-            while (op<cpy) *op++ = *match++;
+            while (op < cpy) *op++ = *match++;
         } else {
-            LZ4_copy8(op, match);
-            if (length>16) LZ4_wildCopy(op+8, match+8, cpy);
+            memcpy(op, match, 8);
+            if (length > 16) LZ4_wildCopy(op+8, match+8, cpy);
         }
-        op=cpy;   /* correction */
+        op = cpy;   /* wildcopy correction */
     }
 
     /* end of decoding */
     if (endOnInput)
-       return (int) (((char*)op)-dest);     /* Nb of output bytes decoded */
+       return (int) (((char*)op)-dst);     /* Nb of output bytes decoded */
     else
-       return (int) (((const char*)ip)-source);   /* Nb of input bytes read */
+       return (int) (((const char*)ip)-src);   /* Nb of input bytes read */
 
     /* Overflow error detected */
 _output_error:
-    return (int) (-(((const char*)ip)-source))-1;
+    return (int) (-(((const char*)ip)-src))-1;
 }
 
 
+/*===== Instantiate the API decoding functions. =====*/
+
+LZ4_FORCE_O2_GCC_PPC64LE
 int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
 {
-    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, full, 0, noDict, (BYTE*)dest, NULL, 0);
+    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
+                                  endOnInputSize, decode_full_block, noDict,
+                                  (BYTE*)dest, NULL, 0);
 }
 
-int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize)
+LZ4_FORCE_O2_GCC_PPC64LE
+int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity)
 {
-    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, partial, targetOutputSize, noDict, (BYTE*)dest, NULL, 0);
+    dstCapacity = MIN(targetOutputSize, dstCapacity);   /* the target is enforced by shrinking the output limit handed to the decoder */
+    return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity,
+                                  endOnInputSize, partial_decode,
+                                  noDict, (BYTE*)dst, NULL, 0);   /* lowPrefix == dst (no prefix), no external dictionary */
 }
 
+LZ4_FORCE_O2_GCC_PPC64LE
 int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
 {
-    return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)(dest - 64 KB), NULL, 64 KB);
+    return LZ4_decompress_generic(source, dest, 0, originalSize,   /* srcSize is passed as 0; originalSize is the output size */
+                                  endOnOutputSize, decode_full_block, withPrefix64k,   /* with endOnOutputSize, the result is the number of input bytes read */
+                                  (BYTE*)dest - 64 KB, NULL, 0);   /* lowPrefix = dest - 64 KB, no external dictionary */
 }
 
+/*===== Instantiate a few more decoding cases, used more than once. =====*/
+
+LZ4_FORCE_O2_GCC_PPC64LE /* Exported, an obsolete API function. */
+int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, withPrefix64k,
+                                  (BYTE*)dest - 64 KB, NULL, 0);   /* lowPrefix = dest - 64 KB; presumably 64 KB of earlier output sit right before dest - confirm with callers */
+}
+
+/* Another obsolete API function, paired with LZ4_decompress_safe_withPrefix64k(). */
+int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
+{
+    /* LZ4_decompress_fast doesn't validate match offsets (no offset check in endOnOutputSize mode),
+     * and thus serves well with any prefixed dictionary. */
+    return LZ4_decompress_fast(source, dest, originalSize);
+}
+
+LZ4_FORCE_O2_GCC_PPC64LE
+static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
+                                               size_t prefixSize)   /* prefixSize : bytes of prefix located just before dest */
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, noDict,
+                                  (BYTE*)dest-prefixSize, NULL, 0);   /* lowPrefix = dest - prefixSize, no external dictionary */
+}
+
+LZ4_FORCE_O2_GCC_PPC64LE
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
+                                     int compressedSize, int maxOutputSize,
+                                     const void* dictStart, size_t dictSize)   /* dictionary bytes : [dictStart, dictStart + dictSize) */
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, usingExtDict,
+                                  (BYTE*)dest, (const BYTE*)dictStart, dictSize);   /* lowPrefix == dest : no prefix, external dictionary only */
+}
+
+LZ4_FORCE_O2_GCC_PPC64LE
+static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
+                                       const void* dictStart, size_t dictSize)   /* dictionary bytes : [dictStart, dictStart + dictSize) */
+{
+    return LZ4_decompress_generic(source, dest, 0, originalSize,   /* srcSize is passed as 0; originalSize is the output size */
+                                  endOnOutputSize, decode_full_block, usingExtDict,
+                                  (BYTE*)dest, (const BYTE*)dictStart, dictSize);   /* lowPrefix == dest : no prefix, external dictionary only */
+}
+
+/* The "double dictionary" mode, for use with e.g. ring buffers: the first part
+ * of the dictionary is passed as prefix, and the second via dictStart + dictSize.
+ * These routines are used only once, in LZ4_decompress_*_continue().
+ */
+LZ4_FORCE_INLINE
+int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize,
+                                   size_t prefixSize, const void* dictStart, size_t dictSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, usingExtDict,
+                                  (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize);   /* lowPrefix = dest - prefixSize, plus an external dictionary */
+}
+
+LZ4_FORCE_INLINE
+int LZ4_decompress_fast_doubleDict(const char* source, char* dest, int originalSize,
+                                   size_t prefixSize, const void* dictStart, size_t dictSize)
+{
+    return LZ4_decompress_generic(source, dest, 0, originalSize,   /* srcSize is passed as 0; originalSize is the output size */
+                                  endOnOutputSize, decode_full_block, usingExtDict,
+                                  (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize);   /* lowPrefix = dest - prefixSize, plus an external dictionary */
+}
 
 /*===== streaming decompression functions =====*/
 
-/*
- * If you prefer dynamic allocation methods,
- * LZ4_createStreamDecode()
- * provides a pointer (void*) towards an initialized LZ4_streamDecode_t structure.
- */
 LZ4_streamDecode_t* LZ4_createStreamDecode(void)
 {
-    LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOCATOR(1, sizeof(LZ4_streamDecode_t));
+    LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t));
     return lz4s;
 }
 
 int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
 {
+    if (!LZ4_stream) return 0;   /* support free on NULL */
     FREEMEM(LZ4_stream);
     return 0;
 }
 
-/*!
- * LZ4_setStreamDecode() :
- * Use this function to instruct where to find the dictionary.
- * This function is not necessary if previous data is still available where it was decoded.
- * Loading a size of 0 is allowed (same effect as no dictionary).
- * Return : 1 if OK, 0 if error
+/*! LZ4_setStreamDecode() :
+ *  Use this function to instruct where to find the dictionary.
+ *  This function is not necessary if previous data is still available where it was decoded.
+ *  Loading a size of 0 is allowed (same effect as no dictionary).
+ * @return : 1 if OK, 0 if error
  */
 int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
 {
@@ -1300,6 +1771,25 @@
     return 1;
 }
 
+/*! LZ4_decoderRingBufferSize() :
+ *  when setting a ring buffer for streaming decompression (optional scenario),
+ *  provides the minimum size of this ring buffer
+ *  to be compatible with any source respecting maxBlockSize condition.
+ *  Note : in a ring buffer scenario,
+ *  blocks are presumed decompressed next to each other.
+ *  When not enough space remains for next block (remainingSize < maxBlockSize),
+ *  decoding resumes from beginning of ring buffer.
+ * @return : minimum ring buffer size,
+ *           or 0 if there is an error (invalid maxBlockSize).
+ */
+int LZ4_decoderRingBufferSize(int maxBlockSize)
+{
+    if (maxBlockSize < 0) return 0;
+    if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0;
+    if (maxBlockSize < 16) maxBlockSize = 16;
+    return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize);
+}
+
 /*
 *_continue() :
     These decoding functions allow decompression of multiple blocks in "streaming" mode.
@@ -1307,24 +1797,38 @@
     If it's not possible, save the relevant part of decoded data into a safe buffer,
     and indicate where it stands using LZ4_setStreamDecode()
 */
+LZ4_FORCE_O2_GCC_PPC64LE
 int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
 {
     LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
     int result;
 
-    if (lz4sd->prefixEnd == (BYTE*)dest) {
-        result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
-                                        endOnInputSize, full, 0,
-                                        usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+    if (lz4sd->prefixSize == 0) {
+        /* The first call, no dictionary yet. */
+        assert(lz4sd->extDictSize == 0);
+        result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize = result;
+        lz4sd->prefixEnd = (BYTE*)dest + result;
+    } else if (lz4sd->prefixEnd == (BYTE*)dest) {
+        /* They're rolling the current segment. */
+        if (lz4sd->prefixSize >= 64 KB - 1)
+            result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
+        else if (lz4sd->extDictSize == 0)
+            result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize,
+                                                         lz4sd->prefixSize);
+        else
+            result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize,
+                                                    lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
         if (result <= 0) return result;
         lz4sd->prefixSize += result;
         lz4sd->prefixEnd  += result;
     } else {
+        /* The buffer wraps around, or they're switching to another buffer. */
         lz4sd->extDictSize = lz4sd->prefixSize;
         lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
-        result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
-                                        endOnInputSize, full, 0,
-                                        usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
+        result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize,
+                                                  lz4sd->externalDict, lz4sd->extDictSize);
         if (result <= 0) return result;
         lz4sd->prefixSize = result;
         lz4sd->prefixEnd  = (BYTE*)dest + result;
@@ -1333,24 +1837,32 @@
     return result;
 }
 
+LZ4_FORCE_O2_GCC_PPC64LE
 int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
 {
     LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
     int result;
 
-    if (lz4sd->prefixEnd == (BYTE*)dest) {
-        result = LZ4_decompress_generic(source, dest, 0, originalSize,
-                                        endOnOutputSize, full, 0,
-                                        usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+    if (lz4sd->prefixSize == 0) {
+        assert(lz4sd->extDictSize == 0);
+        result = LZ4_decompress_fast(source, dest, originalSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize = originalSize;
+        lz4sd->prefixEnd = (BYTE*)dest + originalSize;
+    } else if (lz4sd->prefixEnd == (BYTE*)dest) {
+        if (lz4sd->prefixSize >= 64 KB - 1 || lz4sd->extDictSize == 0)
+            result = LZ4_decompress_fast(source, dest, originalSize);
+        else
+            result = LZ4_decompress_fast_doubleDict(source, dest, originalSize,
+                                                    lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
         if (result <= 0) return result;
         lz4sd->prefixSize += originalSize;
         lz4sd->prefixEnd  += originalSize;
     } else {
         lz4sd->extDictSize = lz4sd->prefixSize;
         lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
-        result = LZ4_decompress_generic(source, dest, 0, originalSize,
-                                        endOnOutputSize, full, 0,
-                                        usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
+        result = LZ4_decompress_fast_extDict(source, dest, originalSize,
+                                             lz4sd->externalDict, lz4sd->extDictSize);
         if (result <= 0) return result;
         lz4sd->prefixSize = originalSize;
         lz4sd->prefixEnd  = (BYTE*)dest + originalSize;
@@ -1367,32 +1879,23 @@
     the dictionary must be explicitly provided within parameters
 */
 
-FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize)
-{
-    if (dictSize==0)
-        return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest, NULL, 0);
-    if (dictStart+dictSize == dest) {
-        if (dictSize >= (int)(64 KB - 1))
-            return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, withPrefix64k, (BYTE*)dest-64 KB, NULL, 0);
-        return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest-dictSize, NULL, 0);
-    }
-    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
-}
-
 int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
 {
-    return LZ4_decompress_usingDict_generic(source, dest, compressedSize, maxOutputSize, 1, dictStart, dictSize);
+    if (dictSize==0)
+        return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
+    if (dictStart+dictSize == dest) {
+        if (dictSize >= 64 KB - 1)
+            return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
+        return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, dictSize);
+    }
+    return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, dictSize);
 }
 
 int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
 {
-    return LZ4_decompress_usingDict_generic(source, dest, 0, originalSize, 0, dictStart, dictSize);
-}
-
-/* debug function */
-int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
-{
-    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
+    if (dictSize==0 || dictStart+dictSize == dest)
+        return LZ4_decompress_fast(source, dest, originalSize);
+    return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, dictSize);
 }
 
 
@@ -1400,64 +1903,67 @@
 *  Obsolete Functions
 ***************************************************/
 /* obsolete compression functions */
-int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) { return LZ4_compress_default(source, dest, inputSize, maxOutputSize); }
-int LZ4_compress(const char* source, char* dest, int inputSize) { return LZ4_compress_default(source, dest, inputSize, LZ4_compressBound(inputSize)); }
-int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); }
-int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1); }
-int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, maxDstSize, 1); }
-int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) { return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1); }
+int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+    return LZ4_compress_default(source, dest, inputSize, maxOutputSize);
+}
+int LZ4_compress(const char* source, char* dest, int inputSize)
+{
+    return LZ4_compress_default(source, dest, inputSize, LZ4_compressBound(inputSize));
+}
+int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize)
+{
+    return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1);
+}
+int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize)
+{
+    return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1);
+}
+int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity)
+{
+    return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1);
+}
+int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize)
+{
+    return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1);
+}
 
 /*
-These function names are deprecated and should no longer be used.
+These decompression functions are deprecated and should no longer be used.
 They are only provided here for compatibility with older user programs.
 - LZ4_uncompress is totally equivalent to LZ4_decompress_fast
 - LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
 */
-int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); }
-int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); }
-
+int LZ4_uncompress (const char* source, char* dest, int outputSize)
+{
+    return LZ4_decompress_fast(source, dest, outputSize);
+}
+int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize)
+{
+    return LZ4_decompress_safe(source, dest, isize, maxOutputSize);
+}
 
 /* Obsolete Streaming functions */
 
 int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; }
 
-static void LZ4_init(LZ4_stream_t* lz4ds, BYTE* base)
-{
-    MEM_INIT(lz4ds, 0, sizeof(LZ4_stream_t));
-    lz4ds->internal_donotuse.bufferStart = base;
-}
-
 int LZ4_resetStreamState(void* state, char* inputBuffer)
 {
-    if ((((uptrval)state) & 3) != 0) return 1;   /* Error : pointer is not aligned on 4-bytes boundary */
-    LZ4_init((LZ4_stream_t*)state, (BYTE*)inputBuffer);
+    (void)inputBuffer;
+    LZ4_resetStream((LZ4_stream_t*)state);
     return 0;
 }
 
 void* LZ4_create (char* inputBuffer)
 {
-    LZ4_stream_t* lz4ds = (LZ4_stream_t*)ALLOCATOR(8, sizeof(LZ4_stream_t));
-    LZ4_init (lz4ds, (BYTE*)inputBuffer);
-    return lz4ds;
+    (void)inputBuffer;
+    return LZ4_createStream();
 }
 
-char* LZ4_slideInputBuffer (void* LZ4_Data)
+char* LZ4_slideInputBuffer (void* state)
 {
-    LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)LZ4_Data)->internal_donotuse;
-    int dictSize = LZ4_saveDict((LZ4_stream_t*)LZ4_Data, (char*)ctx->bufferStart, 64 KB);
-    return (char*)(ctx->bufferStart + dictSize);
-}
-
-/* Obsolete streaming decompression functions */
-
-int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
-{
-    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB);
-}
-
-int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
-{
-    return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB);
+    /* avoid const char * -> char * conversion warning */
+    return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary;
 }
 
 #endif   /* LZ4_COMMONDEFS_ONLY */
diff --git a/lib/lz4.h b/lib/lz4.h
index 0aae19c..059ef7c 100644
--- a/lib/lz4.h
+++ b/lib/lz4.h
@@ -1,7 +1,7 @@
 /*
  *  LZ4 - Fast LZ compression algorithm
  *  Header File
- *  Copyright (C) 2011-2016, Yann Collet.
+ *  Copyright (C) 2011-present, Yann Collet.
 
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
@@ -32,13 +32,13 @@
     - LZ4 homepage : http://www.lz4.org
     - LZ4 source repository : https://github.com/lz4/lz4
 */
-#ifndef LZ4_H_2983827168210
-#define LZ4_H_2983827168210
-
 #if defined (__cplusplus)
 extern "C" {
 #endif
 
+#ifndef LZ4_H_2983827168210
+#define LZ4_H_2983827168210
+
 /* --- Dependency --- */
 #include <stddef.h>   /* size_t */
 
@@ -46,7 +46,7 @@
 /**
   Introduction
 
-  LZ4 is lossless compression algorithm, providing compression speed at 400 MB/s per core,
+  LZ4 is a lossless compression algorithm, providing compression speed at 500 MB/s per core,
   scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
   multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
 
@@ -62,8 +62,8 @@
 
   An additional format, called LZ4 frame specification (doc/lz4_Frame_format.md),
   take care of encoding standard metadata alongside LZ4-compressed blocks.
-  If your application requires interoperability, it's recommended to use it.
-  A library is provided to take care of it, see lz4frame.h.
+  Frame format is required for interoperability.
+  It is delivered through a companion API, declared in lz4frame.h.
 */
 
 /*^***************************************************************
@@ -72,20 +72,28 @@
 /*
 *  LZ4_DLL_EXPORT :
 *  Enable exporting of functions when building a Windows DLL
+*  LZ4LIB_VISIBILITY :
+*  Control library symbols visibility.
 */
+#ifndef LZ4LIB_VISIBILITY
+#  if defined(__GNUC__) && (__GNUC__ >= 4)
+#    define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default")))
+#  else
+#    define LZ4LIB_VISIBILITY
+#  endif
+#endif
 #if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
-#  define LZ4LIB_API __declspec(dllexport)
+#  define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY
 #elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
-#  define LZ4LIB_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#  define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
 #else
-#  define LZ4LIB_API
+#  define LZ4LIB_API LZ4LIB_VISIBILITY
 #endif
 
-
-/*========== Version =========== */
+/*------   Version   ------*/
 #define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
-#define LZ4_VERSION_MINOR    7    /* for new (non-breaking) interface capabilities */
-#define LZ4_VERSION_RELEASE  5    /* for tweaks, bug-fixes, or development */
+#define LZ4_VERSION_MINOR    8    /* for new (non-breaking) interface capabilities */
+#define LZ4_VERSION_RELEASE  3    /* for tweaks, bug-fixes, or development */
 
 #define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
 
@@ -94,8 +102,8 @@
 #define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
 #define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)
 
-LZ4LIB_API int LZ4_versionNumber (void);
-LZ4LIB_API const char* LZ4_versionString (void);
+LZ4LIB_API int LZ4_versionNumber (void);  /**< library version number; useful to check dll version */
+LZ4LIB_API const char* LZ4_versionString (void);   /**< library version string; useful to check dll version */
 
 
 /*-************************************
@@ -105,40 +113,40 @@
  * LZ4_MEMORY_USAGE :
  * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
  * Increasing memory usage improves compression ratio
- * Reduced memory usage can improve speed, due to cache effect
+ * Reduced memory usage may improve speed, thanks to cache effect
  * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
  */
-#define LZ4_MEMORY_USAGE 14
-
+#ifndef LZ4_MEMORY_USAGE
+# define LZ4_MEMORY_USAGE 14
+#endif
 
 /*-************************************
 *  Simple Functions
 **************************************/
 /*! LZ4_compress_default() :
-    Compresses 'sourceSize' bytes from buffer 'source'
-    into already allocated 'dest' buffer of size 'maxDestSize'.
-    Compression is guaranteed to succeed if 'maxDestSize' >= LZ4_compressBound(sourceSize).
+    Compresses 'srcSize' bytes from buffer 'src'
+    into already allocated 'dst' buffer of size 'dstCapacity'.
+    Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
     It also runs faster, so it's a recommended setting.
-    If the function cannot compress 'source' into a more limited 'dest' budget,
+    If the function cannot compress 'src' into a more limited 'dst' budget,
     compression stops *immediately*, and the function result is zero.
-    As a consequence, 'dest' content is not valid.
-    This function never writes outside 'dest' buffer, nor read outside 'source' buffer.
-        sourceSize  : Max supported value is LZ4_MAX_INPUT_VALUE
-        maxDestSize : full or partial size of buffer 'dest' (which must be already allocated)
-        return : the number of bytes written into buffer 'dest' (necessarily <= maxOutputSize)
-              or 0 if compression fails */
-LZ4LIB_API int LZ4_compress_default(const char* source, char* dest, int sourceSize, int maxDestSize);
+    Note : as a consequence, 'dst' content is not valid.
+    Note 2 : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor reads outside 'src' buffer).
+        srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
+        dstCapacity : size of buffer 'dst' (which must be already allocated)
+        return  : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
+                  or 0 if compression fails */
+LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
 
 /*! LZ4_decompress_safe() :
-    compressedSize : is the precise full size of the compressed block.
-    maxDecompressedSize : is the size of destination buffer, which must be already allocated.
-    return : the number of bytes decompressed into destination buffer (necessarily <= maxDecompressedSize)
-             If destination buffer is not large enough, decoding will stop and output an error code (<0).
+    compressedSize : is the exact complete size of the compressed block.
+    dstCapacity : is the size of destination buffer, which must be already allocated.
+    return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
+             If destination buffer is not large enough, decoding will stop and output an error code (negative value).
              If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             This function is protected against buffer overflow exploits, including malicious data packets.
-             It never writes outside output buffer, nor reads outside input buffer.
+             This function is protected against malicious data packets.
 */
-LZ4LIB_API int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize);
+LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
 
 
 /*-************************************
@@ -152,22 +160,22 @@
     Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
     This function is primarily useful for memory allocation purposes (destination buffer size).
     Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
-    Note that LZ4_compress_default() compress faster when dest buffer size is >= LZ4_compressBound(srcSize)
+    Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize)
         inputSize  : max supported value is LZ4_MAX_INPUT_SIZE
         return : maximum output size in a "worst case" scenario
-              or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
+              or 0, if input size is incorrect (too large or negative)
 */
 LZ4LIB_API int LZ4_compressBound(int inputSize);
 
 /*!
 LZ4_compress_fast() :
-    Same as LZ4_compress_default(), but allows to select an "acceleration" factor.
+    Same as LZ4_compress_default(), but allows selection of "acceleration" factor.
     The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
     It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
     An acceleration value of "1" is the same as regular LZ4_compress_default()
-    Values <= 0 will be replaced by ACCELERATION_DEFAULT (see lz4.c), which is 1.
+    Values <= 0 will be replaced by ACCELERATION_DEFAULT (currently == 1, see lz4.c).
 */
-LZ4LIB_API int LZ4_compress_fast (const char* source, char* dest, int sourceSize, int maxDestSize, int acceleration);
+LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
 
 
 /*!
@@ -175,58 +183,79 @@
     Same compression function, just using an externally allocated memory space to store compression state.
     Use LZ4_sizeofState() to know how much memory must be allocated,
     and allocate it on 8-bytes boundaries (using malloc() typically).
-    Then, provide it as 'void* state' to compression function.
+    Then, provide this buffer as 'void* state' to compression function.
 */
 LZ4LIB_API int LZ4_sizeofState(void);
-LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* source, char* dest, int inputSize, int maxDestSize, int acceleration);
+LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
 
 
-/*!
-LZ4_compress_destSize() :
-    Reverse the logic, by compressing as much data as possible from 'source' buffer
-    into already allocated buffer 'dest' of size 'targetDestSize'.
-    This function either compresses the entire 'source' content into 'dest' if it's large enough,
-    or fill 'dest' buffer completely with as much data as possible from 'source'.
-        *sourceSizePtr : will be modified to indicate how many bytes where read from 'source' to fill 'dest'.
-                         New value is necessarily <= old value.
-        return : Nb bytes written into 'dest' (necessarily <= targetDestSize)
-              or 0 if compression fails
+/*! LZ4_compress_destSize() :
+ *  Reverse the logic : compresses as much data as possible from 'src' buffer
+ *  into already allocated buffer 'dst', of size >= 'targetDstSize'.
+ *  This function either compresses the entire 'src' content into 'dst' if it's large enough,
+ *  or fills 'dst' buffer completely with as much data as possible from 'src'.
+ *  note: acceleration parameter is fixed to "default".
+ *
+ * *srcSizePtr : will be modified to indicate how many bytes were read from 'src' to fill 'dst'.
+ *               New value is necessarily <= input value.
+ * @return : Nb bytes written into 'dst' (necessarily <= targetDstSize)
+ *           or 0 if compression fails.
 */
-LZ4LIB_API int LZ4_compress_destSize (const char* source, char* dest, int* sourceSizePtr, int targetDestSize);
+LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);
 
 
-/*!
-LZ4_decompress_fast() :
-    originalSize : is the original and therefore uncompressed size
-    return : the number of bytes read from the source buffer (in other words, the compressed size)
-             If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             Destination buffer must be already allocated. Its size must be a minimum of 'originalSize' bytes.
-    note : This function fully respect memory boundaries for properly formed compressed data.
-           It is a bit faster than LZ4_decompress_safe().
-           However, it does not provide any protection against intentionally modified data stream (malicious input).
-           Use this function in trusted environment only (data to decode comes from a trusted source).
-*/
-LZ4LIB_API int LZ4_decompress_fast (const char* source, char* dest, int originalSize);
+/*! LZ4_decompress_fast() : **unsafe!**
+ *  This function used to be a bit faster than LZ4_decompress_safe(),
+ *  though the situation has changed in recent versions,
+ *  and now `LZ4_decompress_safe()` can be as fast and sometimes faster than `LZ4_decompress_fast()`.
+ *  Moreover, LZ4_decompress_fast() is not protected vs malformed input, as it doesn't perform full validation of compressed data.
+ *  As a consequence, this function is no longer recommended, and may be deprecated in future versions.
+ *  Its only remaining specificity is that it can decompress data without knowing its compressed size.
+ *
+ *  originalSize : is the uncompressed size to regenerate.
+ *                 `dst` must be already allocated, its size must be >= 'originalSize' bytes.
+ * @return : number of bytes read from source buffer (== compressed size).
+ *           If the source stream is detected malformed, the function stops decoding and returns a negative result.
+ *  note : This function requires uncompressed originalSize to be known in advance.
+ *         The function never writes past the output buffer.
+ *         However, since it doesn't know its 'src' size, it may read past the intended input.
+ *         Also, because match offsets are not validated during decoding,
+ *         reads from 'src' may underflow.
+ *         Use this function in trusted environment **only**.
+ */
+LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
 
-/*!
-LZ4_decompress_safe_partial() :
-    This function decompress a compressed block of size 'compressedSize' at position 'source'
-    into destination buffer 'dest' of size 'maxDecompressedSize'.
-    The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached,
-    reducing decompression time.
-    return : the number of bytes decoded in the destination buffer (necessarily <= maxDecompressedSize)
-       Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller.
-             Always control how many bytes were decoded.
-             If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets
-*/
-LZ4LIB_API int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize);
+/*! LZ4_decompress_safe_partial() :
+ *  Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
+ *  into destination buffer 'dst' of size 'dstCapacity'.
+ *  Up to 'targetOutputSize' bytes will be decoded.
+ *  The function stops decoding on reaching this objective,
+ *  which can boost performance when only the beginning of a block is required.
+ *
+ * @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity)
+ *           If source stream is detected malformed, function returns a negative result.
+ *
+ *  Note : @return can be < targetOutputSize, if compressed block contains less data.
+ *
+ *  Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity,
+ *           and expects targetOutputSize <= dstCapacity.
+ *           It effectively stops decoding on reaching targetOutputSize,
+ *           so dstCapacity is kind of redundant.
+ *           This is because in a previous version of this function,
+ *           decoding operation would not "break" a sequence in the middle.
+ *           As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize,
+ *           it could write more bytes, though only up to dstCapacity.
+ *           Some "margin" used to be required for this operation to work properly.
+ *           This is no longer necessary.
+ *           The function nonetheless keeps its signature, in an effort to not break API.
+ */
+LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
 
 
 /*-*********************************************
 *  Streaming Compression Functions
 ***********************************************/
-typedef union LZ4_stream_u LZ4_stream_t;   /* incomplete type (defined later) */
+typedef union LZ4_stream_u LZ4_stream_t;  /* incomplete type (defined later) */
 
 /*! LZ4_createStream() and LZ4_freeStream() :
  *  LZ4_createStream() will allocate and initialize an `LZ4_stream_t` structure.
@@ -237,87 +266,219 @@
 
 /*! LZ4_resetStream() :
  *  An LZ4_stream_t structure can be allocated once and re-used multiple times.
- *  Use this function to init an allocated `LZ4_stream_t` structure and start a new compression.
+ *  Use this function to start compressing a new stream.
  */
 LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
 
 /*! LZ4_loadDict() :
- *  Use this function to load a static dictionary into LZ4_stream.
+ *  Use this function to load a static dictionary into LZ4_stream_t.
  *  Any previous data will be forgotten, only 'dictionary' will remain in memory.
- *  Loading a size of 0 is allowed.
- *  Return : dictionary size, in bytes (necessarily <= 64 KB)
+ *  Loading a size of 0 is allowed, and is the same as reset.
+ * @return : dictionary size, in bytes (necessarily <= 64 KB)
  */
 LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
 
 /*! LZ4_compress_fast_continue() :
- *  Compress buffer content 'src', using data from previously compressed blocks as dictionary to improve compression ratio.
- *  Important : Previous data blocks are assumed to still be present and unmodified !
+ *  Compress 'src' content using data from previously compressed blocks, for better compression ratio.
  *  'dst' buffer must be already allocated.
- *  If maxDstSize >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
- *  If not, and if compressed data cannot fit into 'dst' buffer size, compression stops, and function returns a zero.
+ *  If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
+ *
+ * @return : size of compressed block
+ *           or 0 if there is an error (typically, cannot fit into 'dst').
+ *
+ *  Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block.
+ *           Each block has precise boundaries.
+ *           It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together.
+ *           Each block must be decompressed separately, calling LZ4_decompress_*() with associated metadata.
+ *
+ *  Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory!
+ *
+ *  Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB.
+ *           Make sure that buffers are separated by at least one byte.
+ *           This construction ensures that each block only depends on previous block.
+ *
+ *  Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
+ *
+ *  Note 5 : After an error, the stream status is invalid, it can only be reset or freed.
  */
-LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int maxDstSize, int acceleration);
+LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
 
 /*! LZ4_saveDict() :
- *  If previously compressed data block is not guaranteed to remain available at its memory location,
+ *  If the last 64KB of data cannot be guaranteed to remain available at its current memory location,
  *  save it into a safer place (char* safeBuffer).
- *  Note : you don't need to call LZ4_loadDict() afterwards,
- *         dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue().
- *  Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
+ *  This is schematically equivalent to a memcpy() followed by LZ4_loadDict(),
+ *  but is much faster, because LZ4_saveDict() doesn't need to rebuild tables.
+ * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error.
  */
-LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize);
+LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize);
 
 
 /*-**********************************************
 *  Streaming Decompression Functions
 *  Bufferless synchronous API
 ************************************************/
-typedef union LZ4_streamDecode_u LZ4_streamDecode_t;   /* incomplete type (defined later) */
+typedef union LZ4_streamDecode_u LZ4_streamDecode_t;   /* tracking context */
 
-/* creation / destruction of streaming decompression tracking structure */
+/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() :
+ *  creation / destruction of streaming decompression tracking context.
+ *  A tracking context can be re-used multiple times.
+ */
 LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void);
 LZ4LIB_API int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
 
 /*! LZ4_setStreamDecode() :
- *  Use this function to instruct where to find the dictionary.
- *  Setting a size of 0 is allowed (same effect as reset).
- *  @return : 1 if OK, 0 if error
+ *  An LZ4_streamDecode_t context can be allocated once and re-used multiple times.
+ *  Use this function to start decompression of a new stream of blocks.
+ *  A dictionary can optionally be set. Use NULL or size 0 to request a plain reset (no dictionary).
+ *  Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
+ * @return : 1 if OK, 0 if error
  */
 LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
 
-/*!
-LZ4_decompress_*_continue() :
-    These decoding functions allow decompression of multiple blocks in "streaming" mode.
-    Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB)
-    In the case of a ring buffers, decoding buffer must be either :
-    - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions)
-      In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB).
-    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
-      maxBlockSize is implementation dependent. It's the maximum size you intend to compress into a single block.
-      In which case, encoding and decoding buffers do not need to be synchronized,
-      and encoding ring buffer can have any size, including small ones ( < 64 KB).
-    - _At least_ 64 KB + 8 bytes + maxBlockSize.
-      In which case, encoding and decoding buffers do not need to be synchronized,
-      and encoding ring buffer can have any size, including larger than decoding buffer.
-    Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer,
-    and indicate where it is saved using LZ4_setStreamDecode()
+/*! LZ4_decoderRingBufferSize() : v1.8.2
+ *  Note : in a ring buffer scenario (optional),
+ *  blocks are presumed decompressed next to each other
+ *  up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize),
+ *  at which stage it resumes from beginning of ring buffer.
+ *  When setting such a ring buffer for streaming decompression,
+ *  this function provides the minimum size of this ring buffer
+ *  to be compatible with any source respecting maxBlockSize condition.
+ * @return : minimum ring buffer size,
+ *           or 0 if there is an error (invalid maxBlockSize).
+ */
+LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize);
+#define LZ4_DECODER_RING_BUFFER_SIZE(mbs) (65536 + 14 + (mbs))  /* for static allocation; mbs presumed valid */
+
+/*! LZ4_decompress_*_continue() :
+ *  These decoding functions allow decompression of consecutive blocks in "streaming" mode.
+ *  A block is an unsplittable entity, it must be presented entirely to a decompression function.
+ *  Decompression functions only accept one block at a time.
+ *  The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded.
+ *  If less than 64KB of data has been decoded, all the data must be present.
+ *
+ *  Special : if decompression side sets a ring buffer, it must respect one of the following conditions :
+ *  - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize).
+ *    maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes.
+ *    In which case, encoding and decoding buffers do not need to be synchronized.
+ *    Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize.
+ *  - Synchronized mode :
+ *    Decompression buffer size is _exactly_ the same as compression buffer size,
+ *    and follows exactly same update rule (block boundaries at same positions),
+ *    and decoding function is provided with exact decompressed size of each block (exception for last block of the stream),
+ *    _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB).
+ *  - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ *    In which case, encoding and decoding buffers do not need to be synchronized,
+ *    and encoding ring buffer can have any size, including small ones ( < 64 KB).
+ *
+ *  Whenever these conditions are not possible,
+ *  save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression,
+ *  then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block.
 */
-LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize);
-LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize);
+LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity);
+LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
 
 
 /*! LZ4_decompress_*_usingDict() :
  *  These decoding functions work the same as
  *  a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
  *  They are stand-alone, and don't need an LZ4_streamDecode_t structure.
+ *  Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
  */
-LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize);
-LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize);
+LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapacity, const char* dictStart, int dictSize);
+LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);
 
 
 /*^**********************************************
  * !!!!!!   STATIC LINKING ONLY   !!!!!!
  ***********************************************/
+
+/*-************************************
+ *  Unstable declarations
+ **************************************
+ * Declarations in this section should be considered unstable.
+ * Use at your own peril, etc., etc.
+ * They may be removed in the future.
+ * Their signatures may change.
+ **************************************/
+
+#ifdef LZ4_STATIC_LINKING_ONLY
+
+/*! LZ4_resetStream_fast() :
+ *  Use this, like LZ4_resetStream(), to prepare a context for a new chain of
+ *  calls to a streaming API (e.g., LZ4_compress_fast_continue()).
+ *
+ *  Note:
+ *  Using this in advance of a non-streaming compression function is redundant,
+ *  and potentially bad for performance, since they all perform their own custom
+ *  reset internally.
+ *
+ *  Differences from LZ4_resetStream():
+ *  When an LZ4_stream_t is known to be in an internally coherent state,
+ *  it can often be prepared for a new compression with almost no work, only
+ *  sometimes falling back to the full, expensive reset that is always required
+ *  when the stream is in an indeterminate state (i.e., the reset performed by
+ *  LZ4_resetStream()).
+ *
+ *  LZ4_streams are guaranteed to be in a valid state when:
+ *  - returned from LZ4_createStream()
+ *  - reset by LZ4_resetStream()
+ *  - memset(stream, 0, sizeof(LZ4_stream_t)), though this is discouraged
+ *  - the stream was in a valid state and was reset by LZ4_resetStream_fast()
+ *  - the stream was in a valid state and was then used in any compression call
+ *    that returned success
+ *  - the stream was in an indeterminate state and was used in a compression
+ *    call that fully reset the state (e.g., LZ4_compress_fast_extState()) and
+ *    that returned success
+ *
+ *  When a stream isn't known to be in a valid state, it is not safe to pass to
+ *  any fastReset or streaming function. It must first be cleansed by the full
+ *  LZ4_resetStream().
+ */
+LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
+
+/*! LZ4_compress_fast_extState_fastReset() :
+ *  A variant of LZ4_compress_fast_extState().
+ *
+ *  Using this variant avoids an expensive initialization step. It is only safe
+ *  to call if the state buffer is known to be correctly initialized already
+ *  (see above comment on LZ4_resetStream_fast() for a definition of "correctly
+ *  initialized"). From a high level, the difference is that this function
+ *  initializes the provided state with a call to something like
+ *  LZ4_resetStream_fast() while LZ4_compress_fast_extState() starts with a
+ *  call to LZ4_resetStream().
+ */
+LZ4LIB_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+/*! LZ4_attach_dictionary() :
+ *  This is an experimental API that allows for the efficient use of a
+ *  static dictionary many times.
+ *
+ *  Rather than re-loading the dictionary buffer into a working context before
+ *  each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
+ *  working LZ4_stream_t, this function introduces a no-copy setup mechanism,
+ *  in which the working stream references the dictionary stream in-place.
+ *
+ *  Several assumptions are made about the state of the dictionary stream.
+ *  Currently, only streams which have been prepared by LZ4_loadDict() should
+ *  be expected to work.
+ *
+ *  Alternatively, the provided dictionary stream pointer may be NULL, in which
+ *  case any existing dictionary stream is unset.
+ *
+ *  If a dictionary is provided, it replaces any pre-existing stream history.
+ *  The dictionary contents are the only history that can be referenced and
+ *  logically immediately precede the data compressed in the first subsequent
+ *  compression call.
+ *
+ *  The dictionary will only remain attached to the working stream through the
+ *  first compression call, at the end of which it is cleared. The dictionary
+ *  stream (and source buffer) must remain in-place / accessible / unchanged
+ *  through the completion of the first compression call on the stream.
+ */
+LZ4LIB_API void LZ4_attach_dictionary(LZ4_stream_t *working_stream, const LZ4_stream_t *dictionary_stream);
+
+#endif
+
 /*-************************************
  *  Private definitions
  **************************************
@@ -332,14 +493,16 @@
 #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
 #include <stdint.h>
 
-typedef struct {
+typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
+struct LZ4_stream_t_internal {
     uint32_t hashTable[LZ4_HASH_SIZE_U32];
     uint32_t currentOffset;
-    uint32_t initCheck;
+    uint16_t initCheck;
+    uint16_t tableType;
     const uint8_t* dictionary;
-    uint8_t* bufferStart;   /* obsolete, used for slideInputBuffer */
+    const LZ4_stream_t_internal* dictCtx;
     uint32_t dictSize;
-} LZ4_stream_t_internal;
+};
 
 typedef struct {
     const uint8_t* externalDict;
@@ -350,14 +513,16 @@
 
 #else
 
-typedef struct {
+typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
+struct LZ4_stream_t_internal {
     unsigned int hashTable[LZ4_HASH_SIZE_U32];
     unsigned int currentOffset;
-    unsigned int initCheck;
+    unsigned short initCheck;
+    unsigned short tableType;
     const unsigned char* dictionary;
-    unsigned char* bufferStart;   /* obsolete, used for slideInputBuffer */
+    const LZ4_stream_t_internal* dictCtx;
     unsigned int dictSize;
-} LZ4_stream_t_internal;
+};
 
 typedef struct {
     const unsigned char* externalDict;
@@ -374,7 +539,7 @@
  * init this structure before first use.
  * note : only use in association with static linking !
  *        this definition is not API/ABI safe,
- *        and may change in a future version !
+ *        it may change in a future version !
  */
 #define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
 #define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
@@ -400,11 +565,12 @@
 } ;   /* previously typedef'd to LZ4_streamDecode_t */
 
 
-/*=************************************
+/*-************************************
 *  Obsolete Functions
 **************************************/
-/* Deprecation warnings */
-/* Should these warnings be a problem,
+
+/*! Deprecation warnings
+   Should deprecation warnings be a problem,
    it is generally possible to disable them,
    typically with -Wno-deprecated-declarations for gcc
    or _CRT_SECURE_NO_WARNINGS in Visual.
@@ -428,36 +594,38 @@
 #endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
 
 /* Obsolete compression functions */
-LZ4_DEPRECATED("use LZ4_compress_default() instead") int LZ4_compress               (const char* source, char* dest, int sourceSize);
-LZ4_DEPRECATED("use LZ4_compress_default() instead") int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
-LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
-LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
-LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
-LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") int LZ4_compress_limitedOutput_continue  (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress               (const char* source, char* dest, int sourceSize);
+LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue  (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
 
 /* Obsolete decompression functions */
-/* These function names are completely deprecated and must no longer be used.
-   They are only provided in lz4.c for compatibility with older programs.
-    - LZ4_uncompress is the same as LZ4_decompress_fast
-    - LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe
-   These function prototypes are now disabled; uncomment them only if you really need them.
-   It is highly recommended to stop using these prototypes and migrate to maintained ones */
-/* int LZ4_uncompress (const char* source, char* dest, int outputSize); */
-/* int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); */
+LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize);
+LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
 
-/* Obsolete streaming functions; use new streaming interface whenever possible */
-LZ4_DEPRECATED("use LZ4_createStream() instead") void* LZ4_create (char* inputBuffer);
-LZ4_DEPRECATED("use LZ4_createStream() instead") int   LZ4_sizeofStreamState(void);
-LZ4_DEPRECATED("use LZ4_resetStream() instead")  int   LZ4_resetStreamState(void* state, char* inputBuffer);
-LZ4_DEPRECATED("use LZ4_saveDict() instead")     char* LZ4_slideInputBuffer (void* state);
+/* Obsolete streaming functions; degraded functionality; do not use!
+ *
+ * In order to perform streaming compression, these functions depended on data
+ * that is no longer tracked in the state. They have been preserved as well as
+ * possible: using them will still produce a correct output. However, they don't
+ * actually retain any history between compression calls. The compression ratio
+ * achieved will therefore be no better than compressing each chunk
+ * independently.
+ */
+LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer);
+LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int   LZ4_sizeofStreamState(void);
+LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API  int   LZ4_resetStreamState(void* state, char* inputBuffer);
+LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API     char* LZ4_slideInputBuffer (void* state);
 
 /* Obsolete streaming decoding functions */
-LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
-LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
+LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
+LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
+
+#endif /* LZ4_H_2983827168210 */
 
 
 #if defined (__cplusplus)
 }
 #endif
-
-#endif /* LZ4_H_2983827168210 */
diff --git a/lib/lz4frame.c b/lib/lz4frame.c
index a0a625b..08bf0fa 100644
--- a/lib/lz4frame.c
+++ b/lib/lz4frame.c
@@ -47,10 +47,24 @@
 
 
 /*-************************************
+*  Tuning parameters
+**************************************/
+/*
+ * LZ4F_HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
+ */
+#ifndef LZ4F_HEAPMODE
+#  define LZ4F_HEAPMODE 0
+#endif
+
+
+/*-************************************
 *  Memory routines
 **************************************/
 #include <stdlib.h>   /* malloc, calloc, free */
-#define ALLOCATOR(s)   calloc(1,s)
+#define ALLOC(s)   malloc(s)
+#define ALLOC_AND_ZERO(s)   calloc(1,s)
 #define FREEMEM        free
 #include <string.h>   /* memset, memcpy, memmove */
 #define MEM_INIT       memset
@@ -59,17 +73,41 @@
 /*-************************************
 *  Includes
 **************************************/
-#include "lz4frame_static.h"
+#define LZ4F_STATIC_LINKING_ONLY
+#include "lz4frame.h"
+#define LZ4_STATIC_LINKING_ONLY
 #include "lz4.h"
+#define LZ4_HC_STATIC_LINKING_ONLY
 #include "lz4hc.h"
 #define XXH_STATIC_LINKING_ONLY
 #include "xxhash.h"
 
 
 /*-************************************
-*  Common Utils
+*  Debug
 **************************************/
-#define LZ4_STATIC_ASSERT(c)    { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
+#  include <assert.h>
+#else
+#  ifndef assert
+#    define assert(condition) ((void)0)
+#  endif
+#endif
+
+#define LZ4F_STATIC_ASSERT(c)    { enum { LZ4F_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) && !defined(DEBUGLOG)
+#  include <stdio.h>
+static int g_debuglog_enable = 1;
+#  define DEBUGLOG(l, ...) {                                  \
+                if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) {  \
+                    fprintf(stderr, __FILE__ ": ");           \
+                    fprintf(stderr, __VA_ARGS__);             \
+                    fprintf(stderr, " \n");                   \
+            }   }
+#else
+#  define DEBUGLOG(l, ...)      {}    /* disabled */
+#endif
 
 
 /*-************************************
@@ -158,7 +196,7 @@
 #define LZ4F_BLOCKSIZEID_DEFAULT LZ4F_max64KB
 
 static const size_t minFHSize = 7;
-static const size_t maxFHSize = LZ4F_HEADER_SIZE_MAX;   /* 15 */
+static const size_t maxFHSize = LZ4F_HEADER_SIZE_MAX;   /* 19 */
 static const size_t BHSize = 4;
 
 
@@ -170,6 +208,7 @@
     LZ4F_preferences_t prefs;
     U32    version;
     U32    cStage;
+    const LZ4F_CDict* cdict;
     size_t maxBlockSize;
     size_t maxBufferSize;
     BYTE*  tmpBuff;
@@ -178,7 +217,8 @@
     U64    totalInSize;
     XXH32_state_t xxh;
     void*  lz4CtxPtr;
-    U32    lz4CtxLevel;     /* 0: unallocated;  1: LZ4_stream_t;  3: LZ4_streamHC_t */
+    U16    lz4CtxAlloc; /* sized for: 0 = none, 1 = lz4 ctx, 2 = lz4hc ctx */
+    U16    lz4CtxState; /* in use as: 0 = none, 1 = lz4 ctx, 2 = lz4hc ctx */
 } LZ4F_cctx_t;
 
 
@@ -209,12 +249,15 @@
 
 static LZ4F_errorCode_t err0r(LZ4F_errorCodes code)
 {
-    LZ4_STATIC_ASSERT(sizeof(ptrdiff_t) >= sizeof(size_t));    /* A compilation error here means sizeof(ptrdiff_t) is not large enough */
+    /* A compilation error here means sizeof(ptrdiff_t) is not large enough */
+    LZ4F_STATIC_ASSERT(sizeof(ptrdiff_t) >= sizeof(size_t));
     return (LZ4F_errorCode_t)-(ptrdiff_t)code;
 }
 
 unsigned LZ4F_getVersion(void) { return LZ4F_VERSION; }
 
+int LZ4F_compressionLevel_max(void) { return LZ4HC_CLEVEL_MAX; }
+
 
 /*-************************************
 *  Private functions
@@ -241,7 +284,8 @@
 /*-************************************
 *  Simple-pass compression functions
 **************************************/
-static LZ4F_blockSizeID_t LZ4F_optimalBSID(const LZ4F_blockSizeID_t requestedBSID, const size_t srcSize)
+static LZ4F_blockSizeID_t LZ4F_optimalBSID(const LZ4F_blockSizeID_t requestedBSID,
+                                           const size_t srcSize)
 {
     LZ4F_blockSizeID_t proposedBSID = LZ4F_max64KB;
     size_t maxBlockSize = 64 KB;
@@ -254,134 +298,229 @@
     return requestedBSID;
 }
 
-/* LZ4F_compressBound() :
- * Provides dstCapacity given a srcSize to guarantee operation success in worst case situations.
- * prefsPtr is optional : you can provide NULL as argument, preferences will be set to cover worst case scenario.
- * Result is always the same for a srcSize and prefsPtr, so it can be trusted to size reusable buffers.
- * When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() operations.
+/*! LZ4F_compressBound_internal() :
+ *  Provides dstCapacity given a srcSize to guarantee operation success in worst case situations.
+ *  prefsPtr is optional : if NULL is provided, preferences will be set to cover worst case scenario.
+ * @return is always the same for a srcSize and prefsPtr, so it can be relied upon to size reusable buffers.
+ *  When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() operations.
  */
-static size_t LZ4F_compressBound_internal(size_t srcSize, const LZ4F_preferences_t* preferencesPtr, size_t alreadyBuffered)
+static size_t LZ4F_compressBound_internal(size_t srcSize,
+                                    const LZ4F_preferences_t* preferencesPtr,
+                                          size_t alreadyBuffered)
 {
     LZ4F_preferences_t prefsNull;
-    memset(&prefsNull, 0, sizeof(prefsNull));
+    MEM_INIT(&prefsNull, 0, sizeof(prefsNull));
     prefsNull.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled;   /* worst case */
     {   const LZ4F_preferences_t* const prefsPtr = (preferencesPtr==NULL) ? &prefsNull : preferencesPtr;
         U32 const flush = prefsPtr->autoFlush | (srcSize==0);
-        LZ4F_blockSizeID_t const bid = prefsPtr->frameInfo.blockSizeID;
-        size_t const blockSize = LZ4F_getBlockSize(bid);
+        LZ4F_blockSizeID_t const blockID = prefsPtr->frameInfo.blockSizeID;
+        size_t const blockSize = LZ4F_getBlockSize(blockID);
         size_t const maxBuffered = blockSize - 1;
         size_t const bufferedSize = MIN(alreadyBuffered, maxBuffered);
         size_t const maxSrcSize = srcSize + bufferedSize;
         unsigned const nbFullBlocks = (unsigned)(maxSrcSize / blockSize);
-        size_t const partialBlockSize = (srcSize - (srcSize==0)) & (blockSize-1);   /* 0 => -1 == MAX => blockSize-1 */
+        size_t const partialBlockSize = maxSrcSize & (blockSize-1);
         size_t const lastBlockSize = flush ? partialBlockSize : 0;
         unsigned const nbBlocks = nbFullBlocks + (lastBlockSize>0);
 
-        size_t const blockHeaderSize = 4;   /* default, without block CRC option (which cannot be generated with current API) */
+        size_t const blockHeaderSize = 4;
+        size_t const blockCRCSize = 4 * prefsPtr->frameInfo.blockChecksumFlag;
         size_t const frameEnd = 4 + (prefsPtr->frameInfo.contentChecksumFlag*4);
 
-        return (blockHeaderSize * nbBlocks) + (blockSize * nbFullBlocks) + lastBlockSize + frameEnd;;
+        return ((blockHeaderSize + blockCRCSize) * nbBlocks) +
+               (blockSize * nbFullBlocks) + lastBlockSize + frameEnd;
     }
 }
 
 size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr)
 {
     LZ4F_preferences_t prefs;
-    size_t const headerSize = maxFHSize;      /* max header size, including magic number and frame content size */
+    size_t const headerSize = maxFHSize;      /* max header size, including optional fields */
 
     if (preferencesPtr!=NULL) prefs = *preferencesPtr;
-    else memset(&prefs, 0, sizeof(prefs));
+    else MEM_INIT(&prefs, 0, sizeof(prefs));
     prefs.autoFlush = 1;
 
     return headerSize + LZ4F_compressBound_internal(srcSize, &prefs, 0);;
 }
 
 
-/*! LZ4F_compressFrame() :
-* Compress an entire srcBuffer into a valid LZ4 frame, as defined by specification v1.5.0, in a single step.
-* The most important rule is that dstBuffer MUST be large enough (dstMaxSize) to ensure compression completion even in worst case.
-* You can get the minimum value of dstMaxSize by using LZ4F_compressFrameBound()
-* If this condition is not respected, LZ4F_compressFrame() will fail (result is an errorCode)
-* The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will then be set to default.
-* The result of the function is the number of bytes written into dstBuffer.
-* The function outputs an error code if it fails (can be tested using LZ4F_isError())
-*/
-size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity, const void* srcBuffer, size_t srcSize, const LZ4F_preferences_t* preferencesPtr)
+/*! LZ4F_compressFrame_usingCDict() :
+ *  Compress srcBuffer using a dictionary, in a single step.
+ *  cdict can be NULL, in which case, no dictionary is used.
+ *  dstCapacity MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+ *  The LZ4F_preferences_t structure is optional : you may provide NULL as argument,
+ *  however, it's the only way to provide a dictID, so it's not recommended.
+ * @return : number of bytes written into dstBuffer,
+ *           or an error code if it fails (can be tested using LZ4F_isError())
+ */
+size_t LZ4F_compressFrame_usingCDict(LZ4F_cctx* cctx,
+                                     void* dstBuffer, size_t dstCapacity,
+                               const void* srcBuffer, size_t srcSize,
+                               const LZ4F_CDict* cdict,
+                               const LZ4F_preferences_t* preferencesPtr)
 {
-    LZ4F_cctx_t cctxI;
-    LZ4_stream_t lz4ctx;
     LZ4F_preferences_t prefs;
     LZ4F_compressOptions_t options;
     BYTE* const dstStart = (BYTE*) dstBuffer;
     BYTE* dstPtr = dstStart;
     BYTE* const dstEnd = dstStart + dstCapacity;
 
-    memset(&cctxI, 0, sizeof(cctxI));   /* works because no allocation */
-    memset(&options, 0, sizeof(options));
-
-    cctxI.version = LZ4F_VERSION;
-    cctxI.maxBufferSize = 5 MB;   /* mess with real buffer size to prevent allocation; works because autoflush==1 & stableSrc==1 */
-
     if (preferencesPtr!=NULL)
         prefs = *preferencesPtr;
     else
-        memset(&prefs, 0, sizeof(prefs));
+        MEM_INIT(&prefs, 0, sizeof(prefs));
     if (prefs.frameInfo.contentSize != 0)
         prefs.frameInfo.contentSize = (U64)srcSize;   /* auto-correct content size if selected (!=0) */
 
-    if (prefs.compressionLevel < LZ4HC_CLEVEL_MIN) {
-        cctxI.lz4CtxPtr = &lz4ctx;
-        cctxI.lz4CtxLevel = 1;
-    }
-
     prefs.frameInfo.blockSizeID = LZ4F_optimalBSID(prefs.frameInfo.blockSizeID, srcSize);
     prefs.autoFlush = 1;
     if (srcSize <= LZ4F_getBlockSize(prefs.frameInfo.blockSizeID))
-        prefs.frameInfo.blockMode = LZ4F_blockIndependent;   /* no need for linked blocks */
+        prefs.frameInfo.blockMode = LZ4F_blockIndependent;   /* only one block => no need for inter-block link */
 
+    MEM_INIT(&options, 0, sizeof(options));
     options.stableSrc = 1;
 
-    if (dstCapacity < LZ4F_compressFrameBound(srcSize, &prefs))
+    if (dstCapacity < LZ4F_compressFrameBound(srcSize, &prefs))  /* condition to guarantee success */
         return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
 
-    { size_t const headerSize = LZ4F_compressBegin(&cctxI, dstBuffer, dstCapacity, &prefs);  /* write header */
+    { size_t const headerSize = LZ4F_compressBegin_usingCDict(cctx, dstBuffer, dstCapacity, cdict, &prefs);  /* write header */
       if (LZ4F_isError(headerSize)) return headerSize;
       dstPtr += headerSize;   /* header size */ }
 
-    { size_t const cSize = LZ4F_compressUpdate(&cctxI, dstPtr, dstEnd-dstPtr, srcBuffer, srcSize, &options);
+    { size_t const cSize = LZ4F_compressUpdate(cctx, dstPtr, dstEnd-dstPtr, srcBuffer, srcSize, &options);
       if (LZ4F_isError(cSize)) return cSize;
       dstPtr += cSize; }
 
-    { size_t const tailSize = LZ4F_compressEnd(&cctxI, dstPtr, dstEnd-dstPtr, &options);   /* flush last block, and generate suffix */
+    { size_t const tailSize = LZ4F_compressEnd(cctx, dstPtr, dstEnd-dstPtr, &options);   /* flush last block, and generate suffix */
       if (LZ4F_isError(tailSize)) return tailSize;
       dstPtr += tailSize; }
 
-    if (prefs.compressionLevel >= LZ4HC_CLEVEL_MIN)   /* no allocation done with lz4 fast */
-        FREEMEM(cctxI.lz4CtxPtr);
-
     return (dstPtr - dstStart);
 }
 
 
+/*! LZ4F_compressFrame() :
+ *  Compress an entire srcBuffer into a valid LZ4 frame, in a single step, without dictionary.
+ *  dstCapacity MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+ *  The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default.
+ * @return : number of bytes written into dstBuffer,
+ *           or an error code if it fails (can be tested using LZ4F_isError())
+ */
+size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity,
+                    const void* srcBuffer, size_t srcSize,
+                    const LZ4F_preferences_t* preferencesPtr)
+{
+    size_t result;
+#if (LZ4F_HEAPMODE)   /* context obtained from LZ4F_createCompressionContext() */
+    LZ4F_cctx_t *cctxPtr;
+    result = LZ4F_createCompressionContext(&cctxPtr, LZ4F_VERSION);
+    if (LZ4F_isError(result)) return result;
+#else   /* context and fast-mode LZ4 state are local variables of this function */
+    LZ4F_cctx_t cctx;
+    LZ4_stream_t lz4ctx;
+    LZ4F_cctx_t *cctxPtr = &cctx;
+
+    DEBUGLOG(4, "LZ4F_compressFrame");
+    MEM_INIT(&cctx, 0, sizeof(cctx));
+    cctx.version = LZ4F_VERSION;
+    cctx.maxBufferSize = 5 MB;   /* mess with real buffer size to prevent dynamic allocation; works only because autoflush==1 & stableSrc==1 */
+    if (preferencesPtr == NULL ||
+        preferencesPtr->compressionLevel < LZ4HC_CLEVEL_MIN)   /* fast levels : plug in the local LZ4_stream_t */
+    {
+        LZ4_resetStream(&lz4ctx);
+        cctxPtr->lz4CtxPtr = &lz4ctx;
+        cctxPtr->lz4CtxAlloc = 1;   /* 1 = lz4 ctx : marks the context as already sized for fast mode */
+        cctxPtr->lz4CtxState = 1;   /* 1 = lz4 ctx : marks the context as currently in fast mode */
+    }
+#endif
+
+    result = LZ4F_compressFrame_usingCDict(cctxPtr, dstBuffer, dstCapacity,
+                                           srcBuffer, srcSize,
+                                           NULL, preferencesPtr);   /* cdict == NULL : no dictionary */
+
+#if (LZ4F_HEAPMODE)
+    LZ4F_freeCompressionContext(cctxPtr);
+#else
+    if (preferencesPtr != NULL &&
+        preferencesPtr->compressionLevel >= LZ4HC_CLEVEL_MIN)   /* HC levels : lz4CtxPtr never points to the local lz4ctx */
+    {
+        FREEMEM(cctxPtr->lz4CtxPtr);   /* releases the HC state created during LZ4F_compressBegin_usingCDict(), if any */
+    }
+#endif
+    return result;
+}
+
+
+/*-***************************************************
+*   Dictionary compression
+*****************************************************/
+
+struct LZ4F_CDict_s {
+    void* dictContent;       /* private copy of the dictionary bytes, filled by LZ4F_createCDict() */
+    LZ4_stream_t* fastCtx;   /* LZ4 stream with dictContent loaded through LZ4_loadDict() */
+    LZ4_streamHC_t* HCCtx;   /* LZ4HC stream with dictContent loaded through LZ4_loadDictHC() */
+}; /* typedef'd to LZ4F_CDict within lz4frame_static.h */
+
+/*! LZ4F_createCDict() :
+ *  When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
+ *  LZ4F_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
+ *  LZ4F_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ * `dictBuffer` can be released after LZ4F_CDict creation, since its content (at most its last 64 KB) is copied within CDict
+ * @return : digested dictionary for compression, or NULL if failed */
+LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize)
+{
+    const char* dictStart = (const char*)dictBuffer;
+    LZ4F_CDict* cdict = (LZ4F_CDict*) ALLOC(sizeof(*cdict));
+    DEBUGLOG(4, "LZ4F_createCDict");
+    if (!cdict) return NULL;
+    if (dictSize > 64 KB) {   /* oversized dictionary : keep only its last 64 KB */
+        dictStart += dictSize - 64 KB;
+        dictSize = 64 KB;
+    }
+    cdict->dictContent = ALLOC(dictSize);
+    cdict->fastCtx = LZ4_createStream();
+    cdict->HCCtx = LZ4_createStreamHC();
+    if (!cdict->dictContent || !cdict->fastCtx || !cdict->HCCtx) {   /* any of the three allocations failed */
+        LZ4F_freeCDict(cdict);   /* all three members were assigned above, so each is either valid or NULL */
+        return NULL;
+    }
+    memcpy(cdict->dictContent, dictStart, dictSize);   /* private copy : caller's dictBuffer is not referenced afterwards */
+    LZ4_loadDict (cdict->fastCtx, (const char*)cdict->dictContent, (int)dictSize);   /* (int) cast is safe : dictSize <= 64 KB here */
+    LZ4_setCompressionLevel(cdict->HCCtx, LZ4HC_CLEVEL_DEFAULT);
+    LZ4_loadDictHC(cdict->HCCtx, (const char*)cdict->dictContent, (int)dictSize);
+    return cdict;
+}
+
+void LZ4F_freeCDict(LZ4F_CDict* cdict)   /* releases a CDict and the three members set up by LZ4F_createCDict() */
+{
+    if (cdict==NULL) return;  /* support free on NULL */
+    FREEMEM(cdict->dictContent);       /* dictionary copy */
+    LZ4_freeStream(cdict->fastCtx);    /* fast-mode stream */
+    LZ4_freeStreamHC(cdict->HCCtx);    /* HC-mode stream */
+    FREEMEM(cdict);                    /* the CDict structure itself, last */
+}
+
+
 /*-*********************************
 *  Advanced compression functions
 ***********************************/
 
 /*! LZ4F_createCompressionContext() :
- * The first thing to do is to create a compressionContext object, which will be used in all compression operations.
- * This is achieved using LZ4F_createCompressionContext(), which takes as argument a version and an LZ4F_preferences_t structure.
- * The version provided MUST be LZ4F_VERSION. It is intended to track potential version differences between different binaries.
- * The function will provide a pointer to an allocated LZ4F_compressionContext_t object.
- * If the result LZ4F_errorCode_t is not OK_NoError, there was an error during context creation.
- * Object can release its memory using LZ4F_freeCompressionContext();
+ *  The first thing to do is to create a compressionContext object, which will be used in all compression operations.
+ *  This is achieved using LZ4F_createCompressionContext(), which takes as arguments a pointer to the context handle to fill and a version number.
+ *  The version provided MUST be LZ4F_VERSION. It is intended to track potential incompatible differences between different binaries.
+ *  The function will provide a pointer to an allocated LZ4F_compressionContext_t object.
+ *  If the result LZ4F_errorCode_t is not OK_NoError, there was an error during context creation.
+ *  Object can release its memory using LZ4F_freeCompressionContext();
  */
 LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_compressionContext_t* LZ4F_compressionContextPtr, unsigned version)
 {
-    LZ4F_cctx_t* const cctxPtr = (LZ4F_cctx_t*)ALLOCATOR(sizeof(LZ4F_cctx_t));
+    LZ4F_cctx_t* const cctxPtr = (LZ4F_cctx_t*)ALLOC_AND_ZERO(sizeof(LZ4F_cctx_t));
     if (cctxPtr==NULL) return err0r(LZ4F_ERROR_allocation_failed);
 
     cctxPtr->version = version;
-    cctxPtr->cStage = 0;   /* Next stage : write header */
+    cctxPtr->cStage = 0;   /* Next stage : init stream */
 
     *LZ4F_compressionContextPtr = (LZ4F_compressionContext_t)cctxPtr;
 
@@ -393,8 +532,8 @@
 {
     LZ4F_cctx_t* const cctxPtr = (LZ4F_cctx_t*)LZ4F_compressionContext;
 
-    if (cctxPtr != NULL) {  /* null pointers can be safely provided to this function, like free() */
-       FREEMEM(cctxPtr->lz4CtxPtr);
+    if (cctxPtr != NULL) {  /* support free on NULL */
+       FREEMEM(cctxPtr->lz4CtxPtr);  /* works because LZ4_streamHC_t and LZ4_stream_t are simple POD types */
        FREEMEM(cctxPtr->tmpBuff);
        FREEMEM(LZ4F_compressionContext);
     }
@@ -403,59 +542,110 @@
 }
 
 
-/*! LZ4F_compressBegin() :
- * will write the frame header into dstBuffer.
- * dstBuffer must be large enough to accommodate a header (dstCapacity). Maximum header size is LZ4F_HEADER_SIZE_MAX bytes.
+/**
+ * This function prepares the internal LZ4(HC) stream for a new compression,
+ * resetting the context and attaching the dictionary, if there is one.
+ *
+ * It needs to be called at the beginning of each independent compression
+ * stream (i.e., at the beginning of a frame in blockLinked mode, or at the
+ * beginning of each block in blockIndependent mode).
+ */
+static void LZ4F_initStream(void* ctx,
+                            const LZ4F_CDict* cdict,
+                            int level,
+                            LZ4F_blockMode_t blockMode) {
+    if (level < LZ4HC_CLEVEL_MIN) {   /* fast levels : ctx is used as an LZ4_stream_t */
+        if (cdict != NULL || blockMode == LZ4F_blockLinked) {
+            /* In these cases, we will call LZ4_compress_fast_continue(),
+             * which needs an already reset context. Otherwise, we'll call a
+             * one-shot API. The non-continued APIs internally perform their own
+             * resets at the beginning of their calls, where they know what
+             * tableType they need the context to be in. So in that case this
+             * would be misguided / wasted work. */
+            LZ4_resetStream_fast((LZ4_stream_t*)ctx);
+        }
+        LZ4_attach_dictionary((LZ4_stream_t *)ctx, cdict ? cdict->fastCtx : NULL);   /* always called; receives NULL when there is no cdict */
+    } else {   /* HC levels : ctx is used as an LZ4_streamHC_t, and is reset unconditionally */
+        LZ4_resetStreamHC_fast((LZ4_streamHC_t*)ctx, level);
+        LZ4_attach_HC_dictionary((LZ4_streamHC_t *)ctx, cdict ? cdict->HCCtx : NULL);   /* receives NULL when there is no cdict */
+    }
+}
+
+
+/*! LZ4F_compressBegin_usingCDict() :
+ *  Initializes streaming compression and writes the frame header into dstBuffer.
+ *  dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
  * @return : number of bytes written into dstBuffer for the header
  *           or an error code (can be tested using LZ4F_isError())
  */
-size_t LZ4F_compressBegin(LZ4F_cctx* cctxPtr, void* dstBuffer, size_t dstCapacity, const LZ4F_preferences_t* preferencesPtr)
+size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctxPtr,
+                          void* dstBuffer, size_t dstCapacity,
+                          const LZ4F_CDict* cdict,
+                          const LZ4F_preferences_t* preferencesPtr)
 {
     LZ4F_preferences_t prefNull;
     BYTE* const dstStart = (BYTE*)dstBuffer;
     BYTE* dstPtr = dstStart;
     BYTE* headerStart;
-    size_t requiredBuffSize;
 
     if (dstCapacity < maxFHSize) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
-    if (cctxPtr->cStage != 0) return err0r(LZ4F_ERROR_GENERIC);
-    memset(&prefNull, 0, sizeof(prefNull));
+    MEM_INIT(&prefNull, 0, sizeof(prefNull));
     if (preferencesPtr == NULL) preferencesPtr = &prefNull;
     cctxPtr->prefs = *preferencesPtr;
 
-    /* ctx Management */
-    {   U32 const tableID = (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) ? 1 : 2;  /* 0:nothing ; 1:LZ4 table ; 2:HC tables */
-        if (cctxPtr->lz4CtxLevel < tableID) {
+    /* Ctx Management */
+    {   U16 const ctxTypeID = (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) ? 1 : 2;
+        if (cctxPtr->lz4CtxAlloc < ctxTypeID) {
             FREEMEM(cctxPtr->lz4CtxPtr);
-            if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN)
+            if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) {
                 cctxPtr->lz4CtxPtr = (void*)LZ4_createStream();
-            else
+            } else {
                 cctxPtr->lz4CtxPtr = (void*)LZ4_createStreamHC();
-            cctxPtr->lz4CtxLevel = tableID;
+            }
+            if (cctxPtr->lz4CtxPtr == NULL) return err0r(LZ4F_ERROR_allocation_failed);
+            cctxPtr->lz4CtxAlloc = ctxTypeID;
+            cctxPtr->lz4CtxState = ctxTypeID;
+        } else if (cctxPtr->lz4CtxState != ctxTypeID) {
+            /* otherwise, a sufficient buffer is allocated, but we need to
+             * reset it to the correct context type */
+            if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) {
+                LZ4_resetStream((LZ4_stream_t *) cctxPtr->lz4CtxPtr);
+            } else {
+                LZ4_resetStreamHC((LZ4_streamHC_t *) cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel);
+            }
+            cctxPtr->lz4CtxState = ctxTypeID;
         }
     }
 
     /* Buffer Management */
-    if (cctxPtr->prefs.frameInfo.blockSizeID == 0) cctxPtr->prefs.frameInfo.blockSizeID = LZ4F_BLOCKSIZEID_DEFAULT;
+    if (cctxPtr->prefs.frameInfo.blockSizeID == 0)
+        cctxPtr->prefs.frameInfo.blockSizeID = LZ4F_BLOCKSIZEID_DEFAULT;
     cctxPtr->maxBlockSize = LZ4F_getBlockSize(cctxPtr->prefs.frameInfo.blockSizeID);
 
-    requiredBuffSize = cctxPtr->maxBlockSize + ((cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) * 128 KB);
-    if (preferencesPtr->autoFlush)
-        requiredBuffSize = (cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) * 64 KB;   /* just needs dict */
+    {   size_t const requiredBuffSize = preferencesPtr->autoFlush ?
+                (cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) * 64 KB :  /* only needs windows size */
+                cctxPtr->maxBlockSize + ((cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) * 128 KB);
 
-    if (cctxPtr->maxBufferSize < requiredBuffSize) {
-        cctxPtr->maxBufferSize = requiredBuffSize;
-        FREEMEM(cctxPtr->tmpBuff);
-        cctxPtr->tmpBuff = (BYTE*)ALLOCATOR(requiredBuffSize);
-        if (cctxPtr->tmpBuff == NULL) return err0r(LZ4F_ERROR_allocation_failed);
-    }
+        if (cctxPtr->maxBufferSize < requiredBuffSize) {
+            cctxPtr->maxBufferSize = 0;
+            FREEMEM(cctxPtr->tmpBuff);
+            cctxPtr->tmpBuff = (BYTE*)ALLOC_AND_ZERO(requiredBuffSize);
+            if (cctxPtr->tmpBuff == NULL) return err0r(LZ4F_ERROR_allocation_failed);
+            cctxPtr->maxBufferSize = requiredBuffSize;
+    }   }
     cctxPtr->tmpIn = cctxPtr->tmpBuff;
     cctxPtr->tmpInSize = 0;
     XXH32_reset(&(cctxPtr->xxh), 0);
-    if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN)
-        LZ4_resetStream((LZ4_stream_t*)(cctxPtr->lz4CtxPtr));
-    else
-        LZ4_resetStreamHC((LZ4_streamHC_t*)(cctxPtr->lz4CtxPtr), cctxPtr->prefs.compressionLevel);
+
+    /* context init */
+    cctxPtr->cdict = cdict;
+    if (cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) {
+        /* frame init only for blockLinked : blockIndependent will be init at each block */
+        LZ4F_initStream(cctxPtr->lz4CtxPtr, cdict, cctxPtr->prefs.compressionLevel, LZ4F_blockLinked);
+    }
+    if (preferencesPtr->compressionLevel >= LZ4HC_CLEVEL_MIN) {
+          LZ4_favorDecompressionSpeed((LZ4_streamHC_t*)cctxPtr->lz4CtxPtr, (int)preferencesPtr->favorDecSpeed);
+    }
 
     /* Magic Number */
     LZ4F_writeLE32(dstPtr, LZ4F_MAGICNUMBER);
@@ -464,9 +654,11 @@
 
     /* FLG Byte */
     *dstPtr++ = (BYTE)(((1 & _2BITS) << 6)    /* Version('01') */
-        + ((cctxPtr->prefs.frameInfo.blockMode & _1BIT ) << 5)    /* Block mode */
-        + ((cctxPtr->prefs.frameInfo.contentChecksumFlag & _1BIT ) << 2)   /* Frame checksum */
-        + ((cctxPtr->prefs.frameInfo.contentSize > 0) << 3));   /* Frame content size */
+        + ((cctxPtr->prefs.frameInfo.blockMode & _1BIT ) << 5)
+        + ((cctxPtr->prefs.frameInfo.blockChecksumFlag & _1BIT ) << 4)
+        + ((cctxPtr->prefs.frameInfo.contentSize > 0) << 3)
+        + ((cctxPtr->prefs.frameInfo.contentChecksumFlag & _1BIT ) << 2)
+        +  (cctxPtr->prefs.frameInfo.dictID > 0) );
     /* BD Byte */
     *dstPtr++ = (BYTE)((cctxPtr->prefs.frameInfo.blockSizeID & _3BITS) << 4);
     /* Optional Frame content size field */
@@ -475,20 +667,40 @@
         dstPtr += 8;
         cctxPtr->totalInSize = 0;
     }
-    /* CRC Byte */
+    /* Optional dictionary ID field */
+    if (cctxPtr->prefs.frameInfo.dictID) {
+        LZ4F_writeLE32(dstPtr, cctxPtr->prefs.frameInfo.dictID);
+        dstPtr += 4;
+    }
+    /* Header CRC Byte */
     *dstPtr = LZ4F_headerChecksum(headerStart, dstPtr - headerStart);
     dstPtr++;
 
     cctxPtr->cStage = 1;   /* header written, now request input data block */
-
     return (dstPtr - dstStart);
 }
 
 
-/* LZ4F_compressBound() :
- *      @ return size of Dst buffer given a srcSize to handle worst case situations.
- *      The LZ4F_frameInfo_t structure is optional : if NULL, preferences will be set to cover worst case situations.
- *      This function cannot fail.
+/*! LZ4F_compressBegin() :
+ *  Initializes streaming compression (without dictionary) and writes the frame header into dstBuffer.
+ *  dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ *  preferencesPtr can be NULL, in which case default parameters are selected.
+ * @return : number of bytes written into dstBuffer for the header
+ *           or an error code (can be tested using LZ4F_isError())
+ */
+size_t LZ4F_compressBegin(LZ4F_cctx* cctxPtr,
+                          void* dstBuffer, size_t dstCapacity,
+                          const LZ4F_preferences_t* preferencesPtr)
+{
+    return LZ4F_compressBegin_usingCDict(cctxPtr, dstBuffer, dstCapacity,
+                                         NULL, preferencesPtr);   /* cdict == NULL : no dictionary */
+}
+
+
+/*  LZ4F_compressBound() :
+ * @return minimum capacity of dstBuffer for a given srcSize to handle worst case scenario.
+ *  LZ4F_preferences_t structure is optional : if NULL, preferences will be set to cover worst case scenario.
+ *  This function cannot fail.
  */
 size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr)
 {
@@ -496,49 +708,75 @@
 }
 
 
-typedef int (*compressFunc_t)(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level);
+typedef int (*compressFunc_t)(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level, const LZ4F_CDict* cdict);
 
-static size_t LZ4F_compressBlock(void* dst, const void* src, size_t srcSize, compressFunc_t compress, void* lz4ctx, int level)
+
+/*! LZ4F_makeBlock():
+ *  compress a single block, add header and checksum
+ *  assumption : dst buffer capacity is >= srcSize */
+static size_t LZ4F_makeBlock(void* dst, const void* src, size_t srcSize,
+                             compressFunc_t compress, void* lz4ctx, int level,
+                             const LZ4F_CDict* cdict, LZ4F_blockChecksum_t crcFlag)
 {
-    /* compress a single block */
     BYTE* const cSizePtr = (BYTE*)dst;
-    U32 cSize = (U32)compress(lz4ctx, (const char*)src, (char*)(cSizePtr+4), (int)(srcSize), (int)(srcSize-1), level);
+    U32 cSize = (U32)compress(lz4ctx, (const char*)src, (char*)(cSizePtr+4),
+                                      (int)(srcSize), (int)(srcSize-1),
+                                      level, cdict);
     LZ4F_writeLE32(cSizePtr, cSize);
     if (cSize == 0) {  /* compression failed */
         cSize = (U32)srcSize;
         LZ4F_writeLE32(cSizePtr, cSize | LZ4F_BLOCKUNCOMPRESSED_FLAG);
         memcpy(cSizePtr+4, src, srcSize);
     }
-    return cSize + 4;
+    if (crcFlag) {
+        U32 const crc32 = XXH32(cSizePtr+4, cSize, 0);  /* checksum of compressed data */
+        LZ4F_writeLE32(cSizePtr+4+cSize, crc32);
+    }
+    return 4 + cSize + ((U32)crcFlag)*4;
 }
 
 
-static int LZ4F_localLZ4_compress_limitedOutput_withState(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level)
+static int LZ4F_compressBlock(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
 {
-    (void) level;
-    return LZ4_compress_fast_extState(ctx, src, dst, srcSize, dstCapacity, 1);
+    int const acceleration = (level < 0) ? -level + 1 : 1;
+    LZ4F_initStream(ctx, cdict, level, LZ4F_blockIndependent);
+    if (cdict) {
+        return LZ4_compress_fast_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstCapacity, acceleration);
+    } else {
+        return LZ4_compress_fast_extState_fastReset(ctx, src, dst, srcSize, dstCapacity, acceleration);
+    }
 }
 
-static int LZ4F_localLZ4_compress_limitedOutput_continue(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level)
+static int LZ4F_compressBlock_continue(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
 {
-    (void) level;
-    return LZ4_compress_fast_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstCapacity, 1);
+    int const acceleration = (level < 0) ? -level + 1 : 1;
+    (void)cdict; /* init once at beginning of frame */
+    return LZ4_compress_fast_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstCapacity, acceleration);
 }
 
-static int LZ4F_localLZ4_compressHC_limitedOutput_continue(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level)
+static int LZ4F_compressBlockHC(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
 {
-    (void) level;
-    return LZ4_compress_HC_continue((LZ4_streamHC_t*)ctx, src, dst, srcSize, dstSize);
+    LZ4F_initStream(ctx, cdict, level, LZ4F_blockIndependent);
+    if (cdict) {
+        return LZ4_compress_HC_continue((LZ4_streamHC_t*)ctx, src, dst, srcSize, dstCapacity);
+    }
+    return LZ4_compress_HC_extStateHC_fastReset(ctx, src, dst, srcSize, dstCapacity, level);
+}
+
+static int LZ4F_compressBlockHC_continue(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
+{
+    (void)level; (void)cdict; /* unused here : both are applied once, by LZ4F_initStream(), at beginning of frame */
+    return LZ4_compress_HC_continue((LZ4_streamHC_t*)ctx, src, dst, srcSize, dstCapacity);
+}
 
 static compressFunc_t LZ4F_selectCompression(LZ4F_blockMode_t blockMode, int level)
 {
     if (level < LZ4HC_CLEVEL_MIN) {
-        if (blockMode == LZ4F_blockIndependent) return LZ4F_localLZ4_compress_limitedOutput_withState;
-        return LZ4F_localLZ4_compress_limitedOutput_continue;
+        if (blockMode == LZ4F_blockIndependent) return LZ4F_compressBlock;
+        return LZ4F_compressBlock_continue;
     }
-    if (blockMode == LZ4F_blockIndependent) return LZ4_compress_HC_extStateHC;
-    return LZ4F_localLZ4_compressHC_limitedOutput_continue;
+    if (blockMode == LZ4F_blockIndependent) return LZ4F_compressBlockHC;
+    return LZ4F_compressBlockHC_continue;
 }
 
 static int LZ4F_localSaveDict(LZ4F_cctx_t* cctxPtr)
@@ -551,15 +789,16 @@
 typedef enum { notDone, fromTmpBuffer, fromSrcBuffer } LZ4F_lastBlockStatus;
 
 /*! LZ4F_compressUpdate() :
-* LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary.
-* The most important rule is that dstBuffer MUST be large enough (dstCapacity) to ensure compression completion even in worst case.
-* If this condition is not respected, LZ4F_compress() will fail (result is an errorCode)
-* You can get the minimum value of dstCapacity by using LZ4F_compressBound()
-* The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
-* The result of the function is the number of bytes written into dstBuffer : it can be zero, meaning input data was just buffered.
-* The function outputs an error code if it fails (can be tested using LZ4F_isError())
-*/
-size_t LZ4F_compressUpdate(LZ4F_cctx* cctxPtr, void* dstBuffer, size_t dstCapacity, const void* srcBuffer, size_t srcSize, const LZ4F_compressOptions_t* compressOptionsPtr)
+ *  LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary.
+ *  dstBuffer MUST be >= LZ4F_compressBound(srcSize, preferencesPtr).
+ *  LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+ * @return : the number of bytes written into dstBuffer. It can be zero, meaning input data was just buffered.
+ *           or an error code if it fails (which can be tested using LZ4F_isError())
+ */
+size_t LZ4F_compressUpdate(LZ4F_cctx* cctxPtr,
+                           void* dstBuffer, size_t dstCapacity,
+                     const void* srcBuffer, size_t srcSize,
+                     const LZ4F_compressOptions_t* compressOptionsPtr)
 {
     LZ4F_compressOptions_t cOptionsNull;
     size_t const blockSize = cctxPtr->maxBlockSize;
@@ -570,10 +809,12 @@
     LZ4F_lastBlockStatus lastBlockCompressed = notDone;
     compressFunc_t const compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel);
 
+    DEBUGLOG(4, "LZ4F_compressUpdate (srcSize=%zu)", srcSize);
 
     if (cctxPtr->cStage != 1) return err0r(LZ4F_ERROR_GENERIC);
-    if (dstCapacity < LZ4F_compressBound_internal(srcSize, &(cctxPtr->prefs), cctxPtr->tmpInSize)) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
-    memset(&cOptionsNull, 0, sizeof(cOptionsNull));
+    if (dstCapacity < LZ4F_compressBound_internal(srcSize, &(cctxPtr->prefs), cctxPtr->tmpInSize))
+        return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
+    MEM_INIT(&cOptionsNull, 0, sizeof(cOptionsNull));
     if (compressOptionsPtr == NULL) compressOptionsPtr = &cOptionsNull;
 
     /* complete tmp buffer */
@@ -591,7 +832,9 @@
             memcpy(cctxPtr->tmpIn + cctxPtr->tmpInSize, srcBuffer, sizeToCopy);
             srcPtr += sizeToCopy;
 
-            dstPtr += LZ4F_compressBlock(dstPtr, cctxPtr->tmpIn, blockSize, compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel);
+            dstPtr += LZ4F_makeBlock(dstPtr, cctxPtr->tmpIn, blockSize,
+                                     compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
+                                     cctxPtr->cdict, cctxPtr->prefs.frameInfo.blockChecksumFlag);
 
             if (cctxPtr->prefs.frameInfo.blockMode==LZ4F_blockLinked) cctxPtr->tmpIn += blockSize;
             cctxPtr->tmpInSize = 0;
@@ -599,16 +842,20 @@
     }
 
     while ((size_t)(srcEnd - srcPtr) >= blockSize) {
-        /* compress full block */
+        /* compress full blocks */
         lastBlockCompressed = fromSrcBuffer;
-        dstPtr += LZ4F_compressBlock(dstPtr, srcPtr, blockSize, compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel);
+        dstPtr += LZ4F_makeBlock(dstPtr, srcPtr, blockSize,
+                                 compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
+                                 cctxPtr->cdict, cctxPtr->prefs.frameInfo.blockChecksumFlag);
         srcPtr += blockSize;
     }
 
     if ((cctxPtr->prefs.autoFlush) && (srcPtr < srcEnd)) {
         /* compress remaining input < blockSize */
         lastBlockCompressed = fromSrcBuffer;
-        dstPtr += LZ4F_compressBlock(dstPtr, srcPtr, srcEnd - srcPtr, compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel);
+        dstPtr += LZ4F_makeBlock(dstPtr, srcPtr, srcEnd - srcPtr,
+                                 compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
+                                 cctxPtr->cdict, cctxPtr->prefs.frameInfo.blockChecksumFlag);
         srcPtr  = srcEnd;
     }
 
@@ -617,7 +864,7 @@
         if (compressOptionsPtr->stableSrc) {
             cctxPtr->tmpIn = cctxPtr->tmpBuff;
         } else {
-            int realDictSize = LZ4F_localSaveDict(cctxPtr);
+            int const realDictSize = LZ4F_localSaveDict(cctxPtr);
             if (realDictSize==0) return err0r(LZ4F_ERROR_GENERIC);
             cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize;
         }
@@ -627,7 +874,7 @@
     if ((cctxPtr->tmpIn + blockSize) > (cctxPtr->tmpBuff + cctxPtr->maxBufferSize)   /* necessarily LZ4F_blockLinked && lastBlockCompressed==fromTmpBuffer */
         && !(cctxPtr->prefs.autoFlush))
     {
-        int realDictSize = LZ4F_localSaveDict(cctxPtr);
+        int const realDictSize = LZ4F_localSaveDict(cctxPtr);
         cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize;
     }
 
@@ -648,13 +895,13 @@
 
 
 /*! LZ4F_flush() :
-* Should you need to create compressed data immediately, without waiting for a block to be filled,
-* you can call LZ4_flush(), which will immediately compress any remaining data stored within compressionContext.
-* The result of the function is the number of bytes written into dstBuffer
-* (it can be zero, this means there was no data left within compressionContext)
-* The function outputs an error code if it fails (can be tested using LZ4F_isError())
-* The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
-*/
+ *  Should you need to create compressed data immediately, without waiting for a block to be filled,
+ *  you can call LZ4F_flush(), which will immediately compress any remaining data stored within compressionContext.
+ *  The result of the function is the number of bytes written into dstBuffer
+ *  (it can be zero, this means there was no data left within compressionContext)
+ *  The function outputs an error code if it fails (can be tested using LZ4F_isError())
+ *  The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+ */
 size_t LZ4F_flush(LZ4F_cctx* cctxPtr, void* dstBuffer, size_t dstCapacity, const LZ4F_compressOptions_t* compressOptionsPtr)
 {
     BYTE* const dstStart = (BYTE*)dstBuffer;
@@ -670,7 +917,9 @@
     compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel);
 
     /* compress tmp buffer */
-    dstPtr += LZ4F_compressBlock(dstPtr, cctxPtr->tmpIn, cctxPtr->tmpInSize, compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel);
+    dstPtr += LZ4F_makeBlock(dstPtr, cctxPtr->tmpIn, cctxPtr->tmpInSize,
+                             compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
+                             cctxPtr->cdict, cctxPtr->prefs.frameInfo.blockChecksumFlag);
     if (cctxPtr->prefs.frameInfo.blockMode==LZ4F_blockLinked) cctxPtr->tmpIn += cctxPtr->tmpInSize;
     cctxPtr->tmpInSize = 0;
 
@@ -685,14 +934,14 @@
 
 
 /*! LZ4F_compressEnd() :
-* When you want to properly finish the compressed frame, just call LZ4F_compressEnd().
-* It will flush whatever data remained within compressionContext (like LZ4_flush())
-* but also properly finalize the frame, with an endMark and a checksum.
-* The result of the function is the number of bytes written into dstBuffer (necessarily >= 4 (endMark size))
-* The function outputs an error code if it fails (can be tested using LZ4F_isError())
-* The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
-* compressionContext can then be used again, starting with LZ4F_compressBegin(). The preferences will remain the same.
-*/
+ * When you want to properly finish the compressed frame, just call LZ4F_compressEnd().
+ * It will flush whatever data remained within compressionContext (like LZ4F_flush())
+ * but also properly finalize the frame, with an endMark and a checksum.
+ * The result of the function is the number of bytes written into dstBuffer (necessarily >= 4 (endMark size))
+ * The function outputs an error code if it fails (can be tested using LZ4F_isError())
+ * The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+ * compressionContext can then be used again, starting with LZ4F_compressBegin(). The preferences will remain the same.
+ */
 size_t LZ4F_compressEnd(LZ4F_cctx* cctxPtr, void* dstBuffer, size_t dstMaxSize, const LZ4F_compressOptions_t* compressOptionsPtr)
 {
     BYTE* const dstStart = (BYTE*)dstBuffer;
@@ -727,10 +976,22 @@
 *   Frame Decompression
 *****************************************************/
 
+typedef enum {
+    dstage_getFrameHeader=0, dstage_storeFrameHeader,
+    dstage_init,
+    dstage_getBlockHeader, dstage_storeBlockHeader,
+    dstage_copyDirect, dstage_getBlockChecksum,
+    dstage_getCBlock, dstage_storeCBlock,
+    dstage_flushOut,
+    dstage_getSuffix, dstage_storeSuffix,
+    dstage_getSFrameSize, dstage_storeSFrameSize,
+    dstage_skipSkippable
+} dStage_t;
+
 struct LZ4F_dctx_s {
     LZ4F_frameInfo_t frameInfo;
     U32    version;
-    U32    dStage;
+    dStage_t dStage;
     U64    frameRemainingSize;
     size_t maxBlockSize;
     size_t maxBufferSize;
@@ -738,40 +999,41 @@
     size_t tmpInSize;
     size_t tmpInTarget;
     BYTE*  tmpOutBuffer;
-    const BYTE*  dict;
+    const BYTE* dict;
     size_t dictSize;
     BYTE*  tmpOut;
     size_t tmpOutSize;
     size_t tmpOutStart;
     XXH32_state_t xxh;
-    BYTE   header[16];
+    XXH32_state_t blockChecksum;
+    BYTE   header[LZ4F_HEADER_SIZE_MAX];
 };  /* typedef'd to LZ4F_dctx in lz4frame.h */
 
 
 /*! LZ4F_createDecompressionContext() :
-*   Create a decompressionContext object, which will track all decompression operations.
-*   Provides a pointer to a fully allocated and initialized LZ4F_decompressionContext object.
-*   Object can later be released using LZ4F_freeDecompressionContext().
-*   @return : if != 0, there was an error during context creation.
-*/
+ *  Create a decompressionContext object, which will track all decompression operations.
+ *  Provides a pointer to a fully allocated and initialized LZ4F_decompressionContext object.
+ *  Object can later be released using LZ4F_freeDecompressionContext().
+ * @return : if != 0, there was an error during context creation.
+ */
 LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_dctx** LZ4F_decompressionContextPtr, unsigned versionNumber)
 {
-    LZ4F_dctx* const dctxPtr = (LZ4F_dctx*)ALLOCATOR(sizeof(LZ4F_dctx));
-    if (dctxPtr==NULL) return err0r(LZ4F_ERROR_GENERIC);
+    LZ4F_dctx* const dctx = (LZ4F_dctx*)ALLOC_AND_ZERO(sizeof(LZ4F_dctx));
+    if (dctx==NULL) return err0r(LZ4F_ERROR_GENERIC);
 
-    dctxPtr->version = versionNumber;
-    *LZ4F_decompressionContextPtr = dctxPtr;
+    dctx->version = versionNumber;
+    *LZ4F_decompressionContextPtr = dctx;
     return LZ4F_OK_NoError;
 }
 
-LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* const dctxPtr)
+LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* dctx)
 {
     LZ4F_errorCode_t result = LZ4F_OK_NoError;
-    if (dctxPtr != NULL) {   /* can accept NULL input, like free() */
-      result = (LZ4F_errorCode_t)dctxPtr->dStage;
-      FREEMEM(dctxPtr->tmpIn);
-      FREEMEM(dctxPtr->tmpOutBuffer);
-      FREEMEM(dctxPtr);
+    if (dctx != NULL) {   /* can accept NULL input, like free() */
+      result = (LZ4F_errorCode_t)dctx->dStage;
+      FREEMEM(dctx->tmpIn);
+      FREEMEM(dctx->tmpOutBuffer);
+      FREEMEM(dctx);
     }
     return result;
 }
@@ -779,22 +1041,18 @@
 
 /*==---   Streaming Decompression operations   ---==*/
 
-typedef enum { dstage_getHeader=0, dstage_storeHeader,
-    dstage_getCBlockSize, dstage_storeCBlockSize,
-    dstage_copyDirect,
-    dstage_getCBlock, dstage_storeCBlock,
-    dstage_decodeCBlock, dstage_decodeCBlock_intoDst,
-    dstage_decodeCBlock_intoTmp, dstage_flushOut,
-    dstage_getSuffix, dstage_storeSuffix,
-    dstage_getSFrameSize, dstage_storeSFrameSize,
-    dstage_skipSkippable
-} dStage_t;
+void LZ4F_resetDecompressionContext(LZ4F_dctx* dctx)
+{
+    dctx->dStage = dstage_getFrameHeader;   /* back to the first stage : next LZ4F_decompress() call expects a frame header */
+    dctx->dict = NULL;                      /* forget any dictionary reference */
+    dctx->dictSize = 0;
+}
 
 
 /*! LZ4F_headerSize() :
-*   @return : size of frame header
-*             or an error code, which can be tested using LZ4F_isError()
-*/
+ *   @return : size of frame header
+ *             or an error code, which can be tested using LZ4F_isError()
+ */
 static size_t LZ4F_headerSize(const void* src, size_t srcSize)
 {
     /* minimal srcSize to determine header size */
@@ -804,244 +1062,252 @@
     if ((LZ4F_readLE32(src) & 0xFFFFFFF0U) == LZ4F_MAGIC_SKIPPABLE_START) return 8;
 
     /* control magic number */
-    if (LZ4F_readLE32(src) != LZ4F_MAGICNUMBER) return err0r(LZ4F_ERROR_frameType_unknown);
+    if (LZ4F_readLE32(src) != LZ4F_MAGICNUMBER)
+        return err0r(LZ4F_ERROR_frameType_unknown);
 
     /* Frame Header Size */
     {   BYTE const FLG = ((const BYTE*)src)[4];
         U32 const contentSizeFlag = (FLG>>3) & _1BIT;
-        return contentSizeFlag ? maxFHSize : minFHSize;
+        U32 const dictIDFlag = FLG & _1BIT;
+        return minFHSize + (contentSizeFlag*8) + (dictIDFlag*4);
     }
 }
 
 
 /*! LZ4F_decodeHeader() :
-   input   : `src` points at the **beginning of the frame**
-   output  : set internal values of dctx, such as
-             dctxPtr->frameInfo and dctxPtr->dStage.
-             Also allocates internal buffers.
-   @return : nb Bytes read from srcVoidPtr (necessarily <= srcSize)
-             or an error code (testable with LZ4F_isError())
-*/
-static size_t LZ4F_decodeHeader(LZ4F_dctx* dctxPtr, const void* src, size_t srcSize)
+ *  input   : `src` points at the **beginning of the frame**
+ *  output  : set internal values of dctx, such as
+ *            dctx->frameInfo and dctx->dStage.
+ *            Also allocates internal buffers.
+ *  @return : nb Bytes read from src (necessarily <= srcSize)
+ *            or an error code (testable with LZ4F_isError())
+ */
+static size_t LZ4F_decodeHeader(LZ4F_dctx* dctx, const void* src, size_t srcSize)
 {
-    BYTE FLG, BD;
-    unsigned version, blockMode, blockChecksumFlag, contentSizeFlag, contentChecksumFlag, blockSizeID;
+    unsigned blockMode, blockChecksumFlag, contentSizeFlag, contentChecksumFlag, dictIDFlag, blockSizeID;
     size_t frameHeaderSize;
     const BYTE* srcPtr = (const BYTE*)src;
 
     /* need to decode header to get frameInfo */
     if (srcSize < minFHSize) return err0r(LZ4F_ERROR_frameHeader_incomplete);   /* minimal frame header size */
-    memset(&(dctxPtr->frameInfo), 0, sizeof(dctxPtr->frameInfo));
+    MEM_INIT(&(dctx->frameInfo), 0, sizeof(dctx->frameInfo));
 
     /* special case : skippable frames */
     if ((LZ4F_readLE32(srcPtr) & 0xFFFFFFF0U) == LZ4F_MAGIC_SKIPPABLE_START) {
-        dctxPtr->frameInfo.frameType = LZ4F_skippableFrame;
-        if (src == (void*)(dctxPtr->header)) {
-            dctxPtr->tmpInSize = srcSize;
-            dctxPtr->tmpInTarget = 8;
-            dctxPtr->dStage = dstage_storeSFrameSize;
+        dctx->frameInfo.frameType = LZ4F_skippableFrame;
+        if (src == (void*)(dctx->header)) {
+            dctx->tmpInSize = srcSize;
+            dctx->tmpInTarget = 8;
+            dctx->dStage = dstage_storeSFrameSize;
             return srcSize;
         } else {
-            dctxPtr->dStage = dstage_getSFrameSize;
+            dctx->dStage = dstage_getSFrameSize;
             return 4;
         }
     }
 
     /* control magic number */
-    if (LZ4F_readLE32(srcPtr) != LZ4F_MAGICNUMBER) return err0r(LZ4F_ERROR_frameType_unknown);
-    dctxPtr->frameInfo.frameType = LZ4F_frame;
+    if (LZ4F_readLE32(srcPtr) != LZ4F_MAGICNUMBER)
+        return err0r(LZ4F_ERROR_frameType_unknown);
+    dctx->frameInfo.frameType = LZ4F_frame;
 
     /* Flags */
-    FLG = srcPtr[4];
-    version = (FLG>>6) & _2BITS;
-    blockMode = (FLG>>5) & _1BIT;
-    blockChecksumFlag = (FLG>>4) & _1BIT;
-    contentSizeFlag = (FLG>>3) & _1BIT;
-    contentChecksumFlag = (FLG>>2) & _1BIT;
+    {   U32 const FLG = srcPtr[4];
+        U32 const version = (FLG>>6) & _2BITS;
+        blockChecksumFlag = (FLG>>4) & _1BIT;
+        blockMode = (FLG>>5) & _1BIT;
+        contentSizeFlag = (FLG>>3) & _1BIT;
+        contentChecksumFlag = (FLG>>2) & _1BIT;
+        dictIDFlag = FLG & _1BIT;
+        /* validate */
+        if (((FLG>>1)&_1BIT) != 0) return err0r(LZ4F_ERROR_reservedFlag_set); /* Reserved bit */
+        if (version != 1) return err0r(LZ4F_ERROR_headerVersion_wrong);        /* Version Number, only supported value */
+    }
 
     /* Frame Header Size */
-    frameHeaderSize = contentSizeFlag ? maxFHSize : minFHSize;
+    frameHeaderSize = minFHSize + (contentSizeFlag*8) + (dictIDFlag*4);
 
     if (srcSize < frameHeaderSize) {
         /* not enough input to fully decode frame header */
-        if (srcPtr != dctxPtr->header)
-            memcpy(dctxPtr->header, srcPtr, srcSize);
-        dctxPtr->tmpInSize = srcSize;
-        dctxPtr->tmpInTarget = frameHeaderSize;
-        dctxPtr->dStage = dstage_storeHeader;
+        if (srcPtr != dctx->header)
+            memcpy(dctx->header, srcPtr, srcSize);
+        dctx->tmpInSize = srcSize;
+        dctx->tmpInTarget = frameHeaderSize;
+        dctx->dStage = dstage_storeFrameHeader;
         return srcSize;
     }
 
-    BD = srcPtr[5];
-    blockSizeID = (BD>>4) & _3BITS;
-
-    /* validate */
-    if (version != 1) return err0r(LZ4F_ERROR_headerVersion_wrong);        /* Version Number, only supported value */
-    if (blockChecksumFlag != 0) return err0r(LZ4F_ERROR_blockChecksum_unsupported); /* Not supported for the time being */
-    if (((FLG>>0)&_2BITS) != 0) return err0r(LZ4F_ERROR_reservedFlag_set); /* Reserved bits */
-    if (((BD>>7)&_1BIT) != 0) return err0r(LZ4F_ERROR_reservedFlag_set);   /* Reserved bit */
-    if (blockSizeID < 4) return err0r(LZ4F_ERROR_maxBlockSize_invalid);    /* 4-7 only supported values for the time being */
-    if (((BD>>0)&_4BITS) != 0) return err0r(LZ4F_ERROR_reservedFlag_set);  /* Reserved bits */
+    {   U32 const BD = srcPtr[5];
+        blockSizeID = (BD>>4) & _3BITS;
+        /* validate */
+        if (((BD>>7)&_1BIT) != 0) return err0r(LZ4F_ERROR_reservedFlag_set);   /* Reserved bit */
+        if (blockSizeID < 4) return err0r(LZ4F_ERROR_maxBlockSize_invalid);    /* 4-7 only supported values for the time being */
+        if (((BD>>0)&_4BITS) != 0) return err0r(LZ4F_ERROR_reservedFlag_set);  /* Reserved bits */
+    }
 
     /* check header */
-    { BYTE const HC = LZ4F_headerChecksum(srcPtr+4, frameHeaderSize-5);
-      if (HC != srcPtr[frameHeaderSize-1]) return err0r(LZ4F_ERROR_headerChecksum_invalid); }
+    {   BYTE const HC = LZ4F_headerChecksum(srcPtr+4, frameHeaderSize-5);
+        if (HC != srcPtr[frameHeaderSize-1])
+            return err0r(LZ4F_ERROR_headerChecksum_invalid);
+    }
 
     /* save */
-    dctxPtr->frameInfo.blockMode = (LZ4F_blockMode_t)blockMode;
-    dctxPtr->frameInfo.contentChecksumFlag = (LZ4F_contentChecksum_t)contentChecksumFlag;
-    dctxPtr->frameInfo.blockSizeID = (LZ4F_blockSizeID_t)blockSizeID;
-    dctxPtr->maxBlockSize = LZ4F_getBlockSize(blockSizeID);
+    dctx->frameInfo.blockMode = (LZ4F_blockMode_t)blockMode;
+    dctx->frameInfo.blockChecksumFlag = (LZ4F_blockChecksum_t)blockChecksumFlag;
+    dctx->frameInfo.contentChecksumFlag = (LZ4F_contentChecksum_t)contentChecksumFlag;
+    dctx->frameInfo.blockSizeID = (LZ4F_blockSizeID_t)blockSizeID;
+    dctx->maxBlockSize = LZ4F_getBlockSize(blockSizeID);
     if (contentSizeFlag)
-        dctxPtr->frameRemainingSize = dctxPtr->frameInfo.contentSize = LZ4F_readLE64(srcPtr+6);
+        dctx->frameRemainingSize =
+            dctx->frameInfo.contentSize = LZ4F_readLE64(srcPtr+6);
+    if (dictIDFlag)
+        dctx->frameInfo.dictID = LZ4F_readLE32(srcPtr + frameHeaderSize - 5);
 
-    /* init */
-    if (contentChecksumFlag) XXH32_reset(&(dctxPtr->xxh), 0);
-
-    /* internal buffers allocation */
-    {   size_t const bufferNeeded = dctxPtr->maxBlockSize + ((dctxPtr->frameInfo.blockMode==LZ4F_blockLinked) * 128 KB);
-        if (bufferNeeded > dctxPtr->maxBufferSize) {   /* tmp buffers too small */
-            FREEMEM(dctxPtr->tmpIn);
-            dctxPtr->tmpIn = (BYTE*)ALLOCATOR(dctxPtr->maxBlockSize);
-            if (dctxPtr->tmpIn == NULL) return err0r(LZ4F_ERROR_allocation_failed);
-            FREEMEM(dctxPtr->tmpOutBuffer);
-            dctxPtr->maxBufferSize = 0;
-            dctxPtr->tmpOutBuffer= (BYTE*)ALLOCATOR(bufferNeeded);
-            if (dctxPtr->tmpOutBuffer== NULL) return err0r(LZ4F_ERROR_allocation_failed);
-            dctxPtr->maxBufferSize = bufferNeeded;
-    }   }
-    dctxPtr->tmpInSize = 0;
-    dctxPtr->tmpInTarget = 0;
-    dctxPtr->dict = dctxPtr->tmpOutBuffer;
-    dctxPtr->dictSize = 0;
-    dctxPtr->tmpOut = dctxPtr->tmpOutBuffer;
-    dctxPtr->tmpOutStart = 0;
-    dctxPtr->tmpOutSize = 0;
-
-    dctxPtr->dStage = dstage_getCBlockSize;
+    dctx->dStage = dstage_init;
 
     return frameHeaderSize;
 }
 
 
 /*! LZ4F_getFrameInfo() :
-*   Decodes frame header information, such as blockSize. Usage is optional.
-*   The objective is to extract header information before receiving decompressed data, typically for allocation purposes.
-*   LZ4F_getFrameInfo() can also be used *after* starting decompression, on a valid LZ4F_decompressionContext_t.
-*   The number of bytes consumed from srcBuffer will be provided within *srcSizePtr (necessarily <= original value).
-*   Decompression must resume from where it stopped (srcBuffer + *srcSizePtr)
-*   @return : hint of the better `srcSize` to use for next call to LZ4F_decompress,
-*             or an error code which can be tested using LZ4F_isError().
-*/
-LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_dctx* dctxPtr, LZ4F_frameInfo_t* frameInfoPtr,
+ *  This function extracts frame parameters (max blockSize, frame checksum, etc.).
+ *  Usage is optional. Objective is to provide relevant information for allocation purposes.
+ *  This function works in 2 situations :
+ *   - At the beginning of a new frame, in which case it will decode this information from `srcBuffer`, and start the decoding process.
+ *     Amount of input data provided must be large enough to successfully decode the frame header.
+ *     The header size is variable, but is guaranteed to be <= LZ4F_HEADER_SIZE_MAX bytes. It's possible to provide more input data than this minimum.
+ *   - After decoding has been started. In which case, no input is read, frame parameters are extracted from dctx.
+ *  The number of bytes consumed from srcBuffer will be updated within *srcSizePtr (necessarily <= original value).
+ *  Decompression must resume from (srcBuffer + *srcSizePtr).
+ * @return : a hint about how many srcSize bytes LZ4F_decompress() expects for next call,
+ *           or an error code which can be tested using LZ4F_isError()
+ *  note 1 : in case of error, dctx is not modified. Decoding operations can resume from where they stopped.
+ *  note 2 : frame parameters are *copied into* an already allocated LZ4F_frameInfo_t structure.
+ */
+LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_dctx* dctx, LZ4F_frameInfo_t* frameInfoPtr,
                                    const void* srcBuffer, size_t* srcSizePtr)
 {
-    if (dctxPtr->dStage > dstage_storeHeader) {  /* note : requires dstage_* header related to be at beginning of enum */
+    if (dctx->dStage > dstage_storeFrameHeader) {  /* assumption :  dstage_* header enum at beginning of range */
         /* frameInfo already decoded */
         size_t o=0, i=0;
         *srcSizePtr = 0;
-        *frameInfoPtr = dctxPtr->frameInfo;
-        return LZ4F_decompress(dctxPtr, NULL, &o, NULL, &i, NULL);  /* returns : recommended nb of bytes for LZ4F_decompress() */
+        *frameInfoPtr = dctx->frameInfo;
+        /* returns : recommended nb of bytes for LZ4F_decompress() */
+        return LZ4F_decompress(dctx, NULL, &o, NULL, &i, NULL);
     } else {
-        size_t nextSrcSize, o=0;
-        size_t const hSize = LZ4F_headerSize(srcBuffer, *srcSizePtr);
-        if (LZ4F_isError(hSize)) { *srcSizePtr=0; return hSize; }
-        if (*srcSizePtr < hSize) { *srcSizePtr=0; return err0r(LZ4F_ERROR_frameHeader_incomplete); }
+        if (dctx->dStage == dstage_storeFrameHeader) {
+            /* frame decoding already started, in the middle of header => automatic fail */
+            *srcSizePtr = 0;
+            return err0r(LZ4F_ERROR_frameDecoding_alreadyStarted);
+        } else {
+            size_t decodeResult;
+            size_t const hSize = LZ4F_headerSize(srcBuffer, *srcSizePtr);
+            if (LZ4F_isError(hSize)) { *srcSizePtr=0; return hSize; }
+            if (*srcSizePtr < hSize) {
+                *srcSizePtr=0;
+                return err0r(LZ4F_ERROR_frameHeader_incomplete);
+            }
 
-        *srcSizePtr = hSize;
-        nextSrcSize = LZ4F_decompress(dctxPtr, NULL, &o, srcBuffer, srcSizePtr, NULL);
-        if (dctxPtr->dStage <= dstage_storeHeader) return err0r(LZ4F_ERROR_frameHeader_incomplete); /* should not happen, already checked */
-        *frameInfoPtr = dctxPtr->frameInfo;
-        return nextSrcSize;
-    }
+            decodeResult = LZ4F_decodeHeader(dctx, srcBuffer, hSize);
+            if (LZ4F_isError(decodeResult)) {
+                *srcSizePtr = 0;
+            } else {
+                *srcSizePtr = decodeResult;
+                decodeResult = BHSize;   /* block header size */
+            }
+            *frameInfoPtr = dctx->frameInfo;
+            return decodeResult;
+    }   }
 }
 
 
-/* trivial redirector, for common prototype */
-static int LZ4F_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize)
+/* LZ4F_updateDict() :
+ * only used for LZ4F_blockLinked mode */
+static void LZ4F_updateDict(LZ4F_dctx* dctx,
+                      const BYTE* dstPtr, size_t dstSize, const BYTE* dstBufferStart,
+                      unsigned withinTmp)
 {
-    (void)dictStart; (void)dictSize;
-    return LZ4_decompress_safe (source, dest, compressedSize, maxDecompressedSize);
-}
+    if (dctx->dictSize==0)
+        dctx->dict = (const BYTE*)dstPtr;   /* priority to dictionary continuity */
 
-
-static void LZ4F_updateDict(LZ4F_dctx* dctxPtr, const BYTE* dstPtr, size_t dstSize, const BYTE* dstPtr0, unsigned withinTmp)
-{
-    if (dctxPtr->dictSize==0)
-        dctxPtr->dict = (const BYTE*)dstPtr;   /* priority to dictionary continuity */
-
-    if (dctxPtr->dict + dctxPtr->dictSize == dstPtr) {  /* dictionary continuity */
-        dctxPtr->dictSize += dstSize;
+    if (dctx->dict + dctx->dictSize == dstPtr) {  /* dictionary continuity, directly within dstBuffer */
+        dctx->dictSize += dstSize;
         return;
     }
 
-    if (dstPtr - dstPtr0 + dstSize >= 64 KB) {  /* dstBuffer large enough to become dictionary */
-        dctxPtr->dict = (const BYTE*)dstPtr0;
-        dctxPtr->dictSize = dstPtr - dstPtr0 + dstSize;
+    if (dstPtr - dstBufferStart + dstSize >= 64 KB) {  /* history in dstBuffer becomes large enough to become dictionary */
+        dctx->dict = (const BYTE*)dstBufferStart;
+        dctx->dictSize = dstPtr - dstBufferStart + dstSize;
         return;
     }
 
-    if ((withinTmp) && (dctxPtr->dict == dctxPtr->tmpOutBuffer)) {
-        /* assumption : dctxPtr->dict + dctxPtr->dictSize == dctxPtr->tmpOut + dctxPtr->tmpOutStart */
-        dctxPtr->dictSize += dstSize;
+    assert(dstSize < 64 KB);   /* if dstSize >= 64 KB, dictionary would be set into dstBuffer directly */
+
+    /* dstBuffer does not contain whole useful history (64 KB), so it must be saved within tmpOut */
+
+    if ((withinTmp) && (dctx->dict == dctx->tmpOutBuffer)) {   /* continue history within tmpOutBuffer */
+        /* withinTmp expectation : content of [dstPtr,dstSize] is same as [dict+dictSize,dstSize], so we just extend it */
+        assert(dctx->dict + dctx->dictSize == dctx->tmpOut + dctx->tmpOutStart);
+        dctx->dictSize += dstSize;
         return;
     }
 
     if (withinTmp) { /* copy relevant dict portion in front of tmpOut within tmpOutBuffer */
-        size_t const preserveSize = dctxPtr->tmpOut - dctxPtr->tmpOutBuffer;
-        size_t copySize = 64 KB - dctxPtr->tmpOutSize;
-        const BYTE* const oldDictEnd = dctxPtr->dict + dctxPtr->dictSize - dctxPtr->tmpOutStart;
-        if (dctxPtr->tmpOutSize > 64 KB) copySize = 0;
+        size_t const preserveSize = dctx->tmpOut - dctx->tmpOutBuffer;
+        size_t copySize = 64 KB - dctx->tmpOutSize;
+        const BYTE* const oldDictEnd = dctx->dict + dctx->dictSize - dctx->tmpOutStart;
+        if (dctx->tmpOutSize > 64 KB) copySize = 0;
         if (copySize > preserveSize) copySize = preserveSize;
 
-        memcpy(dctxPtr->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize);
+        memcpy(dctx->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize);
 
-        dctxPtr->dict = dctxPtr->tmpOutBuffer;
-        dctxPtr->dictSize = preserveSize + dctxPtr->tmpOutStart + dstSize;
+        dctx->dict = dctx->tmpOutBuffer;
+        dctx->dictSize = preserveSize + dctx->tmpOutStart + dstSize;
         return;
     }
 
-    if (dctxPtr->dict == dctxPtr->tmpOutBuffer) {    /* copy dst into tmp to complete dict */
-        if (dctxPtr->dictSize + dstSize > dctxPtr->maxBufferSize) {  /* tmp buffer not large enough */
-            size_t const preserveSize = 64 KB - dstSize;   /* note : dstSize < 64 KB */
-            memcpy(dctxPtr->tmpOutBuffer, dctxPtr->dict + dctxPtr->dictSize - preserveSize, preserveSize);
-            dctxPtr->dictSize = preserveSize;
+    if (dctx->dict == dctx->tmpOutBuffer) {    /* copy dst into tmp to complete dict */
+        if (dctx->dictSize + dstSize > dctx->maxBufferSize) {  /* tmp buffer not large enough */
+            size_t const preserveSize = 64 KB - dstSize;
+            memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - preserveSize, preserveSize);
+            dctx->dictSize = preserveSize;
         }
-        memcpy(dctxPtr->tmpOutBuffer + dctxPtr->dictSize, dstPtr, dstSize);
-        dctxPtr->dictSize += dstSize;
+        memcpy(dctx->tmpOutBuffer + dctx->dictSize, dstPtr, dstSize);
+        dctx->dictSize += dstSize;
         return;
     }
 
     /* join dict & dest into tmp */
-    {   size_t preserveSize = 64 KB - dstSize;   /* note : dstSize < 64 KB */
-        if (preserveSize > dctxPtr->dictSize) preserveSize = dctxPtr->dictSize;
-        memcpy(dctxPtr->tmpOutBuffer, dctxPtr->dict + dctxPtr->dictSize - preserveSize, preserveSize);
-        memcpy(dctxPtr->tmpOutBuffer + preserveSize, dstPtr, dstSize);
-        dctxPtr->dict = dctxPtr->tmpOutBuffer;
-        dctxPtr->dictSize = preserveSize + dstSize;
+    {   size_t preserveSize = 64 KB - dstSize;
+        if (preserveSize > dctx->dictSize) preserveSize = dctx->dictSize;
+        memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - preserveSize, preserveSize);
+        memcpy(dctx->tmpOutBuffer + preserveSize, dstPtr, dstSize);
+        dctx->dict = dctx->tmpOutBuffer;
+        dctx->dictSize = preserveSize + dstSize;
     }
 }
 
 
 
 /*! LZ4F_decompress() :
-* Call this function repetitively to regenerate data compressed within srcBuffer.
-* The function will attempt to decode up to *srcSizePtr bytes from srcBuffer, into dstBuffer of capacity *dstSizePtr.
-*
-* The number of bytes regenerated into dstBuffer will be provided within *dstSizePtr (necessarily <= original value).
-*
-* The number of bytes effectively read from srcBuffer will be provided within *srcSizePtr (necessarily <= original value).
-* If the number of bytes read is < number of bytes provided, then the decompression operation is not complete.
-* Remaining data will have to be presented again in a subsequent invocation.
-*
-* The function result is an hint of the better srcSize to use for next call to LZ4F_decompress.
-* Basically, it's the size of the current (or remaining) compressed block + header of next block.
-* Respecting the hint provides some boost to performance, since it allows less buffer shuffling.
-* Note that this is just a hint, it's always possible to any srcSize value.
-* When a frame is fully decoded, @return will be 0.
-* If decompression failed, @return is an error code which can be tested using LZ4F_isError().
-*/
-size_t LZ4F_decompress(LZ4F_dctx* dctxPtr,
+ *  Call this function repeatedly to regenerate the data compressed within srcBuffer.
+ *  The function will attempt to decode up to *srcSizePtr bytes from srcBuffer
+ *  into dstBuffer of capacity *dstSizePtr.
+ *
+ *  The number of bytes regenerated into dstBuffer will be provided within *dstSizePtr (necessarily <= original value).
+ *
+ *  The number of bytes effectively read from srcBuffer will be provided within *srcSizePtr (necessarily <= original value).
+ *  If number of bytes read is < number of bytes provided, then decompression operation is not complete.
+ *  Remaining data will have to be presented again in a subsequent invocation.
+ *
+ *  The function result is a hint about the preferred srcSize to use for the next call to LZ4F_decompress().
+ *  Schematically, it's the size of the current (or remaining) compressed block + header of next block.
+ *  Respecting the hint provides a small boost to performance, since it allows less buffer shuffling.
+ *  Note that this is just a hint, and it's always possible to provide any srcSize value.
+ *  When a frame is fully decoded, @return will be 0.
+ *  If decompression failed, @return is an error code which can be tested using LZ4F_isError().
+ */
+size_t LZ4F_decompress(LZ4F_dctx* dctx,
                        void* dstBuffer, size_t* dstSizePtr,
                        const void* srcBuffer, size_t* srcSizePtr,
                        const LZ4F_decompressOptions_t* decompressOptionsPtr)
@@ -1058,271 +1324,353 @@
     size_t nextSrcSizeHint = 1;
 
 
-    memset(&optionsNull, 0, sizeof(optionsNull));
+    MEM_INIT(&optionsNull, 0, sizeof(optionsNull));
     if (decompressOptionsPtr==NULL) decompressOptionsPtr = &optionsNull;
     *srcSizePtr = 0;
     *dstSizePtr = 0;
 
-    /* programmed as a state machine */
+    /* behaves as a state machine */
 
     while (doAnotherStage) {
 
-        switch(dctxPtr->dStage)
+        switch(dctx->dStage)
         {
 
-        case dstage_getHeader:
+        case dstage_getFrameHeader:
             if ((size_t)(srcEnd-srcPtr) >= maxFHSize) {  /* enough to decode - shortcut */
-                LZ4F_errorCode_t const hSize = LZ4F_decodeHeader(dctxPtr, srcPtr, srcEnd-srcPtr);
+                size_t const hSize = LZ4F_decodeHeader(dctx, srcPtr, srcEnd-srcPtr);  /* will update dStage appropriately */
                 if (LZ4F_isError(hSize)) return hSize;
                 srcPtr += hSize;
                 break;
             }
-            dctxPtr->tmpInSize = 0;
-            dctxPtr->tmpInTarget = minFHSize;   /* minimum to attempt decode */
-            dctxPtr->dStage = dstage_storeHeader;
-            /* pass-through */
+            dctx->tmpInSize = 0;
+            if (srcEnd-srcPtr == 0) return minFHSize;   /* 0-size input */
+            dctx->tmpInTarget = minFHSize;   /* minimum size to decode header */
+            dctx->dStage = dstage_storeFrameHeader;
+            /* fall-through */
 
-        case dstage_storeHeader:
-            {   size_t sizeToCopy = dctxPtr->tmpInTarget - dctxPtr->tmpInSize;
-                if (sizeToCopy > (size_t)(srcEnd - srcPtr)) sizeToCopy =  srcEnd - srcPtr;
-                memcpy(dctxPtr->header + dctxPtr->tmpInSize, srcPtr, sizeToCopy);
-                dctxPtr->tmpInSize += sizeToCopy;
+        case dstage_storeFrameHeader:
+            {   size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize, (size_t)(srcEnd - srcPtr));
+                memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy);
+                dctx->tmpInSize += sizeToCopy;
                 srcPtr += sizeToCopy;
-                if (dctxPtr->tmpInSize < dctxPtr->tmpInTarget) {
-                    nextSrcSizeHint = (dctxPtr->tmpInTarget - dctxPtr->tmpInSize) + BHSize;   /* rest of header + nextBlockHeader */
-                    doAnotherStage = 0;   /* not enough src data, ask for some more */
-                    break;
-                }
-                {   LZ4F_errorCode_t const hSize = LZ4F_decodeHeader(dctxPtr, dctxPtr->header, dctxPtr->tmpInTarget);
-                    if (LZ4F_isError(hSize)) return hSize;
-                }
+            }
+            if (dctx->tmpInSize < dctx->tmpInTarget) {
+                nextSrcSizeHint = (dctx->tmpInTarget - dctx->tmpInSize) + BHSize;   /* rest of header + nextBlockHeader */
+                doAnotherStage = 0;   /* not enough src data, ask for some more */
                 break;
             }
+            {   size_t const hSize = LZ4F_decodeHeader(dctx, dctx->header, dctx->tmpInTarget);  /* will update dStage appropriately */
+                if (LZ4F_isError(hSize)) return hSize;
+            }
+            break;
 
-        case dstage_getCBlockSize:
+        case dstage_init:
+            if (dctx->frameInfo.contentChecksumFlag) XXH32_reset(&(dctx->xxh), 0);
+            /* internal buffers allocation */
+            {   size_t const bufferNeeded = dctx->maxBlockSize
+                    + ((dctx->frameInfo.blockMode==LZ4F_blockLinked) * 128 KB);
+                if (bufferNeeded > dctx->maxBufferSize) {   /* tmp buffers too small */
+                    dctx->maxBufferSize = 0;   /* ensure allocation will be re-attempted on next entry*/
+                    FREEMEM(dctx->tmpIn);
+                    dctx->tmpIn = (BYTE*)ALLOC(dctx->maxBlockSize + 4 /* block checksum */);
+                    if (dctx->tmpIn == NULL)
+                        return err0r(LZ4F_ERROR_allocation_failed);
+                    FREEMEM(dctx->tmpOutBuffer);
+                    dctx->tmpOutBuffer= (BYTE*)ALLOC(bufferNeeded);
+                    if (dctx->tmpOutBuffer== NULL)
+                        return err0r(LZ4F_ERROR_allocation_failed);
+                    dctx->maxBufferSize = bufferNeeded;
+            }   }
+            dctx->tmpInSize = 0;
+            dctx->tmpInTarget = 0;
+            dctx->tmpOut = dctx->tmpOutBuffer;
+            dctx->tmpOutStart = 0;
+            dctx->tmpOutSize = 0;
+
+            dctx->dStage = dstage_getBlockHeader;
+            /* fall-through */
+
+        case dstage_getBlockHeader:
             if ((size_t)(srcEnd - srcPtr) >= BHSize) {
                 selectedIn = srcPtr;
                 srcPtr += BHSize;
             } else {
                 /* not enough input to read cBlockSize field */
-                dctxPtr->tmpInSize = 0;
-                dctxPtr->dStage = dstage_storeCBlockSize;
+                dctx->tmpInSize = 0;
+                dctx->dStage = dstage_storeBlockHeader;
             }
 
-            if (dctxPtr->dStage == dstage_storeCBlockSize)   /* can be skipped */
-        case dstage_storeCBlockSize:
-            {   size_t sizeToCopy = BHSize - dctxPtr->tmpInSize;
-                if (sizeToCopy > (size_t)(srcEnd - srcPtr)) sizeToCopy = srcEnd - srcPtr;
-                memcpy(dctxPtr->tmpIn + dctxPtr->tmpInSize, srcPtr, sizeToCopy);
+            if (dctx->dStage == dstage_storeBlockHeader)   /* can be skipped */
+        case dstage_storeBlockHeader:
+            {   size_t const remainingInput = (size_t)(srcEnd - srcPtr);
+                size_t const wantedData = BHSize - dctx->tmpInSize;
+                size_t const sizeToCopy = MIN(wantedData, remainingInput);
+                memcpy(dctx->tmpIn + dctx->tmpInSize, srcPtr, sizeToCopy);
                 srcPtr += sizeToCopy;
-                dctxPtr->tmpInSize += sizeToCopy;
-                if (dctxPtr->tmpInSize < BHSize) {   /* not enough input to get full cBlockSize; wait for more */
-                    nextSrcSizeHint = BHSize - dctxPtr->tmpInSize;
+                dctx->tmpInSize += sizeToCopy;
+
+                if (dctx->tmpInSize < BHSize) {   /* not enough input for cBlockSize */
+                    nextSrcSizeHint = BHSize - dctx->tmpInSize;
                     doAnotherStage  = 0;
                     break;
                 }
-                selectedIn = dctxPtr->tmpIn;
-            }
+                selectedIn = dctx->tmpIn;
+            }   /* if (dctx->dStage == dstage_storeBlockHeader) */
 
-        /* case dstage_decodeCBlockSize: */   /* no more direct access, to prevent scan-build warning */
+        /* decode block header */
             {   size_t const nextCBlockSize = LZ4F_readLE32(selectedIn) & 0x7FFFFFFFU;
-                if (nextCBlockSize==0) {  /* frameEnd signal, no more CBlock */
-                    dctxPtr->dStage = dstage_getSuffix;
+                size_t const crcSize = dctx->frameInfo.blockChecksumFlag * 4;
+                if (nextCBlockSize==0) {  /* frameEnd signal, no more block */
+                    dctx->dStage = dstage_getSuffix;
                     break;
                 }
-                if (nextCBlockSize > dctxPtr->maxBlockSize) return err0r(LZ4F_ERROR_GENERIC);   /* invalid cBlockSize */
-                dctxPtr->tmpInTarget = nextCBlockSize;
+                if (nextCBlockSize > dctx->maxBlockSize)
+                    return err0r(LZ4F_ERROR_maxBlockSize_invalid);
                 if (LZ4F_readLE32(selectedIn) & LZ4F_BLOCKUNCOMPRESSED_FLAG) {
-                    dctxPtr->dStage = dstage_copyDirect;
+                    /* next block is uncompressed */
+                    dctx->tmpInTarget = nextCBlockSize;
+                    if (dctx->frameInfo.blockChecksumFlag) {
+                        XXH32_reset(&dctx->blockChecksum, 0);
+                    }
+                    dctx->dStage = dstage_copyDirect;
                     break;
                 }
-                dctxPtr->dStage = dstage_getCBlock;
+                /* next block is a compressed block */
+                dctx->tmpInTarget = nextCBlockSize + crcSize;
+                dctx->dStage = dstage_getCBlock;
                 if (dstPtr==dstEnd) {
-                    nextSrcSizeHint = nextCBlockSize + BHSize;
+                    nextSrcSizeHint = nextCBlockSize + crcSize + BHSize;
                     doAnotherStage = 0;
                 }
                 break;
             }
 
         case dstage_copyDirect:   /* uncompressed block */
-            {   size_t sizeToCopy = dctxPtr->tmpInTarget;
-                if ((size_t)(srcEnd-srcPtr) < sizeToCopy) sizeToCopy = srcEnd - srcPtr;  /* not enough input to read full block */
-                if ((size_t)(dstEnd-dstPtr) < sizeToCopy) sizeToCopy = dstEnd - dstPtr;
+            {   size_t const minBuffSize = MIN((size_t)(srcEnd-srcPtr), (size_t)(dstEnd-dstPtr));
+                size_t const sizeToCopy = MIN(dctx->tmpInTarget, minBuffSize);  /* never beyond end of block */
                 memcpy(dstPtr, srcPtr, sizeToCopy);
-                if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), srcPtr, sizeToCopy);
-                if (dctxPtr->frameInfo.contentSize) dctxPtr->frameRemainingSize -= sizeToCopy;
+                if (dctx->frameInfo.blockChecksumFlag) {
+                    XXH32_update(&dctx->blockChecksum, srcPtr, sizeToCopy);
+                }
+                if (dctx->frameInfo.contentChecksumFlag)
+                    XXH32_update(&dctx->xxh, srcPtr, sizeToCopy);
+                if (dctx->frameInfo.contentSize)
+                    dctx->frameRemainingSize -= sizeToCopy;
 
-                /* dictionary management */
-                if (dctxPtr->frameInfo.blockMode==LZ4F_blockLinked)
-                    LZ4F_updateDict(dctxPtr, dstPtr, sizeToCopy, dstStart, 0);
+                /* history management (linked blocks only) */
+                if (dctx->frameInfo.blockMode == LZ4F_blockLinked)
+                    LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 0);
 
                 srcPtr += sizeToCopy;
                 dstPtr += sizeToCopy;
-                if (sizeToCopy == dctxPtr->tmpInTarget) {  /* all copied */
-                    dctxPtr->dStage = dstage_getCBlockSize;
+                if (sizeToCopy == dctx->tmpInTarget) {   /* all done */
+                    if (dctx->frameInfo.blockChecksumFlag) {
+                        dctx->tmpInSize = 0;   /* no checksum byte buffered yet */
+                        dctx->dStage = dstage_getBlockChecksum;
+                    } else
+                        dctx->dStage = dstage_getBlockHeader;  /* new block */
                     break;
                 }
-                dctxPtr->tmpInTarget -= sizeToCopy;   /* still need to copy more */
-                nextSrcSizeHint = dctxPtr->tmpInTarget + BHSize;
+                dctx->tmpInTarget -= sizeToCopy;  /* need to copy more */
+                nextSrcSizeHint = dctx->tmpInTarget   /* rest of this block */
+                                + dctx->frameInfo.blockChecksumFlag * 4  /* block checksum (not the content checksum) */
+                                + BHSize /* next header size */;
                 doAnotherStage = 0;
                 break;
             }
 
-        case dstage_getCBlock:   /* entry from dstage_decodeCBlockSize */
-            if ((size_t)(srcEnd-srcPtr) < dctxPtr->tmpInTarget) {
-                dctxPtr->tmpInSize = 0;
-                dctxPtr->dStage = dstage_storeCBlock;
-                break;
-            }
-            selectedIn = srcPtr;
-            srcPtr += dctxPtr->tmpInTarget;
-            dctxPtr->dStage = dstage_decodeCBlock;
+        /* check block checksum for recently transferred uncompressed block */
+        case dstage_getBlockChecksum:
+            {   const void* crcSrc;
+                if ((srcEnd-srcPtr >= 4) && (dctx->tmpInSize==0)) {
+                    crcSrc = srcPtr;
+                    srcPtr += 4;
+                } else {
+                    size_t const stillToCopy = 4 - dctx->tmpInSize;
+                    size_t const sizeToCopy = MIN(stillToCopy, (size_t)(srcEnd-srcPtr));
+                    memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy);
+                    dctx->tmpInSize += sizeToCopy;
+                    srcPtr += sizeToCopy;
+                    if (dctx->tmpInSize < 4) {  /* all input consumed */
+                        doAnotherStage = 0;
+                        break;
+                    }
+                    crcSrc = dctx->header;
+                }
+                {   U32 const readCRC = LZ4F_readLE32(crcSrc);
+                    U32 const calcCRC = XXH32_digest(&dctx->blockChecksum);
+                    if (readCRC != calcCRC)
+                        return err0r(LZ4F_ERROR_blockChecksum_invalid);
+            }   }
+            dctx->dStage = dstage_getBlockHeader;  /* new block */
             break;
 
+        case dstage_getCBlock:
+            if ((size_t)(srcEnd-srcPtr) < dctx->tmpInTarget) {
+                dctx->tmpInSize = 0;
+                dctx->dStage = dstage_storeCBlock;
+                break;
+            }
+            /* input large enough to read full block directly */
+            selectedIn = srcPtr;
+            srcPtr += dctx->tmpInTarget;
+
+            if (0)  /* jump over next block */
         case dstage_storeCBlock:
-            {   size_t sizeToCopy = dctxPtr->tmpInTarget - dctxPtr->tmpInSize;
-                if (sizeToCopy > (size_t)(srcEnd-srcPtr)) sizeToCopy = srcEnd-srcPtr;
-                memcpy(dctxPtr->tmpIn + dctxPtr->tmpInSize, srcPtr, sizeToCopy);
-                dctxPtr->tmpInSize += sizeToCopy;
+            {   size_t const wantedData = dctx->tmpInTarget - dctx->tmpInSize;
+                size_t const inputLeft = (size_t)(srcEnd-srcPtr);
+                size_t const sizeToCopy = MIN(wantedData, inputLeft);
+                memcpy(dctx->tmpIn + dctx->tmpInSize, srcPtr, sizeToCopy);
+                dctx->tmpInSize += sizeToCopy;
                 srcPtr += sizeToCopy;
-                if (dctxPtr->tmpInSize < dctxPtr->tmpInTarget) { /* need more input */
-                    nextSrcSizeHint = (dctxPtr->tmpInTarget - dctxPtr->tmpInSize) + BHSize;
+                if (dctx->tmpInSize < dctx->tmpInTarget) { /* need more input */
+                    nextSrcSizeHint = (dctx->tmpInTarget - dctx->tmpInSize) + BHSize;
                     doAnotherStage=0;
                     break;
                 }
-                selectedIn = dctxPtr->tmpIn;
-                dctxPtr->dStage = dstage_decodeCBlock;
-                /* pass-through */
+                selectedIn = dctx->tmpIn;
             }
 
-        case dstage_decodeCBlock:
-            if ((size_t)(dstEnd-dstPtr) < dctxPtr->maxBlockSize)   /* not enough place into dst : decode into tmpOut */
-                dctxPtr->dStage = dstage_decodeCBlock_intoTmp;
-            else
-                dctxPtr->dStage = dstage_decodeCBlock_intoDst;
-            break;
+            /* At this stage, input is large enough to decode a block */
+            if (dctx->frameInfo.blockChecksumFlag) {
+                dctx->tmpInTarget -= 4;
+                assert(selectedIn != NULL);  /* selectedIn is defined at this stage (either srcPtr, or dctx->tmpIn) */
+                {   U32 const readBlockCrc = LZ4F_readLE32(selectedIn + dctx->tmpInTarget);
+                    U32 const calcBlockCrc = XXH32(selectedIn, dctx->tmpInTarget, 0);
+                    if (readBlockCrc != calcBlockCrc)
+                        return err0r(LZ4F_ERROR_blockChecksum_invalid);
+            }   }
 
-        case dstage_decodeCBlock_intoDst:
-            {   int (*decoder)(const char*, char*, int, int, const char*, int);
+            if ((size_t)(dstEnd-dstPtr) >= dctx->maxBlockSize) {
+                const char* dict = (const char*)dctx->dict;
+                size_t dictSize = dctx->dictSize;
                 int decodedSize;
-
-                if (dctxPtr->frameInfo.blockMode == LZ4F_blockLinked)
-                    decoder = LZ4_decompress_safe_usingDict;
-                else
-                    decoder = LZ4F_decompress_safe;
-
-                decodedSize = decoder((const char*)selectedIn, (char*)dstPtr, (int)dctxPtr->tmpInTarget, (int)dctxPtr->maxBlockSize, (const char*)dctxPtr->dict, (int)dctxPtr->dictSize);
+                if (dict && dictSize > 1 GB) {
+                    /* the dictSize param is an int, avoid truncation / sign issues */
+                    dict += dictSize - 64 KB;
+                    dictSize = 64 KB;
+                }
+                /* enough capacity in `dst` to decompress directly there */
+                decodedSize = LZ4_decompress_safe_usingDict(
+                        (const char*)selectedIn, (char*)dstPtr,
+                        (int)dctx->tmpInTarget, (int)dctx->maxBlockSize,
+                        dict, (int)dictSize);
                 if (decodedSize < 0) return err0r(LZ4F_ERROR_GENERIC);   /* decompression failed */
-                if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), dstPtr, decodedSize);
-                if (dctxPtr->frameInfo.contentSize) dctxPtr->frameRemainingSize -= decodedSize;
+                if (dctx->frameInfo.contentChecksumFlag)
+                    XXH32_update(&(dctx->xxh), dstPtr, decodedSize);
+                if (dctx->frameInfo.contentSize)
+                    dctx->frameRemainingSize -= decodedSize;
 
                 /* dictionary management */
-                if (dctxPtr->frameInfo.blockMode==LZ4F_blockLinked)
-                    LZ4F_updateDict(dctxPtr, dstPtr, decodedSize, dstStart, 0);
+                if (dctx->frameInfo.blockMode==LZ4F_blockLinked)
+                    LZ4F_updateDict(dctx, dstPtr, decodedSize, dstStart, 0);
 
                 dstPtr += decodedSize;
-                dctxPtr->dStage = dstage_getCBlockSize;
+                dctx->dStage = dstage_getBlockHeader;
                 break;
             }
 
-        case dstage_decodeCBlock_intoTmp:
             /* not enough place into dst : decode into tmpOut */
-            {   int (*decoder)(const char*, char*, int, int, const char*, int);
-                int decodedSize;
-
-                if (dctxPtr->frameInfo.blockMode == LZ4F_blockLinked)
-                    decoder = LZ4_decompress_safe_usingDict;
-                else
-                    decoder = LZ4F_decompress_safe;
-
-                /* ensure enough place for tmpOut */
-                if (dctxPtr->frameInfo.blockMode == LZ4F_blockLinked) {
-                    if (dctxPtr->dict == dctxPtr->tmpOutBuffer) {
-                        if (dctxPtr->dictSize > 128 KB) {
-                            memcpy(dctxPtr->tmpOutBuffer, dctxPtr->dict + dctxPtr->dictSize - 64 KB, 64 KB);
-                            dctxPtr->dictSize = 64 KB;
-                        }
-                        dctxPtr->tmpOut = dctxPtr->tmpOutBuffer + dctxPtr->dictSize;
-                    } else {  /* dict not within tmp */
-                        size_t reservedDictSpace = dctxPtr->dictSize;
-                        if (reservedDictSpace > 64 KB) reservedDictSpace = 64 KB;
-                        dctxPtr->tmpOut = dctxPtr->tmpOutBuffer + reservedDictSpace;
+            /* ensure enough place for tmpOut */
+            if (dctx->frameInfo.blockMode == LZ4F_blockLinked) {
+                if (dctx->dict == dctx->tmpOutBuffer) {
+                    if (dctx->dictSize > 128 KB) {
+                        memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - 64 KB, 64 KB);
+                        dctx->dictSize = 64 KB;
                     }
-                }
+                    dctx->tmpOut = dctx->tmpOutBuffer + dctx->dictSize;
+                } else {  /* dict not within tmp */
+                    size_t const reservedDictSpace = MIN(dctx->dictSize, 64 KB);
+                    dctx->tmpOut = dctx->tmpOutBuffer + reservedDictSpace;
+            }   }
 
-                /* Decode */
-                decodedSize = decoder((const char*)selectedIn, (char*)dctxPtr->tmpOut, (int)dctxPtr->tmpInTarget, (int)dctxPtr->maxBlockSize, (const char*)dctxPtr->dict, (int)dctxPtr->dictSize);
-                if (decodedSize < 0) return err0r(LZ4F_ERROR_decompressionFailed);   /* decompression failed */
-                if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), dctxPtr->tmpOut, decodedSize);
-                if (dctxPtr->frameInfo.contentSize) dctxPtr->frameRemainingSize -= decodedSize;
-                dctxPtr->tmpOutSize = decodedSize;
-                dctxPtr->tmpOutStart = 0;
-                dctxPtr->dStage = dstage_flushOut;
-                break;
+            /* Decode block */
+            {   const char* dict = (const char*)dctx->dict;
+                size_t dictSize = dctx->dictSize;
+                int decodedSize;
+                if (dict && dictSize > 1 GB) {
+                    /* the dictSize param is an int, avoid truncation / sign issues */
+                    dict += dictSize - 64 KB;
+                    dictSize = 64 KB;
+                }
+                decodedSize = LZ4_decompress_safe_usingDict(
+                        (const char*)selectedIn, (char*)dctx->tmpOut,
+                        (int)dctx->tmpInTarget, (int)dctx->maxBlockSize,
+                        dict, (int)dictSize);
+                if (decodedSize < 0)  /* decompression failed */
+                    return err0r(LZ4F_ERROR_decompressionFailed);
+                if (dctx->frameInfo.contentChecksumFlag)
+                    XXH32_update(&(dctx->xxh), dctx->tmpOut, decodedSize);
+                if (dctx->frameInfo.contentSize)
+                    dctx->frameRemainingSize -= decodedSize;
+                dctx->tmpOutSize = decodedSize;
+                dctx->tmpOutStart = 0;
+                dctx->dStage = dstage_flushOut;
             }
+            /* fall-through */
 
         case dstage_flushOut:  /* flush decoded data from tmpOut to dstBuffer */
-            {   size_t sizeToCopy = dctxPtr->tmpOutSize - dctxPtr->tmpOutStart;
-                if (sizeToCopy > (size_t)(dstEnd-dstPtr)) sizeToCopy = dstEnd-dstPtr;
-                memcpy(dstPtr, dctxPtr->tmpOut + dctxPtr->tmpOutStart, sizeToCopy);
+            {   size_t const sizeToCopy = MIN(dctx->tmpOutSize - dctx->tmpOutStart, (size_t)(dstEnd-dstPtr));
+                memcpy(dstPtr, dctx->tmpOut + dctx->tmpOutStart, sizeToCopy);
 
                 /* dictionary management */
-                if (dctxPtr->frameInfo.blockMode==LZ4F_blockLinked)
-                    LZ4F_updateDict(dctxPtr, dstPtr, sizeToCopy, dstStart, 1);
+                if (dctx->frameInfo.blockMode == LZ4F_blockLinked)
+                    LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 1 /*withinTmp*/);
 
-                dctxPtr->tmpOutStart += sizeToCopy;
+                dctx->tmpOutStart += sizeToCopy;
                 dstPtr += sizeToCopy;
 
-                /* end of flush ? */
-                if (dctxPtr->tmpOutStart == dctxPtr->tmpOutSize) {
-                    dctxPtr->dStage = dstage_getCBlockSize;
+                if (dctx->tmpOutStart == dctx->tmpOutSize) { /* all flushed */
+                    dctx->dStage = dstage_getBlockHeader;  /* get next block */
                     break;
                 }
+                /* could not flush everything : stop there, just request a block header */
+                doAnotherStage = 0;
                 nextSrcSizeHint = BHSize;
-                doAnotherStage = 0;   /* still some data to flush */
                 break;
             }
 
         case dstage_getSuffix:
-            {   size_t const suffixSize = dctxPtr->frameInfo.contentChecksumFlag * 4;
-                if (dctxPtr->frameRemainingSize) return err0r(LZ4F_ERROR_frameSize_wrong);   /* incorrect frame size decoded */
-                if (suffixSize == 0) {  /* frame completed */
-                    nextSrcSizeHint = 0;
-                    dctxPtr->dStage = dstage_getHeader;
-                    doAnotherStage = 0;
-                    break;
-                }
-                if ((srcEnd - srcPtr) < 4) {  /* not enough size for entire CRC */
-                    dctxPtr->tmpInSize = 0;
-                    dctxPtr->dStage = dstage_storeSuffix;
-                } else {
-                    selectedIn = srcPtr;
-                    srcPtr += 4;
-                }
+            if (dctx->frameRemainingSize)
+                return err0r(LZ4F_ERROR_frameSize_wrong);   /* incorrect frame size decoded */
+            if (!dctx->frameInfo.contentChecksumFlag) {  /* no checksum, frame is completed */
+                nextSrcSizeHint = 0;
+                LZ4F_resetDecompressionContext(dctx);
+                doAnotherStage = 0;
+                break;
+            }
+            if ((srcEnd - srcPtr) < 4) {  /* not enough size for entire CRC */
+                dctx->tmpInSize = 0;
+                dctx->dStage = dstage_storeSuffix;
+            } else {
+                selectedIn = srcPtr;
+                srcPtr += 4;
             }
 
-            if (dctxPtr->dStage == dstage_storeSuffix)   /* can be skipped */
+            if (dctx->dStage == dstage_storeSuffix)   /* can be skipped */
         case dstage_storeSuffix:
-            {
-                size_t sizeToCopy = 4 - dctxPtr->tmpInSize;
-                if (sizeToCopy > (size_t)(srcEnd - srcPtr)) sizeToCopy = srcEnd - srcPtr;
-                memcpy(dctxPtr->tmpIn + dctxPtr->tmpInSize, srcPtr, sizeToCopy);
+            {   size_t const remainingInput = (size_t)(srcEnd - srcPtr);
+                size_t const wantedData = 4 - dctx->tmpInSize;
+                size_t const sizeToCopy = MIN(wantedData, remainingInput);
+                memcpy(dctx->tmpIn + dctx->tmpInSize, srcPtr, sizeToCopy);
                 srcPtr += sizeToCopy;
-                dctxPtr->tmpInSize += sizeToCopy;
-                if (dctxPtr->tmpInSize < 4) { /* not enough input to read complete suffix */
-                    nextSrcSizeHint = 4 - dctxPtr->tmpInSize;
+                dctx->tmpInSize += sizeToCopy;
+                if (dctx->tmpInSize < 4) { /* not enough input to read complete suffix */
+                    nextSrcSizeHint = 4 - dctx->tmpInSize;
                     doAnotherStage=0;
                     break;
                 }
-                selectedIn = dctxPtr->tmpIn;
-            }
+                selectedIn = dctx->tmpIn;
+            }   /* if (dctx->dStage == dstage_storeSuffix) */
 
-        /* case dstage_checkSuffix: */   /* no direct call, to avoid scan-build warning */
+        /* case dstage_checkSuffix: */   /* no direct entry, avoid initialization risks */
             {   U32 const readCRC = LZ4F_readLE32(selectedIn);
-                U32 const resultCRC = XXH32_digest(&(dctxPtr->xxh));
-                if (readCRC != resultCRC) return err0r(LZ4F_ERROR_contentChecksum_invalid);
+                U32 const resultCRC = XXH32_digest(&(dctx->xxh));
+                if (readCRC != resultCRC)
+                    return err0r(LZ4F_ERROR_contentChecksum_invalid);
                 nextSrcSizeHint = 0;
-                dctxPtr->dStage = dstage_getHeader;
+                LZ4F_resetDecompressionContext(dctx);
                 doAnotherStage = 0;
                 break;
             }
@@ -1333,77 +1681,78 @@
                 srcPtr += 4;
             } else {
                 /* not enough input to read cBlockSize field */
-                dctxPtr->tmpInSize = 4;
-                dctxPtr->tmpInTarget = 8;
-                dctxPtr->dStage = dstage_storeSFrameSize;
+                dctx->tmpInSize = 4;
+                dctx->tmpInTarget = 8;
+                dctx->dStage = dstage_storeSFrameSize;
             }
 
-            if (dctxPtr->dStage == dstage_storeSFrameSize)
+            if (dctx->dStage == dstage_storeSFrameSize)
         case dstage_storeSFrameSize:
-            {
-                size_t sizeToCopy = dctxPtr->tmpInTarget - dctxPtr->tmpInSize;
-                if (sizeToCopy > (size_t)(srcEnd - srcPtr)) sizeToCopy = srcEnd - srcPtr;
-                memcpy(dctxPtr->header + dctxPtr->tmpInSize, srcPtr, sizeToCopy);
+            {   size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize,
+                                             (size_t)(srcEnd - srcPtr) );
+                memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy);
                 srcPtr += sizeToCopy;
-                dctxPtr->tmpInSize += sizeToCopy;
-                if (dctxPtr->tmpInSize < dctxPtr->tmpInTarget) { /* not enough input to get full sBlockSize; wait for more */
-                    nextSrcSizeHint = dctxPtr->tmpInTarget - dctxPtr->tmpInSize;
+                dctx->tmpInSize += sizeToCopy;
+                if (dctx->tmpInSize < dctx->tmpInTarget) {
+                    /* not enough input to get full sBlockSize; wait for more */
+                    nextSrcSizeHint = dctx->tmpInTarget - dctx->tmpInSize;
                     doAnotherStage = 0;
                     break;
                 }
-                selectedIn = dctxPtr->header + 4;
-            }
+                selectedIn = dctx->header + 4;
+            }   /* if (dctx->dStage == dstage_storeSFrameSize) */
 
-        /* case dstage_decodeSFrameSize: */   /* no direct access */
+        /* case dstage_decodeSFrameSize: */   /* no direct entry */
             {   size_t const SFrameSize = LZ4F_readLE32(selectedIn);
-                dctxPtr->frameInfo.contentSize = SFrameSize;
-                dctxPtr->tmpInTarget = SFrameSize;
-                dctxPtr->dStage = dstage_skipSkippable;
+                dctx->frameInfo.contentSize = SFrameSize;
+                dctx->tmpInTarget = SFrameSize;
+                dctx->dStage = dstage_skipSkippable;
                 break;
             }
 
         case dstage_skipSkippable:
-            {   size_t skipSize = dctxPtr->tmpInTarget;
-                if (skipSize > (size_t)(srcEnd-srcPtr)) skipSize = srcEnd-srcPtr;
+            {   size_t const skipSize = MIN(dctx->tmpInTarget, (size_t)(srcEnd-srcPtr));
                 srcPtr += skipSize;
-                dctxPtr->tmpInTarget -= skipSize;
+                dctx->tmpInTarget -= skipSize;
                 doAnotherStage = 0;
-                nextSrcSizeHint = dctxPtr->tmpInTarget;
-                if (nextSrcSizeHint) break;
-                dctxPtr->dStage = dstage_getHeader;
+                nextSrcSizeHint = dctx->tmpInTarget;
+                if (nextSrcSizeHint) break;  /* still more to skip */
+                /* frame fully skipped : prepare context for a new frame */
+                LZ4F_resetDecompressionContext(dctx);
                 break;
             }
-        }
-    }
+        }   /* switch (dctx->dStage) */
+    }   /* while (doAnotherStage) */
 
-    /* preserve dictionary within tmp if necessary */
-    if ( (dctxPtr->frameInfo.blockMode==LZ4F_blockLinked)
-        &&(dctxPtr->dict != dctxPtr->tmpOutBuffer)
-        &&(!decompressOptionsPtr->stableDst)
-        &&((unsigned)(dctxPtr->dStage-1) < (unsigned)(dstage_getSuffix-1))
-        )
+    /* preserve history within tmp whenever necessary */
+    LZ4F_STATIC_ASSERT((unsigned)dstage_init == 2);
+    if ( (dctx->frameInfo.blockMode==LZ4F_blockLinked)  /* next block will use up to 64KB from previous ones */
+      && (dctx->dict != dctx->tmpOutBuffer)             /* dictionary is not already within tmp */
+      && (!decompressOptionsPtr->stableDst)             /* cannot rely on dst data to remain there for next call */
+      && ((unsigned)(dctx->dStage)-2 < (unsigned)(dstage_getSuffix)-2) )  /* valid stages : [init ... getSuffix[ */
     {
-        if (dctxPtr->dStage == dstage_flushOut) {
-            size_t preserveSize = dctxPtr->tmpOut - dctxPtr->tmpOutBuffer;
-            size_t copySize = 64 KB - dctxPtr->tmpOutSize;
-            const BYTE* oldDictEnd = dctxPtr->dict + dctxPtr->dictSize - dctxPtr->tmpOutStart;
-            if (dctxPtr->tmpOutSize > 64 KB) copySize = 0;
+        if (dctx->dStage == dstage_flushOut) {
+            size_t const preserveSize = dctx->tmpOut - dctx->tmpOutBuffer;
+            size_t copySize = 64 KB - dctx->tmpOutSize;
+            const BYTE* oldDictEnd = dctx->dict + dctx->dictSize - dctx->tmpOutStart;
+            if (dctx->tmpOutSize > 64 KB) copySize = 0;
             if (copySize > preserveSize) copySize = preserveSize;
 
-            memcpy(dctxPtr->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize);
+            if (copySize > 0)
+                memcpy(dctx->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize);
 
-            dctxPtr->dict = dctxPtr->tmpOutBuffer;
-            dctxPtr->dictSize = preserveSize + dctxPtr->tmpOutStart;
+            dctx->dict = dctx->tmpOutBuffer;
+            dctx->dictSize = preserveSize + dctx->tmpOutStart;
         } else {
-            size_t newDictSize = dctxPtr->dictSize;
-            const BYTE* oldDictEnd = dctxPtr->dict + dctxPtr->dictSize;
-            if ((newDictSize) > 64 KB) newDictSize = 64 KB;
+            const BYTE* const oldDictEnd = dctx->dict + dctx->dictSize;
+            size_t const newDictSize = MIN(dctx->dictSize, 64 KB);
 
-            memcpy(dctxPtr->tmpOutBuffer, oldDictEnd - newDictSize, newDictSize);
+            if (newDictSize > 0)
+                memcpy(dctx->tmpOutBuffer, oldDictEnd - newDictSize, newDictSize);
 
-            dctxPtr->dict = dctxPtr->tmpOutBuffer;
-            dctxPtr->dictSize = newDictSize;
-            dctxPtr->tmpOut = dctxPtr->tmpOutBuffer + newDictSize;
+            dctx->dict = dctx->tmpOutBuffer;
+            dctx->dictSize = newDictSize;
+            dctx->tmpOut = dctx->tmpOutBuffer + newDictSize;
         }
     }
 
@@ -1411,3 +1760,23 @@
     *dstSizePtr = (dstPtr - dstStart);
     return nextSrcSizeHint;
 }
+
+/*! LZ4F_decompress_usingDict() :
+ *  Same as LZ4F_decompress(), using a predefined dictionary.
+ *  Dictionary is used "in place", without any preprocessing.
+ *  It must remain accessible throughout the entire frame decoding.
+ * @return : the value returned by LZ4F_decompress(), forwarded unchanged. */
+size_t LZ4F_decompress_usingDict(LZ4F_dctx* dctx,
+                       void* dstBuffer, size_t* dstSizePtr,
+                       const void* srcBuffer, size_t* srcSizePtr,
+                       const void* dict, size_t dictSize,
+                       const LZ4F_decompressOptions_t* decompressOptionsPtr)
+{
+    if (dctx->dStage <= dstage_init) {   /* dict is only registered while stage is at or before dstage_init; otherwise dict/dictSize are ignored */
+        dctx->dict = (const BYTE*)dict;   /* pointer is stored as-is : no copy is made */
+        dctx->dictSize = dictSize;
+    }
+    return LZ4F_decompress(dctx, dstBuffer, dstSizePtr,   /* all remaining work is delegated to the regular decoder */
+                           srcBuffer, srcSizePtr,
+                           decompressOptionsPtr);
+}
diff --git a/lib/lz4frame.h b/lib/lz4frame.h
index a4a4ce6..75f1fd9 100644
--- a/lib/lz4frame.h
+++ b/lib/lz4frame.h
@@ -1,7 +1,7 @@
 /*
    LZ4 auto-framing library
    Header File
-   Copyright (C) 2011-2016, Yann Collet.
+   Copyright (C) 2011-2017, Yann Collet.
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
    Redistribution and use in source and binary forms, with or without
@@ -33,9 +33,10 @@
 */
 
 /* LZ4F is a stand-alone API to create LZ4-compressed frames
- * conformant with specification v1.5.1.
+ * conformant with specification v1.6.1.
  * It also offers streaming capabilities.
- * lz4.h is not required when using lz4frame.h.
+ * lz4.h is not required when using lz4frame.h,
+ * except to get constants such as LZ4_VERSION_NUMBER.
  * */
 
 #ifndef LZ4F_H_09782039843
@@ -48,49 +49,68 @@
 /* ---   Dependency   --- */
 #include <stddef.h>   /* size_t */
 
-/*-***************************************************************
-*  Compiler specifics
-*****************************************************************/
-/*!
-*  LZ4_DLL_EXPORT :
-*  Enable exporting of functions when building a Windows DLL
+
+/**
+  Introduction
+
+  lz4frame.h implements LZ4 frame specification (doc/lz4_Frame_format.md).
+  lz4frame.h provides frame compression functions that take care
+  of encoding standard metadata alongside LZ4-compressed blocks.
 */
+
+/*-***************************************************************
+ *  Compiler specifics
+ *****************************************************************/
+/*  LZ4_DLL_EXPORT :
+ *  Enable exporting of functions when building a Windows DLL
+ *  LZ4FLIB_API :
+ *  Control library symbols visibility.
+ */
 #if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
 #  define LZ4FLIB_API __declspec(dllexport)
 #elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
 #  define LZ4FLIB_API __declspec(dllimport)
+#elif defined(__GNUC__) && (__GNUC__ >= 4)
+#  define LZ4FLIB_API __attribute__ ((__visibility__ ("default")))
 #else
 #  define LZ4FLIB_API
 #endif
 
-#if defined(_MSC_VER)
-#  define LZ4F_DEPRECATE(x) x   /* __declspec(deprecated) x - only works with C++ */
-#elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 6))
-#  define LZ4F_DEPRECATE(x) x __attribute__((deprecated))
+#ifdef LZ4F_DISABLE_DEPRECATE_WARNINGS
+#  define LZ4F_DEPRECATE(x) x
 #else
-#  define LZ4F_DEPRECATE(x) x   /* no deprecation warning for this compiler */
+#  if defined(_MSC_VER)
+#    define LZ4F_DEPRECATE(x) x   /* __declspec(deprecated) x - only works with C++ */
+#  elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 6))
+#    define LZ4F_DEPRECATE(x) x __attribute__((deprecated))
+#  else
+#    define LZ4F_DEPRECATE(x) x   /* no deprecation warning for this compiler */
+#  endif
 #endif
 
 
 /*-************************************
-*  Error management
-**************************************/
+ *  Error management
+ **************************************/
 typedef size_t LZ4F_errorCode_t;
 
-LZ4FLIB_API unsigned    LZ4F_isError(LZ4F_errorCode_t code);
-LZ4FLIB_API const char* LZ4F_getErrorName(LZ4F_errorCode_t code);   /* return error code string; useful for debugging */
+LZ4FLIB_API unsigned    LZ4F_isError(LZ4F_errorCode_t code);   /**< tells when a function result is an error code */
+LZ4FLIB_API const char* LZ4F_getErrorName(LZ4F_errorCode_t code);   /**< return error code string; for debugging */
 
 
 /*-************************************
-*  Frame compression types
-**************************************/
-/* #define LZ4F_DISABLE_OBSOLETE_ENUMS */  /* uncomment to disable obsolete enums */
-#ifndef LZ4F_DISABLE_OBSOLETE_ENUMS
+ *  Frame compression types
+ **************************************/
+/* #define LZ4F_ENABLE_OBSOLETE_ENUMS   // uncomment to enable obsolete enums */
+#ifdef LZ4F_ENABLE_OBSOLETE_ENUMS
 #  define LZ4F_OBSOLETE_ENUM(x) , LZ4F_DEPRECATE(x) = LZ4F_##x
 #else
 #  define LZ4F_OBSOLETE_ENUM(x)
 #endif
 
+/* The larger the block size, the (slightly) better the compression ratio,
+ * though there are diminishing returns.
+ * Larger blocks also increase memory usage on both compression and decompression sides. */
 typedef enum {
     LZ4F_default=0,
     LZ4F_max64KB=4,
@@ -103,6 +123,9 @@
     LZ4F_OBSOLETE_ENUM(max4MB)
 } LZ4F_blockSizeID_t;
 
+/* Linked blocks sharply reduce inefficiencies when using small blocks,
+ * they compress better.
+ * However, some LZ4 decoders are only compatible with independent blocks */
 typedef enum {
     LZ4F_blockLinked=0,
     LZ4F_blockIndependent
@@ -118,63 +141,80 @@
 } LZ4F_contentChecksum_t;
 
 typedef enum {
+    LZ4F_noBlockChecksum=0,
+    LZ4F_blockChecksumEnabled
+} LZ4F_blockChecksum_t;
+
+typedef enum {
     LZ4F_frame=0,
     LZ4F_skippableFrame
     LZ4F_OBSOLETE_ENUM(skippableFrame)
 } LZ4F_frameType_t;
 
-#ifndef LZ4F_DISABLE_OBSOLETE_ENUMS
+#ifdef LZ4F_ENABLE_OBSOLETE_ENUMS
 typedef LZ4F_blockSizeID_t blockSizeID_t;
 typedef LZ4F_blockMode_t blockMode_t;
 typedef LZ4F_frameType_t frameType_t;
 typedef LZ4F_contentChecksum_t contentChecksum_t;
 #endif
 
-/* LZ4F_frameInfo_t :
- * makes it possible to supply detailed frame parameters to the stream interface.
- * It's not required to set all fields, as long as the structure was initially memset() to zero.
- * All reserved fields must be set to zero. */
+/*! LZ4F_frameInfo_t :
+ *  makes it possible to set or read frame parameters.
+ *  Structure must first be initialized to 0, using memset() or LZ4F_INIT_FRAMEINFO,
+ *  setting all parameters to default.
+ *  It's then possible to selectively update some parameters. */
 typedef struct {
-  LZ4F_blockSizeID_t     blockSizeID;           /* max64KB, max256KB, max1MB, max4MB ; 0 == default */
-  LZ4F_blockMode_t       blockMode;             /* blockLinked, blockIndependent ; 0 == default */
-  LZ4F_contentChecksum_t contentChecksumFlag;   /* noContentChecksum, contentChecksumEnabled ; 0 == default  */
-  LZ4F_frameType_t       frameType;             /* LZ4F_frame, skippableFrame ; 0 == default */
-  unsigned long long     contentSize;           /* Size of uncompressed (original) content ; 0 == unknown */
-  unsigned               reserved[2];           /* must be zero for forward compatibility */
+  LZ4F_blockSizeID_t     blockSizeID;         /* max64KB, max256KB, max1MB, max4MB; 0 == default */
+  LZ4F_blockMode_t       blockMode;           /* LZ4F_blockLinked, LZ4F_blockIndependent; 0 == default */
+  LZ4F_contentChecksum_t contentChecksumFlag; /* 1: frame terminated with 32-bit checksum of decompressed data; 0: disabled (default) */
+  LZ4F_frameType_t       frameType;           /* read-only field : LZ4F_frame or LZ4F_skippableFrame */
+  unsigned long long     contentSize;         /* Size of uncompressed content ; 0 == unknown */
+  unsigned               dictID;              /* Dictionary ID, sent by compressor to help decoder select correct dictionary; 0 == no dictID provided */
+  LZ4F_blockChecksum_t   blockChecksumFlag;   /* 1: each block followed by a checksum of block's compressed data; 0: disabled (default) */
 } LZ4F_frameInfo_t;
 
-/* LZ4F_preferences_t :
- * makes it possible to supply detailed compression parameters to the stream interface.
- * It's not required to set all fields, as long as the structure was initially memset() to zero.
- * All reserved fields must be set to zero. */
+#define LZ4F_INIT_FRAMEINFO   { LZ4F_default, LZ4F_blockLinked, LZ4F_noContentChecksum, LZ4F_frame, 0ULL, 0U, LZ4F_noBlockChecksum }    /* v1.8.3+ ; all-default (zero) values, spelled with enum constants so the initializer is also valid C++ */
+
+/*! LZ4F_preferences_t :
+ *  makes it possible to supply advanced compression instructions to streaming interface.
+ *  Structure must first be initialized to 0, using memset() or LZ4F_INIT_PREFERENCES,
+ *  setting all parameters to default.
+ *  All reserved fields must be set to zero. */
 typedef struct {
   LZ4F_frameInfo_t frameInfo;
-  int      compressionLevel;       /* 0 == default (fast mode); values above 16 count as 16; values below 0 count as 0 */
-  unsigned autoFlush;              /* 1 == always flush (reduce usage of tmp buffer) */
-  unsigned reserved[4];            /* must be zero for forward compatibility */
+  int      compressionLevel;    /* 0: default (fast mode); values > LZ4HC_CLEVEL_MAX count as LZ4HC_CLEVEL_MAX; values < 0 trigger "fast acceleration" */
+  unsigned autoFlush;           /* 1: always flush; reduces usage of internal buffers */
+  unsigned favorDecSpeed;       /* 1: parser favors decompression speed vs compression ratio. Only works for high compression modes (>= LZ4HC_CLEVEL_OPT_MIN) */  /* v1.8.2+ */
+  unsigned reserved[3];         /* must be zero for forward compatibility */
 } LZ4F_preferences_t;
 
+#define LZ4F_INIT_PREFERENCES   { LZ4F_INIT_FRAMEINFO, 0, 0, 0, { 0, 0, 0 } }    /* v1.8.3+ */
+
 
 /*-*********************************
 *  Simple compression function
 ***********************************/
-/*!LZ4F_compressFrameBound() :
- * Returns the maximum possible size of a frame compressed with LZ4F_compressFrame() given srcSize content and preferences.
- * Note : this result is only usable with LZ4F_compressFrame(), not with multi-segments compression.
+
+LZ4FLIB_API int LZ4F_compressionLevel_max(void);
+
+/*! LZ4F_compressFrameBound() :
+ *  Returns the maximum possible compressed size with LZ4F_compressFrame() given srcSize and preferences.
+ * `preferencesPtr` is optional. It can be replaced by NULL, in which case, the function will assume default preferences.
+ *  Note : this result is only usable with LZ4F_compressFrame().
+ *         It may also be used with LZ4F_compressUpdate() _if no flush() operation_ is performed.
  */
 LZ4FLIB_API size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr);
 
-/*!LZ4F_compressFrame() :
- * Compress an entire srcBuffer into a valid LZ4 frame, as defined by specification v1.5.1
- * An important rule is that dstBuffer MUST be large enough (dstCapacity) to store the result in worst case situation.
- * This value is supplied by LZ4F_compressFrameBound().
- * If this condition is not respected, LZ4F_compressFrame() will fail (result is an errorCode).
- * The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default.
+/*! LZ4F_compressFrame() :
+ *  Compress an entire srcBuffer into a valid LZ4 frame.
+ *  dstCapacity MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+ *  The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default.
  * @return : number of bytes written into dstBuffer.
  *           or an error code if it fails (can be tested using LZ4F_isError())
  */
-LZ4FLIB_API size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity, const void* srcBuffer, size_t srcSize, const LZ4F_preferences_t* preferencesPtr);
-
+LZ4FLIB_API size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity,
+                                const void* srcBuffer, size_t srcSize,
+                                const LZ4F_preferences_t* preferencesPtr);
 
 
 /*-***********************************
@@ -188,74 +228,90 @@
   unsigned reserved[3];
 } LZ4F_compressOptions_t;
 
-/* Resource Management */
+/*---   Resource Management   ---*/
 
-#define LZ4F_VERSION 100
+#define LZ4F_VERSION 100    /* This number can be used to check for an incompatible API breaking change */
 LZ4FLIB_API unsigned LZ4F_getVersion(void);
-LZ4FLIB_API LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_cctx** cctxPtr, unsigned version);
-LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx);
-/* LZ4F_createCompressionContext() :
+
+/*! LZ4F_createCompressionContext() :
  * The first thing to do is to create a compressionContext object, which will be used in all compression operations.
- * This is achieved using LZ4F_createCompressionContext(), which takes as argument a version and an LZ4F_preferences_t structure.
+ * This is achieved using LZ4F_createCompressionContext(), which takes as argument a version.
  * The version provided MUST be LZ4F_VERSION. It is intended to track potential version mismatch, notably when using DLL.
  * The function will provide a pointer to a fully allocated LZ4F_cctx object.
  * If @return != zero, there was an error during context creation.
  * Object can release its memory using LZ4F_freeCompressionContext();
  */
+LZ4FLIB_API LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_cctx** cctxPtr, unsigned version);
+LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx);
 
 
-/* Compression */
+/*----    Compression    ----*/
 
-#define LZ4F_HEADER_SIZE_MAX 15
-LZ4FLIB_API size_t LZ4F_compressBegin(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const LZ4F_preferences_t* prefsPtr);
-/* LZ4F_compressBegin() :
- * will write the frame header into dstBuffer.
- * dstCapacity must be large enough to store the header. Maximum header size is LZ4F_HEADER_SIZE_MAX bytes.
+#define LZ4F_HEADER_SIZE_MAX 19   /* LZ4 Frame header size can vary from 7 to 19 bytes */
+/*! LZ4F_compressBegin() :
+ *  will write the frame header into dstBuffer.
+ *  dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
  * `prefsPtr` is optional : you can provide NULL as argument, all preferences will then be set to default.
  * @return : number of bytes written into dstBuffer for the header
  *           or an error code (which can be tested using LZ4F_isError())
  */
+LZ4FLIB_API size_t LZ4F_compressBegin(LZ4F_cctx* cctx,
+                                      void* dstBuffer, size_t dstCapacity,
+                                      const LZ4F_preferences_t* prefsPtr);
 
-LZ4FLIB_API size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* prefsPtr);
-/* LZ4F_compressBound() :
- * Provides dstCapacity given a srcSize to guarantee operation success in worst case situations.
- * prefsPtr is optional : you can provide NULL as argument, preferences will be set to cover worst case scenario.
- * Result is always the same for a srcSize and prefsPtr, so it can be trusted to size reusable buffers.
- * When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() operations.
+/*! LZ4F_compressBound() :
+ *  Provides minimum dstCapacity required to guarantee compression success
+ *  given a srcSize and preferences, covering worst case scenario.
+ *  prefsPtr is optional : when NULL is provided, preferences will be set to cover worst case scenario.
+ *  Estimation is valid for either LZ4F_compressUpdate(), LZ4F_flush() or LZ4F_compressEnd().
+ *  Estimation includes the possibility that internal buffer might already be filled by up to (blockSize-1) bytes.
+ *  It also includes frame footer (ending + checksum), which would have to be generated by LZ4F_compressEnd().
+ *  Estimation doesn't include frame header, as it was already generated by LZ4F_compressBegin().
+ *  Result is always the same for a srcSize and prefsPtr, so it can be trusted to size reusable buffers.
+ *  When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() operations.
  */
+LZ4FLIB_API size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* prefsPtr);
 
-LZ4FLIB_API size_t LZ4F_compressUpdate(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const void* srcBuffer, size_t srcSize, const LZ4F_compressOptions_t* cOptPtr);
-/* LZ4F_compressUpdate() :
- * LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary.
- * An important rule is that dstCapacity MUST be large enough to ensure operation success even in worst case situations.
- * This value is provided by LZ4F_compressBound().
- * If this condition is not respected, LZ4F_compress() will fail (result is an errorCode).
- * LZ4F_compressUpdate() doesn't guarantee error recovery. When an error occurs, compression context must be freed or resized.
+/*! LZ4F_compressUpdate() :
+ *  LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary.
+ *  Important rule: dstCapacity MUST be large enough to ensure operation success even in worst case situations.
+ *  This value is provided by LZ4F_compressBound().
+ *  If this condition is not respected, LZ4F_compressUpdate() will fail (result is an errorCode).
+ *  LZ4F_compressUpdate() doesn't guarantee error recovery.
+ *  When an error occurs, compression context must be freed or resized.
  * `cOptPtr` is optional : NULL can be provided, in which case all options are set to default.
  * @return : number of bytes written into `dstBuffer` (it can be zero, meaning input data was just buffered).
  *           or an error code if it fails (which can be tested using LZ4F_isError())
  */
+LZ4FLIB_API size_t LZ4F_compressUpdate(LZ4F_cctx* cctx,
+                                       void* dstBuffer, size_t dstCapacity,
+                                 const void* srcBuffer, size_t srcSize,
+                                 const LZ4F_compressOptions_t* cOptPtr);
 
-LZ4FLIB_API size_t LZ4F_flush(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const LZ4F_compressOptions_t* cOptPtr);
-/* LZ4F_flush() :
- * When data must be generated and sent immediately, without waiting for a block to be completely filled,
- * it's possible to call LZ4_flush(). It will immediately compress any data buffered within cctx.
+/*! LZ4F_flush() :
+ *  When data must be generated and sent immediately, without waiting for a block to be completely filled,
+ *  it's possible to call LZ4F_flush(). It will immediately compress any data buffered within cctx.
  * `dstCapacity` must be large enough to ensure the operation will be successful.
  * `cOptPtr` is optional : it's possible to provide NULL, all options will be set to default.
- * @return : number of bytes written into dstBuffer (it can be zero, which means there was no data stored within cctx)
+ * @return : nb of bytes written into dstBuffer (can be zero, when there is no data stored within cctx)
  *           or an error code if it fails (which can be tested using LZ4F_isError())
  */
+LZ4FLIB_API size_t LZ4F_flush(LZ4F_cctx* cctx,
+                              void* dstBuffer, size_t dstCapacity,
+                        const LZ4F_compressOptions_t* cOptPtr);
 
-LZ4FLIB_API size_t LZ4F_compressEnd(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const LZ4F_compressOptions_t* cOptPtr);
-/* LZ4F_compressEnd() :
- * To properly finish an LZ4 frame, invoke LZ4F_compressEnd().
- * It will flush whatever data remained within `cctx` (like LZ4_flush())
- * and properly finalize the frame, with an endMark and a checksum.
+/*! LZ4F_compressEnd() :
+ *  To properly finish an LZ4 frame, invoke LZ4F_compressEnd().
+ *  It will flush whatever data remained within `cctx` (like LZ4F_flush())
+ *  and properly finalize the frame, with an endMark and a checksum.
  * `cOptPtr` is optional : NULL can be provided, in which case all options will be set to default.
- * @return : number of bytes written into dstBuffer (necessarily >= 4 (endMark), or 8 if optional frame checksum is enabled)
+ * @return : nb of bytes written into dstBuffer, necessarily >= 4 (endMark),
  *           or an error code if it fails (which can be tested using LZ4F_isError())
- * A successful call to LZ4F_compressEnd() makes `cctx` available again for another compression task.
+ *  A successful call to LZ4F_compressEnd() makes `cctx` available again for another compression task.
  */
+LZ4FLIB_API size_t LZ4F_compressEnd(LZ4F_cctx* cctx,
+                                    void* dstBuffer, size_t dstCapacity,
+                              const LZ4F_compressOptions_t* cOptPtr);
 
 
 /*-*********************************
@@ -265,66 +321,83 @@
 typedef LZ4F_dctx* LZ4F_decompressionContext_t;   /* compatibility with previous API versions */
 
 typedef struct {
-  unsigned stableDst;       /* guarantee that decompressed data will still be there on next function calls (avoid storage into tmp buffers) */
-  unsigned reserved[3];
+  unsigned stableDst;    /* pledges that last 64KB decompressed data will remain available unmodified. This optimization skips storage operations in tmp buffers. */
+  unsigned reserved[3];  /* must be set to zero for forward compatibility */
 } LZ4F_decompressOptions_t;
 
 
 /* Resource management */
 
-/*!LZ4F_createDecompressionContext() :
- * Create an LZ4F_decompressionContext_t object, which will be used to track all decompression operations.
- * The version provided MUST be LZ4F_VERSION. It is intended to track potential breaking differences between different versions.
- * The function will provide a pointer to a fully allocated and initialized LZ4F_decompressionContext_t object.
- * The result is an errorCode, which can be tested using LZ4F_isError().
- * dctx memory can be released using LZ4F_freeDecompressionContext();
- * The result of LZ4F_freeDecompressionContext() is indicative of the current state of decompressionContext when being released.
- * That is, it should be == 0 if decompression has been completed fully and correctly.
+/*! LZ4F_createDecompressionContext() :
+ *  Create an LZ4F_dctx object, to track all decompression operations.
+ *  The version provided MUST be LZ4F_VERSION.
+ *  The function provides a pointer to an allocated and initialized LZ4F_dctx object.
+ *  The result is an errorCode, which can be tested using LZ4F_isError().
+ *  dctx memory can be released using LZ4F_freeDecompressionContext();
+ *  Result of LZ4F_freeDecompressionContext() indicates current state of decompressionContext when being released.
+ *  That is, it should be == 0 if decompression has been completed fully and correctly.
  */
 LZ4FLIB_API LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_dctx** dctxPtr, unsigned version);
-LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* const dctx);
+LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* dctx);
 
 
-/*======   Decompression   ======*/
+/*-***********************************
+*  Streaming decompression functions
+*************************************/
 
-/*!LZ4F_getFrameInfo() :
- * This function decodes frame header information (such as max blockSize, frame checksum, etc.).
- * Its usage is optional. The objective is to extract frame header information, typically for allocation purposes.
- * A header size is variable and can length from 7 to 15 bytes. It's possible to provide more input bytes than that.
- * The number of bytes consumed from srcBuffer will be updated within *srcSizePtr (necessarily <= original value).
- * Decompression must resume from this point (srcBuffer + *srcSizePtr).
- * Note that LZ4F_getFrameInfo() can also be used anytime *after* decompression is started, in which case 0 input byte can be enough.
- * Frame header info is *copied into* an already allocated LZ4F_frameInfo_t structure.
+/*! LZ4F_getFrameInfo() :
+ *  This function extracts frame parameters (max blockSize, dictID, etc.).
+ *  Its usage is optional.
+ *  Extracted information is typically useful for allocation and dictionary.
+ *  This function works in 2 situations :
+ *   - At the beginning of a new frame, in which case
+ *     it will decode information from `srcBuffer`, starting the decoding process.
+ *     Input size must be large enough to successfully decode the entire frame header.
+ *     Frame header size is variable, but is guaranteed to be <= LZ4F_HEADER_SIZE_MAX bytes.
+ *     It's allowed to provide more input data than this minimum.
+ *   - After decoding has been started.
+ *     In which case, no input is read, frame parameters are extracted from dctx.
+ *  However, if decoding has barely started, but has not yet extracted information from the header,
+ *  LZ4F_getFrameInfo() will fail.
+ *  The number of bytes consumed from srcBuffer will be updated within *srcSizePtr (necessarily <= original value).
+ *  Decompression must resume from (srcBuffer + *srcSizePtr).
  * @return : an hint about how many srcSize bytes LZ4F_decompress() expects for next call,
- *           or an error code which can be tested using LZ4F_isError()
- *           (typically, when there is not enough src bytes to fully decode the frame header)
+ *           or an error code which can be tested using LZ4F_isError().
+ *  note 1 : in case of error, dctx is not modified. Decoding operation can resume from beginning safely.
+ *  note 2 : frame parameters are *copied into* an already allocated LZ4F_frameInfo_t structure.
  */
 LZ4FLIB_API size_t LZ4F_getFrameInfo(LZ4F_dctx* dctx,
                                      LZ4F_frameInfo_t* frameInfoPtr,
                                      const void* srcBuffer, size_t* srcSizePtr);
 
-/*!LZ4F_decompress() :
- * Call this function repetitively to regenerate data compressed within `srcBuffer`.
- * The function will attempt to decode up to *srcSizePtr bytes from srcBuffer, into dstBuffer of capacity *dstSizePtr.
+/*! LZ4F_decompress() :
+ *  Call this function repetitively to regenerate compressed data from `srcBuffer`.
+ *  The function will read up to *srcSizePtr bytes from srcBuffer,
+ *  and decompress data into dstBuffer, of capacity *dstSizePtr.
  *
- * The number of bytes regenerated into dstBuffer will be provided within *dstSizePtr (necessarily <= original value).
+ *  The nb of bytes consumed from srcBuffer will be written into *srcSizePtr (necessarily <= original value).
+ *  The nb of bytes decompressed into dstBuffer will be written into *dstSizePtr (necessarily <= original value).
  *
- * The number of bytes read from srcBuffer will be provided within *srcSizePtr (necessarily <= original value).
- * Number of bytes read can be < number of bytes provided, meaning there is some more data to decode.
- * It typically happens when dstBuffer is not large enough to contain all decoded data.
- * Remaining data will have to be presented again in a subsequent invocation.
+ *  The function does not necessarily read all input bytes, so always check value in *srcSizePtr.
+ *  Unconsumed source data must be presented again in subsequent invocations.
  *
- * `dstBuffer` content is expected to be flushed between each invocation, as its content will be overwritten.
- * `dstBuffer` can be changed at will between each consecutive function invocation.
+ * `dstBuffer` can freely change between each consecutive function invocation.
+ * `dstBuffer` content will be overwritten.
  *
- * @return is an hint of how many `srcSize` bytes LZ4F_decompress() expects for next call.
- * Schematically, it's the size of the current (or remaining) compressed block + header of next block.
- * Respecting the hint provides some boost to performance, since it does skip intermediate buffers.
- * This is just a hint though, it's always possible to provide any srcSize.
- * When a frame is fully decoded, @return will be 0 (no more data expected).
- * If decompression failed, @return is an error code, which can be tested using LZ4F_isError().
+ * @return : a hint of how many `srcSize` bytes LZ4F_decompress() expects for next call.
+ *  Schematically, it's the size of the current (or remaining) compressed block + header of next block.
+ *  Respecting the hint provides some small speed benefit, because it skips intermediate buffers.
+ *  This is just a hint though, it's always possible to provide any srcSize.
  *
- * After a frame is fully decoded, dctx can be used again to decompress another frame.
+ *  When a frame is fully decoded, @return will be 0 (no more data expected).
+ *  When provided with more bytes than necessary to decode a frame,
+ *  LZ4F_decompress() will stop reading exactly at end of current frame, and @return 0.
+ *
+ *  If decompression failed, @return is an error code, which can be tested using LZ4F_isError().
+ *  After a decompression error, the `dctx` context is not resumable.
+ *  Use LZ4F_resetDecompressionContext() to return to clean state.
+ *
+ *  After a frame is fully decoded, dctx can be used again to decompress another frame.
  */
 LZ4FLIB_API size_t LZ4F_decompress(LZ4F_dctx* dctx,
                                    void* dstBuffer, size_t* dstSizePtr,
@@ -332,9 +405,137 @@
                                    const LZ4F_decompressOptions_t* dOptPtr);
 
 
+/*! LZ4F_resetDecompressionContext() : added in v1.8.0
+ *  In case of an error, the context is left in "undefined" state.
+ *  In which case, it's necessary to reset it, before re-using it.
+ *  This method can also be used to abruptly stop any unfinished decompression,
+ *  and start a new one using same context resources. */
+LZ4FLIB_API void LZ4F_resetDecompressionContext(LZ4F_dctx* dctx);   /* always successful */
+
+
 
 #if defined (__cplusplus)
 }
 #endif
 
 #endif  /* LZ4F_H_09782039843 */
+
+#if defined(LZ4F_STATIC_LINKING_ONLY) && !defined(LZ4F_H_STATIC_09782039843)
+#define LZ4F_H_STATIC_09782039843
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* These declarations are not stable and may change in the future. They are
+ * therefore only safe to depend on when the caller is statically linked
+ * against the library. To access their declarations, define
+ * LZ4F_STATIC_LINKING_ONLY.
+ *
+ * There is a further protection mechanism where these symbols aren't published
+ * into shared/dynamic libraries. You can override this behavior and force
+ * them to be published by defining LZ4F_PUBLISH_STATIC_FUNCTIONS. Use at
+ * your own risk.
+ */
+#ifdef LZ4F_PUBLISH_STATIC_FUNCTIONS
+#define LZ4FLIB_STATIC_API LZ4FLIB_API
+#else
+#define LZ4FLIB_STATIC_API
+#endif
+
+
+/* ---   Error List   --- */
+#define LZ4F_LIST_ERRORS(ITEM) \
+        ITEM(OK_NoError) \
+        ITEM(ERROR_GENERIC) \
+        ITEM(ERROR_maxBlockSize_invalid) \
+        ITEM(ERROR_blockMode_invalid) \
+        ITEM(ERROR_contentChecksumFlag_invalid) \
+        ITEM(ERROR_compressionLevel_invalid) \
+        ITEM(ERROR_headerVersion_wrong) \
+        ITEM(ERROR_blockChecksum_invalid) \
+        ITEM(ERROR_reservedFlag_set) \
+        ITEM(ERROR_allocation_failed) \
+        ITEM(ERROR_srcSize_tooLarge) \
+        ITEM(ERROR_dstMaxSize_tooSmall) \
+        ITEM(ERROR_frameHeader_incomplete) \
+        ITEM(ERROR_frameType_unknown) \
+        ITEM(ERROR_frameSize_wrong) \
+        ITEM(ERROR_srcPtr_wrong) \
+        ITEM(ERROR_decompressionFailed) \
+        ITEM(ERROR_headerChecksum_invalid) \
+        ITEM(ERROR_contentChecksum_invalid) \
+        ITEM(ERROR_frameDecoding_alreadyStarted) \
+        ITEM(ERROR_maxCode)
+
+#define LZ4F_GENERATE_ENUM(ENUM) LZ4F_##ENUM,
+
+/* enum list is exposed, to handle specific errors */
+typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM) } LZ4F_errorCodes;
+
+LZ4FLIB_STATIC_API LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult);
+
+
+
+/**********************************
+ *  Bulk processing dictionary API
+ *********************************/
+typedef struct LZ4F_CDict_s LZ4F_CDict;
+
+/*! LZ4F_createCDict() :
+ *  When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
+ *  LZ4F_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
+ *  LZ4F_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ * `dictBuffer` can be released after LZ4F_CDict creation, since its content is copied within CDict */
+LZ4FLIB_STATIC_API LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
+LZ4FLIB_STATIC_API void        LZ4F_freeCDict(LZ4F_CDict* CDict);
+
+
+/*! LZ4F_compressFrame_usingCDict() :
+ *  Compress an entire srcBuffer into a valid LZ4 frame using a digested Dictionary.
+ *  cctx must point to a context created by LZ4F_createCompressionContext().
+ *  If cdict==NULL, compress without a dictionary.
+ *  dstCapacity MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+ *  If this condition is not respected, function will fail (@return an errorCode).
+ *  The LZ4F_preferences_t structure is optional : you may provide NULL as argument,
+ *  but it's not recommended, as it's the only way to provide dictID in the frame header.
+ * @return : number of bytes written into dstBuffer.
+ *           or an error code if it fails (can be tested using LZ4F_isError()) */
+LZ4FLIB_STATIC_API size_t LZ4F_compressFrame_usingCDict(
+    LZ4F_cctx* cctx,
+    void* dst, size_t dstCapacity,
+    const void* src, size_t srcSize,
+    const LZ4F_CDict* cdict,
+    const LZ4F_preferences_t* preferencesPtr);
+
+
+/*! LZ4F_compressBegin_usingCDict() :
+ *  Inits streaming dictionary compression, and writes the frame header into dstBuffer.
+ *  dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ * `prefsPtr` is optional : you may provide NULL as argument,
+ *  however, it's the only way to provide dictID in the frame header.
+ * @return : number of bytes written into dstBuffer for the header,
+ *           or an error code (which can be tested using LZ4F_isError()) */
+LZ4FLIB_STATIC_API size_t LZ4F_compressBegin_usingCDict(
+    LZ4F_cctx* cctx,
+    void* dstBuffer, size_t dstCapacity,
+    const LZ4F_CDict* cdict,
+    const LZ4F_preferences_t* prefsPtr);
+
+
+/*! LZ4F_decompress_usingDict() :
+ *  Same as LZ4F_decompress(), using a predefined dictionary (`dict`, of size `dictSize` bytes).
+ *  Dictionary is used "in place", without any preprocessing.
+ *  It must remain accessible throughout the entire frame decoding. */
+LZ4FLIB_STATIC_API size_t LZ4F_decompress_usingDict(
+    LZ4F_dctx* dctx,
+    void* dstBuffer, size_t* dstSizePtr,
+    const void* srcBuffer, size_t* srcSizePtr,
+    const void* dict, size_t dictSize,
+    const LZ4F_decompressOptions_t* decompressOptionsPtr);
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* defined(LZ4F_STATIC_LINKING_ONLY) && !defined(LZ4F_H_STATIC_09782039843) */
diff --git a/lib/lz4frame_static.h b/lib/lz4frame_static.h
index f2228a5..925a2c5 100644
--- a/lib/lz4frame_static.h
+++ b/lib/lz4frame_static.h
@@ -36,47 +36,12 @@
 #ifndef LZ4FRAME_STATIC_H_0398209384
 #define LZ4FRAME_STATIC_H_0398209384
 
-#if defined (__cplusplus)
-extern "C" {
-#endif
+/* The declarations that formerly were made here have been merged into
+ * lz4frame.h, protected by the LZ4F_STATIC_LINKING_ONLY macro. Going forward,
+ * it is recommended to simply include that header directly.
+ */
 
-/* lz4frame_static.h should be used solely in the context of static linking.
- * It contains definitions which are not stable and may change in the future.
- * Never use it in the context of DLL linking.
- * */
-
-
-/* ---   Dependency   --- */
+#define LZ4F_STATIC_LINKING_ONLY
 #include "lz4frame.h"
 
-
-/* ---   Error List   --- */
-#define LZ4F_LIST_ERRORS(ITEM) \
-        ITEM(OK_NoError) ITEM(ERROR_GENERIC) \
-        ITEM(ERROR_maxBlockSize_invalid) ITEM(ERROR_blockMode_invalid) ITEM(ERROR_contentChecksumFlag_invalid) \
-        ITEM(ERROR_compressionLevel_invalid) \
-        ITEM(ERROR_headerVersion_wrong) ITEM(ERROR_blockChecksum_unsupported) ITEM(ERROR_reservedFlag_set) \
-        ITEM(ERROR_allocation_failed) \
-        ITEM(ERROR_srcSize_tooLarge) ITEM(ERROR_dstMaxSize_tooSmall) \
-        ITEM(ERROR_frameHeader_incomplete) ITEM(ERROR_frameType_unknown) ITEM(ERROR_frameSize_wrong) \
-        ITEM(ERROR_srcPtr_wrong) \
-        ITEM(ERROR_decompressionFailed) \
-        ITEM(ERROR_headerChecksum_invalid) ITEM(ERROR_contentChecksum_invalid) \
-        ITEM(ERROR_maxCode)
-
-#define LZ4F_DISABLE_OLD_ENUMS   /* comment to enable deprecated enums */
-#ifndef LZ4F_DISABLE_OLD_ENUMS
-#  define LZ4F_GENERATE_ENUM(ENUM) LZ4F_##ENUM, ENUM = LZ4F_##ENUM,
-#else
-#  define LZ4F_GENERATE_ENUM(ENUM) LZ4F_##ENUM,
-#endif
-typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM) } LZ4F_errorCodes;  /* enum is exposed, to handle specific errors; compare function result to -enum value */
-
-LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult);
-
-
-#if defined (__cplusplus)
-}
-#endif
-
 #endif /* LZ4FRAME_STATIC_H_0398209384 */
diff --git a/lib/lz4hc.c b/lib/lz4hc.c
index 5d4ea3e..e913ee7 100644
--- a/lib/lz4hc.c
+++ b/lib/lz4hc.c
@@ -1,6 +1,6 @@
 /*
     LZ4 HC - High Compression Mode of LZ4
-    Copyright (C) 2011-2016, Yann Collet.
+    Copyright (C) 2011-2017, Yann Collet.
 
     BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
@@ -38,77 +38,79 @@
 *  Tuning Parameter
 ***************************************/
 
-/*!
- * HEAPMODE :
- * Select how default compression function will allocate workplace memory,
- * in stack (0:fastest), or in heap (1:requires malloc()).
- * Since workplace is rather large, heap mode is recommended.
+/*! LZ4HC_HEAPMODE :
+ *  Select how default compression function will allocate workplace memory,
+ *  in stack (0:fastest), or in heap (1:requires malloc()).
+ *  Since workplace is rather large, heap mode is recommended.
  */
 #ifndef LZ4HC_HEAPMODE
 #  define LZ4HC_HEAPMODE 1
 #endif
 
 
-/* *************************************
-*  Dependency
-***************************************/
+/*===    Dependency    ===*/
+#define LZ4_HC_STATIC_LINKING_ONLY
 #include "lz4hc.h"
 
 
-/* *************************************
-*  Local Compiler Options
-***************************************/
+/*===   Common LZ4 definitions   ===*/
 #if defined(__GNUC__)
 #  pragma GCC diagnostic ignored "-Wunused-function"
 #endif
-
 #if defined (__clang__)
 #  pragma clang diagnostic ignored "-Wunused-function"
 #endif
 
-
-/* *************************************
-*  Common LZ4 definition
-***************************************/
 #define LZ4_COMMONDEFS_ONLY
-#include "lz4.c"
+#include "lz4.c"   /* LZ4_count, constants, mem */
 
 
-/* *************************************
-*  Local Constants
-***************************************/
+/*===   Constants   ===*/
 #define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
+#define LZ4_OPT_NUM   (1<<12)
 
 
-/**************************************
-*  Local Macros
-**************************************/
-#define HASH_FUNCTION(i)       (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG))
-#define DELTANEXTMAXD(p)       chainTable[(p) & LZ4HC_MAXD_MASK]    /* flexible, LZ4HC_MAXD dependent */
-#define DELTANEXTU16(p)        chainTable[(U16)(p)]   /* faster */
+/*===   Macros   ===*/
+#define MIN(a,b)   ( (a) < (b) ? (a) : (b) )
+#define MAX(a,b)   ( (a) > (b) ? (a) : (b) )
+#define HASH_FUNCTION(i)         (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG))
+#define DELTANEXTMAXD(p)         chainTable[(p) & LZ4HC_MAXD_MASK]    /* flexible, LZ4HC_MAXD dependent */
+#define DELTANEXTU16(table, pos) table[(U16)(pos)]   /* faster */
 
 static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); }
 
+/*===   Enums   ===*/
+typedef enum { noDictCtx, usingDictCtx } dictCtx_directive;
 
 
 /**************************************
 *  HC Compression
 **************************************/
-static void LZ4HC_init (LZ4HC_CCtx_internal* hc4, const BYTE* start)
+static void LZ4HC_clearTables (LZ4HC_CCtx_internal* hc4)
 {
     MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
     MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
-    hc4->nextToUpdate = 64 KB;
-    hc4->base = start - 64 KB;
+}
+
+static void LZ4HC_init (LZ4HC_CCtx_internal* hc4, const BYTE* start)
+{
+    uptrval startingOffset = hc4->end - hc4->base;
+    if (startingOffset > 1 GB) {
+        LZ4HC_clearTables(hc4);
+        startingOffset = 0;
+    }
+    startingOffset += 64 KB;
+    hc4->nextToUpdate = (U32) startingOffset;
+    hc4->base = start - startingOffset;
     hc4->end = start;
-    hc4->dictBase = start - 64 KB;
-    hc4->dictLimit = 64 KB;
-    hc4->lowLimit = 64 KB;
+    hc4->dictBase = start - startingOffset;
+    hc4->dictLimit = (U32) startingOffset;
+    hc4->lowLimit = (U32) startingOffset;
 }
 
 
 /* Update chains up to ip (excluded) */
-FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
+LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
 {
     U16* const chainTable = hc4->chainTable;
     U32* const hashTable  = hc4->hashTable;
@@ -120,7 +122,7 @@
         U32 const h = LZ4HC_hashPtr(base+idx);
         size_t delta = idx - hashTable[h];
         if (delta>MAX_DISTANCE) delta = MAX_DISTANCE;
-        DELTANEXTU16(idx) = (U16)delta;
+        DELTANEXTU16(chainTable, idx) = (U16)delta;
         hashTable[h] = idx;
         idx++;
     }
@@ -128,56 +130,80 @@
     hc4->nextToUpdate = target;
 }
 
-
-FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_CCtx_internal* hc4,   /* Index table will be updated */
-                                               const BYTE* ip, const BYTE* const iLimit,
-                                               const BYTE** matchpos,
-                                               const int maxNbAttempts)
+/** LZ4HC_countBack() :
+ * @return : negative value (or 0), whose magnitude is the nb of common bytes before ip/match */
+LZ4_FORCE_INLINE
+int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
+                    const BYTE* const iMin, const BYTE* const mMin)
 {
-    U16* const chainTable = hc4->chainTable;
-    U32* const HashTable = hc4->hashTable;
-    const BYTE* const base = hc4->base;
-    const BYTE* const dictBase = hc4->dictBase;
-    const U32 dictLimit = hc4->dictLimit;
-    const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
-    U32 matchIndex;
-    int nbAttempts=maxNbAttempts;
-    size_t ml=0;
-
-    /* HC4 match finder */
-    LZ4HC_Insert(hc4, ip);
-    matchIndex = HashTable[LZ4HC_hashPtr(ip)];
-
-    while ((matchIndex>=lowLimit) && (nbAttempts)) {
-        nbAttempts--;
-        if (matchIndex >= dictLimit) {
-            const BYTE* const match = base + matchIndex;
-            if (*(match+ml) == *(ip+ml)
-                && (LZ4_read32(match) == LZ4_read32(ip)))
-            {
-                size_t const mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
-                if (mlt > ml) { ml = mlt; *matchpos = match; }
-            }
-        } else {
-            const BYTE* const match = dictBase + matchIndex;
-            if (LZ4_read32(match) == LZ4_read32(ip)) {
-                size_t mlt;
-                const BYTE* vLimit = ip + (dictLimit - matchIndex);
-                if (vLimit > iLimit) vLimit = iLimit;
-                mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
-                if ((ip+mlt == vLimit) && (vLimit < iLimit))
-                    mlt += LZ4_count(ip+mlt, base+dictLimit, iLimit);
-                if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; }   /* virtual matchpos */
-            }
-        }
-        matchIndex -= DELTANEXTU16(matchIndex);
-    }
-
-    return (int)ml;
+    int back = 0;
+    int const min = (int)MAX(iMin - ip, mMin - match);
+    assert(min <= 0);
+    assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31));
+    assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31));
+    while ( (back > min)
+         && (ip[back-1] == match[back-1]) )
+            back--;
+    return back;
 }
 
+/* LZ4HC_countPattern() :
+ * pattern32 must be a sample of a repetitive pattern of length 1, 2 or 4 (but not 3!) ; @return : nb of bytes continuing the pattern, counted from ip and stopping before iEnd */
+static unsigned
+LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32)
+{
+    const BYTE* const iStart = ip;
+    reg_t const pattern = (sizeof(pattern)==8) ? (reg_t)pattern32 + (((reg_t)pattern32) << 32) : pattern32;
 
-FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
+    while (likely(ip < iEnd-(sizeof(pattern)-1))) {
+        reg_t const diff = LZ4_read_ARCH(ip) ^ pattern;
+        if (!diff) { ip+=sizeof(pattern); continue; }
+        ip += LZ4_NbCommonBytes(diff);
+        return (unsigned)(ip - iStart);
+    }
+
+    if (LZ4_isLittleEndian()) {
+        reg_t patternByte = pattern;
+        while ((ip<iEnd) && (*ip == (BYTE)patternByte)) {
+            ip++; patternByte >>= 8;
+        }
+    } else {  /* big endian */
+        U32 bitOffset = (sizeof(pattern)*8) - 8;
+        while (ip < iEnd) {
+            BYTE const byte = (BYTE)(pattern >> bitOffset);
+            if (*ip != byte) break;
+            ip ++; bitOffset -= 8;
+        }
+    }
+
+    return (unsigned)(ip - iStart);
+}
+
+/* LZ4HC_reverseCountPattern() :
+ * pattern must be a sample of a repetitive pattern of length 1, 2 or 4 (but not 3!), read using natural platform endianness.
+ * @return : nb of bytes matching the pattern, scanning backward from ip (excluded) down to iLow (included) */
+static unsigned
+LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern)
+{
+    const BYTE* const iStart = ip;   /* kept to compute the travelled distance at the end */
+
+    while (likely(ip >= iLow+4)) {   /* fast path : step back 4 bytes at a time */
+        if (LZ4_read32(ip-4) != pattern) break;
+        ip -= 4;
+    }
+    {   const BYTE* bytePtr = (const BYTE*)(&pattern) + 3; /* last byte of pattern as stored in memory : works for any endianness */
+        while (likely(ip>iLow)) {   /* tail : compare remaining bytes one at a time, walking pattern backward */
+            if (ip[-1] != *bytePtr) break;
+            ip--; bytePtr--;
+    }   }
+    return (unsigned)(iStart - ip);
+}
+
+typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e;
+typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e;
+
+LZ4_FORCE_INLINE int
+LZ4HC_InsertAndGetWiderMatch (
     LZ4HC_CCtx_internal* hc4,
     const BYTE* const ip,
     const BYTE* const iLowLimit,
@@ -185,114 +211,259 @@
     int longest,
     const BYTE** matchpos,
     const BYTE** startpos,
-    const int maxNbAttempts)
+    const int maxNbAttempts,
+    const int patternAnalysis,
+    const int chainSwap,
+    const dictCtx_directive dict,
+    const HCfavor_e favorDecSpeed)
 {
     U16* const chainTable = hc4->chainTable;
     U32* const HashTable = hc4->hashTable;
+    const LZ4HC_CCtx_internal * const dictCtx = hc4->dictCtx;
     const BYTE* const base = hc4->base;
     const U32 dictLimit = hc4->dictLimit;
     const BYTE* const lowPrefixPtr = base + dictLimit;
-    const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
+    const U32 ipIndex = (U32)(ip - base);
+    const U32 lowestMatchIndex = (hc4->lowLimit + 64 KB > ipIndex) ? hc4->lowLimit : ipIndex - MAX_DISTANCE;
     const BYTE* const dictBase = hc4->dictBase;
-    U32   matchIndex;
+    int const lookBackLength = (int)(ip-iLowLimit);
     int nbAttempts = maxNbAttempts;
-    int delta = (int)(ip-iLowLimit);
+    int matchChainPos = 0;
+    U32 const pattern = LZ4_read32(ip);
+    U32 matchIndex;
+    U32 dictMatchIndex;
+    repeat_state_e repeat = rep_untested;
+    size_t srcPatternLength = 0;
 
-
+    DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch");
     /* First Match */
     LZ4HC_Insert(hc4, ip);
     matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+    DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)",
+                matchIndex, lowestMatchIndex);
 
-    while ((matchIndex>=lowLimit) && (nbAttempts)) {
+    while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) {
+        int matchLength=0;
         nbAttempts--;
-        if (matchIndex >= dictLimit) {
-            const BYTE* matchPtr = base + matchIndex;
-            if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) {
-                if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
-                    int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
-                    int back = 0;
-
-                    while ((ip+back > iLowLimit)
-                           && (matchPtr+back > lowPrefixPtr)
-                           && (ip[back-1] == matchPtr[back-1]))
-                            back--;
-
-                    mlt -= back;
-
-                    if (mlt > longest) {
-                        longest = (int)mlt;
-                        *matchpos = matchPtr+back;
-                        *startpos = ip+back;
-                    }
-                }
-            }
-        } else {
+        assert(matchIndex < ipIndex);
+        if (favorDecSpeed && (ipIndex - matchIndex < 8)) {
+            /* do nothing */
+        } else if (matchIndex >= dictLimit) {   /* within current Prefix */
+            const BYTE* const matchPtr = base + matchIndex;
+            assert(matchPtr >= lowPrefixPtr);
+            assert(matchPtr < ip);
+            assert(longest >= 1);
+            if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) {
+                if (LZ4_read32(matchPtr) == pattern) {
+                    int const back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0;
+                    matchLength = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
+                    matchLength -= back;
+                    if (matchLength > longest) {
+                        longest = matchLength;
+                        *matchpos = matchPtr + back;
+                        *startpos = ip + back;
+            }   }   }
+        } else {   /* lowestMatchIndex <= matchIndex < dictLimit */
             const BYTE* const matchPtr = dictBase + matchIndex;
-            if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
-                size_t mlt;
-                int back=0;
+            if (LZ4_read32(matchPtr) == pattern) {
+                const BYTE* const dictStart = dictBase + hc4->lowLimit;
+                int back = 0;
                 const BYTE* vLimit = ip + (dictLimit - matchIndex);
                 if (vLimit > iHighLimit) vLimit = iHighLimit;
+                matchLength = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
+                if ((ip+matchLength == vLimit) && (vLimit < iHighLimit))
+                    matchLength += LZ4_count(ip+matchLength, lowPrefixPtr, iHighLimit);
+                back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictStart) : 0;
+                matchLength -= back;
+                if (matchLength > longest) {
+                    longest = matchLength;
+                    *matchpos = base + matchIndex + back;   /* virtual pos, relative to ip, to retrieve offset */
+                    *startpos = ip + back;
+        }   }   }
+
+        if (chainSwap && matchLength==longest) {    /* better match => select a better chain */
+            assert(lookBackLength==0);   /* search forward only */
+            if (matchIndex + longest <= ipIndex) {
+                U32 distanceToNextMatch = 1;
+                int pos;
+                for (pos = 0; pos <= longest - MINMATCH; pos++) {
+                    U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + pos);
+                    if (candidateDist > distanceToNextMatch) {
+                        distanceToNextMatch = candidateDist;
+                        matchChainPos = pos;
+                }   }
+                if (distanceToNextMatch > 1) {
+                    if (distanceToNextMatch > matchIndex) break;   /* avoid overflow */
+                    matchIndex -= distanceToNextMatch;
+                    continue;
+        }   }   }
+
+        {   U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex);
+            if (patternAnalysis && distNextMatch==1 && matchChainPos==0) {
+                U32 const matchCandidateIdx = matchIndex-1;
+                /* may be a repeated pattern */
+                if (repeat == rep_untested) {
+                    if ( ((pattern & 0xFFFF) == (pattern >> 16))
+                      &  ((pattern & 0xFF)   == (pattern >> 24)) ) {
+                        repeat = rep_confirmed;
+                        srcPatternLength = LZ4HC_countPattern(ip+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
+                    } else {
+                        repeat = rep_not;
+                }   }
+                if ( (repeat == rep_confirmed)
+                  && (matchCandidateIdx >= dictLimit) ) {   /* same segment only */
+                    const BYTE* const matchPtr = base + matchCandidateIdx;
+                    if (LZ4_read32(matchPtr) == pattern) {  /* good candidate */
+                        size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
+                        const BYTE* const lowestMatchPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE;
+                        size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern);
+                        size_t const currentSegmentLength = backLength + forwardPatternLength;
+
+                        if ( (currentSegmentLength >= srcPatternLength)   /* current pattern segment large enough to contain full srcPatternLength */
+                          && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */
+                            matchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength;  /* best position, full pattern, might be followed by more match */
+                        } else {
+                            matchIndex = matchCandidateIdx - (U32)backLength;   /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */
+                            if (lookBackLength==0) {  /* no back possible */
+                                size_t const maxML = MIN(currentSegmentLength, srcPatternLength);
+                                if ((size_t)longest < maxML) {
+                                    assert(base + matchIndex < ip);
+                                    if (ip - (base+matchIndex) > MAX_DISTANCE) break;
+                                    assert(maxML < 2 GB);
+                                    longest = (int)maxML;
+                                    *matchpos = base + matchIndex;   /* virtual pos, relative to ip, to retrieve offset */
+                                    *startpos = ip;
+                                }
+                                {   U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex);
+                                    if (distToNextPattern > matchIndex) break;  /* avoid overflow */
+                                    matchIndex -= distToNextPattern;
+                        }   }   }
+                        continue;
+                }   }
+        }   }   /* PA optimization */
+
+        /* follow current chain */
+        matchIndex -= DELTANEXTU16(chainTable, matchIndex+matchChainPos);
+
+    }  /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */
+
+    if (dict == usingDictCtx && nbAttempts && ipIndex - lowestMatchIndex < MAX_DISTANCE) {
+        size_t const dictEndOffset = dictCtx->end - dictCtx->base;
+        assert(dictEndOffset <= 1 GB);
+        dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];
+        matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset;
+        while (ipIndex - matchIndex <= MAX_DISTANCE && nbAttempts--) {
+            const BYTE* const matchPtr = dictCtx->base + dictMatchIndex;
+
+            if (LZ4_read32(matchPtr) == pattern) {
+                int mlt;
+                int back = 0;
+                const BYTE* vLimit = ip + (dictEndOffset - dictMatchIndex);
+                if (vLimit > iHighLimit) vLimit = iHighLimit;
                 mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
-                if ((ip+mlt == vLimit) && (vLimit < iHighLimit))
-                    mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit);
-                while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == matchPtr[back-1])) back--;
+                back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->base + dictCtx->dictLimit) : 0;
                 mlt -= back;
-                if ((int)mlt > longest) { longest = (int)mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; }
+                if (mlt > longest) {
+                    longest = mlt;
+                    *matchpos = base + matchIndex + back;
+                    *startpos = ip + back;
+                }
+            }
+
+            {   U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, dictMatchIndex);
+                dictMatchIndex -= nextOffset;
+                matchIndex -= nextOffset;
             }
         }
-        matchIndex -= DELTANEXTU16(matchIndex);
     }
 
     return longest;
 }
 
+LZ4_FORCE_INLINE   /* forward-only match search : thin wrapper around LZ4HC_InsertAndGetWiderMatch() */
+int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4,   /* Index table will be updated */
+                                 const BYTE* const ip, const BYTE* const iLimit,
+                                 const BYTE** matchpos,
+                                 const int maxNbAttempts,
+                                 const int patternAnalysis,
+                                 const dictCtx_directive dict)
+{
+    const BYTE* uselessPtr = ip;   /* receives *startpos ; its value is never read afterwards */
+    /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
+     * but this won't be the case here, as we define iLowLimit==ip,
+     * so its lookBackLength is 0 and a match is never extended backward, before ip */
+    return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis, 0 /*chainSwap*/, dict, favorCompressionRatio);   /* initial longest==MINMATCH-1 */
+}
 
-typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
 
-#define LZ4HC_DEBUG 0
-#if LZ4HC_DEBUG
-static unsigned debug = 0;
-#endif
 
-FORCE_INLINE int LZ4HC_encodeSequence (
+typedef enum {   /* output-limit mode ; no trailing comma after the last enumerator : ISO C90 forbids it */
+    noLimit = 0,          /* zero : LZ4HC_encodeSequence() skips its output-limit checks */
+    limitedOutput = 1,    /* non-zero : output-limit checks against oend are enabled */
+    limitedDestSize = 2   /* non-zero : checks enabled ; LZ4HC_compress_hashChain() also lowers oend by LASTLITERALS */
+} limitedOutput_directive;
+
+/* LZ4HC_encodeSequence() :
+ * @return : 0 if ok,
+ *           1 if buffer issue detected */
+LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
     const BYTE** ip,
     BYTE** op,
     const BYTE** anchor,
     int matchLength,
     const BYTE* const match,
-    limitedOutput_directive limitedOutputBuffer,
+    limitedOutput_directive limit,
     BYTE* oend)
 {
-    int length;
-    BYTE* token;
+    size_t length;
+    BYTE* const token = (*op)++;
 
-#if LZ4HC_DEBUG
-    if (debug) printf("literal : %u  --  match : %u  --  offset : %u\n", (U32)(*ip - *anchor), (U32)matchLength, (U32)(*ip-match));
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6)
+    static const BYTE* start = NULL;
+    static U32 totalCost = 0;
+    U32 const pos = (start==NULL) ? 0 : (U32)(*anchor - start);
+    U32 const ll = (U32)(*ip - *anchor);
+    U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0;
+    U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
+    U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
+    if (start==NULL) start = *anchor;  /* only works for single segment */
+    /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */
+    DEBUGLOG(6, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u",
+                pos,
+                (U32)(*ip - *anchor), matchLength, (U32)(*ip-match),
+                cost, totalCost);
+    totalCost += cost;
 #endif
 
     /* Encode Literal length */
-    length = (int)(*ip - *anchor);
-    token = (*op)++;
-    if ((limitedOutputBuffer) && ((*op + (length>>8) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1;   /* Check output limit */
-    if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255;  *(*op)++ = (BYTE)len; }
-    else *token = (BYTE)(length<<ML_BITS);
+    length = (size_t)(*ip - *anchor);
+    if ((limit) && ((*op + (length >> 8) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1;   /* Check output limit */
+    if (length >= RUN_MASK) {
+        size_t len = length - RUN_MASK;
+        *token = (RUN_MASK << ML_BITS);
+        for(; len >= 255 ; len -= 255) *(*op)++ = 255;
+        *(*op)++ = (BYTE)len;
+    } else {
+        *token = (BYTE)(length << ML_BITS);
+    }
 
     /* Copy Literals */
     LZ4_wildCopy(*op, *anchor, (*op) + length);
     *op += length;
 
     /* Encode Offset */
+    assert( (*ip - match) <= MAX_DISTANCE );   /* note : consider providing offset as a value, rather than as a pointer difference */
     LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2;
 
     /* Encode MatchLength */
-    length = (int)(matchLength-MINMATCH);
-    if ((limitedOutputBuffer) && (*op + (length>>8) + (1 + LASTLITERALS) > oend)) return 1;   /* Check output limit */
-    if (length>=(int)ML_MASK) {
+    assert(matchLength >= MINMATCH);
+    length = (size_t)(matchLength - MINMATCH);
+    if ((limit) && (*op + (length >> 8) + (1 + LASTLITERALS) > oend)) return 1;   /* Check output limit */
+    if (length >= ML_MASK) {
         *token += ML_MASK;
         length -= ML_MASK;
-        for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; }
-        if (length > 254) { length-=255; *(*op)++ = 255; }
+        for(; length >= 510 ; length -= 510) { *(*op)++ = 255; *(*op)++ = 255; }
+        if (length >= 255) { length -= 255; *(*op)++ = 255; }
         *(*op)++ = (BYTE)length;
     } else {
         *token += (BYTE)(length);
@@ -305,68 +476,71 @@
     return 0;
 }
 
-#include "lz4opt.h"
-
-static int LZ4HC_compress_hashChain (
+LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
     LZ4HC_CCtx_internal* const ctx,
     const char* const source,
     char* const dest,
-    int const inputSize,
+    int* srcSizePtr,
     int const maxOutputSize,
     unsigned maxNbAttempts,
-    limitedOutput_directive limit
+    const limitedOutput_directive limit,
+    const dictCtx_directive dict
     )
 {
+    const int inputSize = *srcSizePtr;
+    const int patternAnalysis = (maxNbAttempts > 128);   /* levels 9+ */
+
     const BYTE* ip = (const BYTE*) source;
     const BYTE* anchor = ip;
     const BYTE* const iend = ip + inputSize;
     const BYTE* const mflimit = iend - MFLIMIT;
     const BYTE* const matchlimit = (iend - LASTLITERALS);
 
+    BYTE* optr = (BYTE*) dest;
     BYTE* op = (BYTE*) dest;
-    BYTE* const oend = op + maxOutputSize;
+    BYTE* oend = op + maxOutputSize;
 
-    int   ml, ml2, ml3, ml0;
+    int   ml0, ml, ml2, ml3;
+    const BYTE* start0;
+    const BYTE* ref0;
     const BYTE* ref = NULL;
     const BYTE* start2 = NULL;
     const BYTE* ref2 = NULL;
     const BYTE* start3 = NULL;
     const BYTE* ref3 = NULL;
-    const BYTE* start0;
-    const BYTE* ref0;
 
     /* init */
-    ctx->end += inputSize;
-
-    ip++;
+    *srcSizePtr = 0;
+    if (limit == limitedDestSize) oend -= LASTLITERALS;                  /* Hack for support LZ4 format restriction */
+    if (inputSize < LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
 
     /* Main Loop */
-    while (ip < mflimit) {
-        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts);
-        if (!ml) { ip++; continue; }
+    while (ip <= mflimit) {
+        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis, dict);
+        if (ml<MINMATCH) { ip++; continue; }
 
         /* saved, in case we would skip too much */
-        start0 = ip;
-        ref0 = ref;
-        ml0 = ml;
+        start0 = ip; ref0 = ref; ml0 = ml;
 
 _Search2:
-        if (ip+ml < mflimit)
-            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2, maxNbAttempts);
-        else ml2 = ml;
+        if (ip+ml <= mflimit) {
+            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
+                            ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2,
+                            maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio);
+        } else {
+            ml2 = ml;
+        }
 
-        if (ml2 == ml) { /* No better match */
-            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+        if (ml2 == ml) { /* No better match => encode ML1 */
+            optr = op;
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
             continue;
         }
 
-        if (start0 < ip) {
-            if (start2 < ip + ml0) {  /* empirical */
-                ip = start0;
-                ref = ref0;
-                ml = ml0;
-            }
-        }
+        if (start0 < ip) {   /* first match was skipped at least once */
+            if (start2 < ip + ml0) {  /* squeezing ML1 between ML0(original ML1) and ML2 */
+                ip = start0; ref = ref0; ml = ml0;  /* restore initial ML1 */
+        }   }
 
         /* Here, start0==ip */
         if ((start2 - ip) < 3) {  /* First Match too small : removed */
@@ -377,11 +551,9 @@
         }
 
 _Search3:
-        /*
-        * Currently we have :
-        * ml2 > ml1, and
-        * ip1+3 <= ip2 (usually < ip1+ml1)
-        */
+        /* At this stage, we have :
+        *  ml2 > ml1, and
+        *  ip1+3 <= ip2 (usually < ip1+ml1) */
         if ((start2 - ip) < OPTIMAL_ML) {
             int correction;
             int new_ml = ml;
@@ -396,17 +568,23 @@
         }
         /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
 
-        if (start2 + ml2 < mflimit)
-            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts);
-        else ml3 = ml2;
+        if (start2 + ml2 <= mflimit) {
+            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
+                            start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3,
+                            maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio);
+        } else {
+            ml3 = ml2;
+        }
 
-        if (ml3 == ml2) {  /* No better match : 2 sequences to encode */
+        if (ml3 == ml2) {  /* No better match => encode ML1 and ML2 */
             /* ip & ref are known; Now for ml */
             if (start2 < ip+ml)  ml = (int)(start2 - ip);
             /* Now, encode 2 sequences */
-            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+            optr = op;
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
             ip = start2;
-            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0;
+            optr = op;
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) goto _dest_overflow;
             continue;
         }
 
@@ -424,7 +602,8 @@
                     }
                 }
 
-                if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+                optr = op;
+                if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
                 ip  = start3;
                 ref = ref3;
                 ml  = ml3;
@@ -442,11 +621,12 @@
         }
 
         /*
-        * OK, now we have 3 ascending matches; let's write at least the first one
-        * ip & ref are known; Now for ml
+        * OK, now we have 3 ascending matches;
+        * let's write the first one ML1.
+        * ip & ref are known; Now decide ml.
         */
         if (start2 < ip+ml) {
-            if ((start2 - ip) < (int)ML_MASK) {
+            if ((start2 - ip) < OPTIMAL_ML) {
                 int correction;
                 if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
                 if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
@@ -460,136 +640,309 @@
                 ml = (int)(start2 - ip);
             }
         }
-        if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+        optr = op;
+        if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
 
-        ip = start2;
-        ref = ref2;
-        ml = ml2;
+        /* ML2 becomes ML1 */
+        ip = start2; ref = ref2; ml = ml2;
 
-        start2 = start3;
-        ref2 = ref3;
-        ml2 = ml3;
+        /* ML3 becomes ML2 */
+        start2 = start3; ref2 = ref3; ml2 = ml3;
 
+        /* let's find a new ML3 */
         goto _Search3;
     }
 
+_last_literals:
     /* Encode Last Literals */
-    {   int lastRun = (int)(iend - anchor);
-        if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0;  /* Check output limit */
-        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
-        else *op++ = (BYTE)(lastRun<<ML_BITS);
-        memcpy(op, anchor, iend - anchor);
-        op += iend-anchor;
+    {   size_t lastRunSize = (size_t)(iend - anchor);  /* literals */
+        size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255;
+        size_t const totalSize = 1 + litLength + lastRunSize;
+        if (limit == limitedDestSize) oend += LASTLITERALS;  /* restore correct value */
+        if (limit && (op + totalSize > oend)) {
+            if (limit == limitedOutput) return 0;  /* Check output limit */
+            /* adapt lastRunSize to fill 'dest' */
+            lastRunSize  = (size_t)(oend - op) - 1;
+            litLength = (lastRunSize + 255 - RUN_MASK) / 255;
+            lastRunSize -= litLength;
+        }
+        ip = anchor + lastRunSize;
+
+        if (lastRunSize >= RUN_MASK) {
+            size_t accumulator = lastRunSize - RUN_MASK;
+            *op++ = (RUN_MASK << ML_BITS);
+            for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255;
+            *op++ = (BYTE) accumulator;
+        } else {
+            *op++ = (BYTE)(lastRunSize << ML_BITS);
+        }
+        memcpy(op, anchor, lastRunSize);
+        op += lastRunSize;
     }
 
     /* End */
+    *srcSizePtr = (int) (((const char*)ip) - source);
     return (int) (((char*)op)-dest);
+
+_dest_overflow:
+    if (limit == limitedDestSize) {
+        op = optr;  /* restore correct out pointer */
+        goto _last_literals;
+    }
+    return 0;
 }
 
-static int LZ4HC_getSearchNum(int compressionLevel)
+
+static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx,
+    const char* const source, char* dst,
+    int* srcSizePtr, int dstCapacity,
+    int const nbSearches, size_t sufficient_len,
+    const limitedOutput_directive limit, int const fullUpdate,
+    const dictCtx_directive dict,
+    HCfavor_e favorDecSpeed);
+
+
+LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
+    LZ4HC_CCtx_internal* const ctx,
+    const char* const src,
+    char* const dst,
+    int* const srcSizePtr,
+    int const dstCapacity,
+    int cLevel,
+    const limitedOutput_directive limit,
+    const dictCtx_directive dict
+    )
 {
-    switch (compressionLevel) {
-        default: return 0; /* unused */
-        case 11: return 128; 
-        case 12: return 1<<10; 
+    typedef enum { lz4hc, lz4opt } lz4hc_strat_e;
+    typedef struct {
+        lz4hc_strat_e strat;
+        U32 nbSearches;
+        U32 targetLength;
+    } cParams_t;
+    static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = {
+        { lz4hc,     2, 16 },  /* 0, unused */
+        { lz4hc,     2, 16 },  /* 1, unused */
+        { lz4hc,     2, 16 },  /* 2, unused */
+        { lz4hc,     4, 16 },  /* 3 */
+        { lz4hc,     8, 16 },  /* 4 */
+        { lz4hc,    16, 16 },  /* 5 */
+        { lz4hc,    32, 16 },  /* 6 */
+        { lz4hc,    64, 16 },  /* 7 */
+        { lz4hc,   128, 16 },  /* 8 */
+        { lz4hc,   256, 16 },  /* 9 */
+        { lz4opt,   96, 64 },  /*10==LZ4HC_CLEVEL_OPT_MIN*/
+        { lz4opt,  512,128 },  /*11 */
+        { lz4opt,16384,LZ4_OPT_NUM },  /* 12==LZ4HC_CLEVEL_MAX */
+    };
+
+    DEBUGLOG(4, "LZ4HC_compress_generic(%p, %p, %d)", ctx, src, *srcSizePtr);
+
+    if (limit == limitedDestSize && dstCapacity < 1) return 0;         /* Impossible to store anything */
+    if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0;          /* Unsupported input size (too large or negative) */
+
+    ctx->end += *srcSizePtr;
+    if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT;   /* note : convention is different from lz4frame, maybe something to review */
+    cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel);
+    {   cParams_t const cParam = clTable[cLevel];
+        HCfavor_e const favor = ctx->favorDecSpeed ? favorDecompressionSpeed : favorCompressionRatio;
+        if (cParam.strat == lz4hc)
+            return LZ4HC_compress_hashChain(ctx,
+                                src, dst, srcSizePtr, dstCapacity,
+                                cParam.nbSearches, limit, dict);
+        assert(cParam.strat == lz4opt);
+        return LZ4HC_compress_optimal(ctx,
+                            src, dst, srcSizePtr, dstCapacity,
+                            cParam.nbSearches, cParam.targetLength, limit,
+                            cLevel == LZ4HC_CLEVEL_MAX,   /* ultra mode */
+                            dict, favor);
+    }
+}
+
+static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock);
+
+static int LZ4HC_compress_generic_noDictCtx (   /* compression entry point for a context that has no dictionary context attached */
+    LZ4HC_CCtx_internal* const ctx,
+    const char* const src,
+    char* const dst,
+    int* const srcSizePtr,             /* forwarded unchanged to LZ4HC_compress_generic_internal() */
+    int const dstCapacity,
+    int cLevel,
+    limitedOutput_directive limit
+    )
+{
+    assert(ctx->dictCtx == NULL);      /* precondition : no dictionary context attached */
+    return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, noDictCtx);   /* noDictCtx is passed as a constant directive */
+}
+
+static int LZ4HC_compress_generic_dictCtx (   /* compression entry point when a dictionary context is attached (ctx->dictCtx != NULL) */
+    LZ4HC_CCtx_internal* const ctx,
+    const char* const src,
+    char* const dst,
+    int* const srcSizePtr,
+    int const dstCapacity,
+    int cLevel,
+    limitedOutput_directive limit
+    )
+{
+    const size_t position = ctx->end - ctx->base - ctx->lowLimit;   /* distance between lowLimit and the current end of data */
+    assert(ctx->dictCtx != NULL);
+    if (position >= 64 KB) {   /* presumably the dictionary is out of reach by now (64 KB window) - TODO confirm */
+        ctx->dictCtx = NULL;   /* detach it, then compress without dictionary context */
+        return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+    } else if (position == 0 && *srcSizePtr > 4 KB) {   /* position is still 0 and the input is larger than 4 KB */
+        memcpy(ctx, ctx->dictCtx, sizeof(LZ4HC_CCtx_internal));   /* adopt the dictionary context's whole state */
+        LZ4HC_setExternalDict(ctx, (const BYTE *)src);            /* src starts a new segment; previous content is referenced as external dictionary */
+        ctx->compressionLevel = (short)cLevel;                    /* memcpy replaced the level with the dictionary context's one */
+        return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+    } else {   /* otherwise, keep the dictionary context attached and pass the usingDictCtx directive */
+        return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, usingDictCtx);
     }
 }
 
 static int LZ4HC_compress_generic (
     LZ4HC_CCtx_internal* const ctx,
-    const char* const source,
-    char* const dest,
-    int const inputSize,
-    int const maxOutputSize,
-    int compressionLevel,
+    const char* const src,
+    char* const dst,
+    int* const srcSizePtr,
+    int const dstCapacity,
+    int cLevel,
     limitedOutput_directive limit
     )
 {
-    if (compressionLevel < 1) compressionLevel = LZ4HC_CLEVEL_DEFAULT;
-    if (compressionLevel > 9) {
-        switch (compressionLevel) {
-            case 10: return LZ4HC_compress_hashChain(ctx, source, dest, inputSize, maxOutputSize, 1 << (16-1), limit);
-            case 11: ctx->searchNum = LZ4HC_getSearchNum(compressionLevel); return LZ4HC_compress_optimal(ctx, source, dest, inputSize, maxOutputSize, limit, 128, 0);
-            default:
-            case 12: ctx->searchNum = LZ4HC_getSearchNum(compressionLevel); return LZ4HC_compress_optimal(ctx, source, dest, inputSize, maxOutputSize, limit, LZ4_OPT_NUM, 1);
-        }
+    if (ctx->dictCtx == NULL) {
+        return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+    } else {
+        return LZ4HC_compress_generic_dictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
     }
-    return LZ4HC_compress_hashChain(ctx, source, dest, inputSize, maxOutputSize, 1 << (compressionLevel-1), limit);
 }
 
 
 int LZ4_sizeofStateHC(void) { return sizeof(LZ4_streamHC_t); }
 
-int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel)
+int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
 {
-    LZ4HC_CCtx_internal* ctx = &((LZ4_streamHC_t*)state)->internal_donotuse;
+    LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)state)->internal_donotuse;
     if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0;   /* Error : state is not aligned for pointers (32 or 64 bits) */
+    LZ4_resetStreamHC_fast((LZ4_streamHC_t*)state, compressionLevel);
     LZ4HC_init (ctx, (const BYTE*)src);
-    if (maxDstSize < LZ4_compressBound(srcSize))
-        return LZ4HC_compress_generic (ctx, src, dst, srcSize, maxDstSize, compressionLevel, limitedOutput);
+    if (dstCapacity < LZ4_compressBound(srcSize))
+        return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, limitedOutput);
     else
-        return LZ4HC_compress_generic (ctx, src, dst, srcSize, maxDstSize, compressionLevel, noLimit);
+        return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, noLimit);
 }
 
-int LZ4_compress_HC(const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel)
+int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
+{   /* one-shot compression into caller-provided state : complete reset first, then delegate to the fast-reset variant */
+    if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0;   /* Error : state is not aligned for pointers (32 or 64 bits) */
+    LZ4_resetStreamHC ((LZ4_streamHC_t*)state, compressionLevel);   /* full reset : also clears dictCtx and favorDecSpeed */
+    return LZ4_compress_HC_extStateHC_fastReset(state, src, dst, srcSize, dstCapacity, compressionLevel);   /* 0 is also returned there on misalignment */
+}
+
+int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
 {
 #if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
-    LZ4_streamHC_t* const statePtr = (LZ4_streamHC_t*)malloc(sizeof(LZ4_streamHC_t));
+    LZ4_streamHC_t* const statePtr = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t));
 #else
     LZ4_streamHC_t state;
     LZ4_streamHC_t* const statePtr = &state;
 #endif
-    int const cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, maxDstSize, compressionLevel);
+    int const cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, dstCapacity, compressionLevel);
 #if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
     free(statePtr);
 #endif
     return cSize;
 }
 
+/* LZ4_compress_HC_destSize() : only compatible with regular HC parser; returns 0 when LZ4HC_Data is misaligned */
+int LZ4_compress_HC_destSize(void* LZ4HC_Data, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel)
+{
+    LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse;
+    if (((size_t)(LZ4HC_Data)&(sizeof(void*)-1)) != 0) return 0;   /* Error : state is not aligned for pointers (32 or 64 bits), same check as LZ4_compress_HC_extStateHC() */
+    LZ4_resetStreamHC((LZ4_streamHC_t*)LZ4HC_Data, cLevel);   /* full reset, as done by LZ4_compress_HC_extStateHC() */
+    LZ4HC_init(ctx, (const BYTE*) source);
+    return LZ4HC_compress_generic(ctx, source, dest, sourceSizePtr, targetDestSize, cLevel, limitedDestSize);   /* *sourceSizePtr is handed to the compressor by pointer */
+}
+
 
 
 /**************************************
 *  Streaming Functions
 **************************************/
 /* allocation */
-LZ4_streamHC_t* LZ4_createStreamHC(void) { return (LZ4_streamHC_t*)malloc(sizeof(LZ4_streamHC_t)); }
-int             LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) { free(LZ4_streamHCPtr); return 0; }
+LZ4_streamHC_t* LZ4_createStreamHC(void) {   /* allocates a stream and resets it to the default level; NULL when allocation fails */
+    LZ4_streamHC_t* const hcStream = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t));
+    if (hcStream != NULL)
+        LZ4_resetStreamHC(hcStream, LZ4HC_CLEVEL_DEFAULT);   /* initialize only when the allocation succeeded */
+    return hcStream;
+}
+
+int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) {   /* releases a stream obtained from LZ4_createStreamHC(); always returns 0 */
+    DEBUGLOG(4, "LZ4_freeStreamHC(%p)", LZ4_streamHCPtr);
+    if (!LZ4_streamHCPtr) return 0;  /* support free on NULL */
+    FREEMEM(LZ4_streamHCPtr);   /* counterpart of the ALLOC() done at creation, same as LZ4_freeHC() */
+    return 0;
+}
 
 
 /* initialization */
 void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
 {
     LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= sizeof(size_t) * LZ4_STREAMHCSIZE_SIZET);   /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
+    DEBUGLOG(4, "LZ4_resetStreamHC(%p, %d)", LZ4_streamHCPtr, compressionLevel);
+    LZ4_streamHCPtr->internal_donotuse.end = (const BYTE *)(ptrdiff_t)-1;
     LZ4_streamHCPtr->internal_donotuse.base = NULL;
-    LZ4_streamHCPtr->internal_donotuse.compressionLevel = (unsigned)compressionLevel;
-    LZ4_streamHCPtr->internal_donotuse.searchNum = LZ4HC_getSearchNum(compressionLevel);
+    LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL;
+    LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = 0;
+    LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
+}
+
+void LZ4_resetStreamHC_fast (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
+{   /* lighter reset than LZ4_resetStreamHC() : `end` is rebased instead of overwritten, and favorDecSpeed is left untouched */
+    DEBUGLOG(4, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, compressionLevel);
+    LZ4_streamHCPtr->internal_donotuse.end -= (uptrval)LZ4_streamHCPtr->internal_donotuse.base;   /* subtract base's address, so `end` matches the NULL base set on the next line */
+    LZ4_streamHCPtr->internal_donotuse.base = NULL;      /* a NULL base makes LZ4_compressHC_continue_generic() call LZ4HC_init() */
+    LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL;   /* detach any attached dictionary context */
+    LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
+}
+
+void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
+{   /* normalizes the requested level before storing it into the stream state */
+    int const requested = (compressionLevel < 1) ? LZ4HC_CLEVEL_DEFAULT : compressionLevel;   /* anything below 1 selects the default level */
+    int const level = (requested > LZ4HC_CLEVEL_MAX) ? LZ4HC_CLEVEL_MAX : requested;          /* cap at the highest supported level */
+    LZ4_streamHCPtr->internal_donotuse.compressionLevel = (short)level;
+}
+
+void LZ4_favorDecompressionSpeed(LZ4_streamHC_t* LZ4_streamHCPtr, int favor)
+{
+    LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = (favor!=0);
 }
 
 int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize)
 {
-    LZ4HC_CCtx_internal* ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+    LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+    DEBUGLOG(4, "LZ4_loadDictHC(%p, %p, %d)", LZ4_streamHCPtr, dictionary, dictSize);
     if (dictSize > 64 KB) {
         dictionary += dictSize - 64 KB;
         dictSize = 64 KB;
     }
+    LZ4_resetStreamHC(LZ4_streamHCPtr, ctxPtr->compressionLevel);
     LZ4HC_init (ctxPtr, (const BYTE*)dictionary);
     ctxPtr->end = (const BYTE*)dictionary + dictSize;
-    if (ctxPtr->compressionLevel >= LZ4HC_CLEVEL_OPT_MIN)
-        LZ4HC_updateBinTree(ctxPtr, ctxPtr->end - MFLIMIT, ctxPtr->end - LASTLITERALS);
-    else
-        if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);
+    if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);
     return dictSize;
 }
 
+void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream) {
+    working_stream->internal_donotuse.dictCtx = (dictionary_stream == NULL) ? NULL : &(dictionary_stream->internal_donotuse);   /* a NULL dictionary_stream detaches */
+}
 
 /* compression */
 
 static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock)
 {
-    if (ctxPtr->compressionLevel >= LZ4HC_CLEVEL_OPT_MIN)
-        LZ4HC_updateBinTree(ctxPtr, ctxPtr->end - MFLIMIT, ctxPtr->end - LASTLITERALS);
-    else
-        if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);   /* Referencing remaining dictionary content */
+    DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock);
+    if (ctxPtr->end >= ctxPtr->base + ctxPtr->dictLimit + 4)
+        LZ4HC_Insert (ctxPtr, ctxPtr->end-3);   /* Referencing remaining dictionary content */
 
     /* Only one memory segment for extDict, so any previous extDict is lost at this stage */
     ctxPtr->lowLimit  = ctxPtr->dictLimit;
@@ -601,12 +954,14 @@
 }
 
 static int LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
-                                            const char* source, char* dest,
-                                            int inputSize, int maxOutputSize, limitedOutput_directive limit)
+                                            const char* src, char* dst,
+                                            int* srcSizePtr, int dstCapacity,
+                                            limitedOutput_directive limit)
 {
-    LZ4HC_CCtx_internal* ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+    LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+    DEBUGLOG(4, "LZ4_compressHC_continue_generic(%p, %p, %d)", LZ4_streamHCPtr, src, *srcSizePtr);
     /* auto-init if forgotten */
-    if (ctxPtr->base == NULL) LZ4HC_init (ctxPtr, (const BYTE*) source);
+    if (ctxPtr->base == NULL) LZ4HC_init (ctxPtr, (const BYTE*) src);
 
     /* Check overflow */
     if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB) {
@@ -616,30 +971,36 @@
     }
 
     /* Check if blocks follow each other */
-    if ((const BYTE*)source != ctxPtr->end) LZ4HC_setExternalDict(ctxPtr, (const BYTE*)source);
+    if ((const BYTE*)src != ctxPtr->end) LZ4HC_setExternalDict(ctxPtr, (const BYTE*)src);
 
     /* Check overlapping input/dictionary space */
-    {   const BYTE* sourceEnd = (const BYTE*) source + inputSize;
+    {   const BYTE* sourceEnd = (const BYTE*) src + *srcSizePtr;
         const BYTE* const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
         const BYTE* const dictEnd   = ctxPtr->dictBase + ctxPtr->dictLimit;
-        if ((sourceEnd > dictBegin) && ((const BYTE*)source < dictEnd)) {
+        if ((sourceEnd > dictBegin) && ((const BYTE*)src < dictEnd)) {
             if (sourceEnd > dictEnd) sourceEnd = dictEnd;
             ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
             if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit;
         }
     }
 
-    return LZ4HC_compress_generic (ctxPtr, source, dest, inputSize, maxOutputSize, ctxPtr->compressionLevel, limit);
+    return LZ4HC_compress_generic (ctxPtr, src, dst, srcSizePtr, dstCapacity, ctxPtr->compressionLevel, limit);
 }
 
-int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize)
+int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int srcSize, int dstCapacity)
 {
-    if (maxOutputSize < LZ4_compressBound(inputSize))
-        return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, limitedOutput);
+    if (dstCapacity < LZ4_compressBound(srcSize))
+        return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, limitedOutput);
     else
-        return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, noLimit);
+        return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, noLimit);
 }
 
+int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize)
+{   /* streaming compression in limitedDestSize mode : srcSizePtr is a pointer so the compressor can write the consumed input size back */
+    return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, limitedDestSize);
+}
+
+
 
 /* dictionary saving */
 
@@ -647,6 +1008,7 @@
 {
     LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
     int const prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
+    DEBUGLOG(4, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize);
     if (dictSize > 64 KB) dictSize = 64 KB;
     if (dictSize < 4) dictSize = 0;
     if (dictSize > prefixSize) dictSize = prefixSize;
@@ -686,35 +1048,365 @@
 {
     LZ4HC_CCtx_internal *ctx = &((LZ4_streamHC_t*)state)->internal_donotuse;
     if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1;   /* Error : pointer is not aligned for pointer (32 or 64 bits) */
+    LZ4_resetStreamHC((LZ4_streamHC_t*)state, ((LZ4_streamHC_t*)state)->internal_donotuse.compressionLevel);
     LZ4HC_init(ctx, (const BYTE*)inputBuffer);
-    ctx->inputBuffer = (BYTE*)inputBuffer;
     return 0;
 }
 
-void* LZ4_createHC (char* inputBuffer)
+void* LZ4_createHC (const char* inputBuffer)
 {
-    LZ4_streamHC_t* hc4 = (LZ4_streamHC_t*)ALLOCATOR(1, sizeof(LZ4_streamHC_t));
+    LZ4_streamHC_t* hc4 = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t));
     if (hc4 == NULL) return NULL;   /* not enough memory */
+    LZ4_resetStreamHC(hc4, 0 /* compressionLevel */);
     LZ4HC_init (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
-    hc4->internal_donotuse.inputBuffer = (BYTE*)inputBuffer;
     return hc4;
 }
 
-int LZ4_freeHC (void* LZ4HC_Data) { FREEMEM(LZ4HC_Data); return 0; }
-
-int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel)
-{
-    return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, source, dest, inputSize, 0, compressionLevel, noLimit);
+int LZ4_freeHC (void* LZ4HC_Data) {
+    if (!LZ4HC_Data) return 0;  /* support free on NULL */
+    FREEMEM(LZ4HC_Data);
+    return 0;
 }
 
-int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel)
+int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel)
 {
-    return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput);
+    return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, noLimit);
+}
+
+int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel)
+{
+    return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput);
 }
 
 char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
 {
-    LZ4HC_CCtx_internal* const hc4 = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse;
-    int const dictSize = LZ4_saveDictHC((LZ4_streamHC_t*)LZ4HC_Data, (char*)(hc4->inputBuffer), 64 KB);
-    return (char*)(hc4->inputBuffer + dictSize);
+    LZ4_streamHC_t *ctx = (LZ4_streamHC_t*)LZ4HC_Data;
+    const BYTE *bufferStart = ctx->internal_donotuse.base + ctx->internal_donotuse.lowLimit;
+    LZ4_resetStreamHC_fast(ctx, ctx->internal_donotuse.compressionLevel);
+    /* avoid const char * -> char * conversion warning :( */
+    return (char *)(uptrval)bufferStart;
 }
+
+
+/* ================================================
+ * LZ4 Optimal parser (levels 10-12)
+ * ===============================================*/
+typedef struct {   /* one entry per relative position explored by the optimal parser */
+    int price;    /* cost, in bytes, of the cheapest way found so far to reach this position */
+    int off;      /* match offset; set to 0 for a literal entry */
+    int mlen;     /* match length; 1 marks a literal entry */
+    int litlen;   /* literal run length associated with this entry */
+} LZ4HC_optimal_t;
+
+/* LZ4HC_literalsPrice() : cost, in bytes, of storing a run of `litlen` literals */
+LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen)
+{
+    int const runMask = (int)RUN_MASK;     /* shortest run that needs extra length bytes */
+    if (litlen < runMask) return litlen;   /* short run : only the literals themselves are counted */
+    /* long run : 1 extra length byte, plus 1 more for each additional 255 literals */
+    return litlen + 1 + (litlen - runMask) / 255;
+}
+
+
+/* LZ4HC_sequencePrice() : cost, in bytes, of one complete sequence; requires mlen >= MINMATCH */
+LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen)
+{
+    int const longMatchMin = (int)(ML_MASK+MINMATCH);   /* shortest match that needs extra length bytes */
+    /* fixed part : token (1 byte) + 16-bit offset (2 bytes), followed by the literals */
+    int cost = 1 + 2 + LZ4HC_literalsPrice(litlen);
+
+    /* long match : 1 extra length byte, plus 1 more for each additional 255 bytes */
+    if (mlen >= longMatchMin)
+        cost += 1 + (mlen - longMatchMin) / 255;
+    return cost;
+}
+
+
+typedef struct {   /* result of a match search, as returned by LZ4HC_FindLongerMatch() */
+    int off;   /* backward distance from the searched position to the match */
+    int len;   /* match length; 0 means no match was returned */
+} LZ4HC_match_t;
+
+LZ4_FORCE_INLINE LZ4HC_match_t   /* match found at ip, or {0,0} when none is strictly longer than minLen */
+LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx,
+                      const BYTE* ip, const BYTE* const iHighLimit,
+                      int minLen, int nbSearches,
+                      const dictCtx_directive dict,
+                      const HCfavor_e favorDecSpeed)
+{
+    LZ4HC_match_t match = { 0 , 0 };   /* default answer : no match (off=0, len=0) */
+    const BYTE* matchPtr = NULL;
+    /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
+     * but this won't be the case here, as we define iLowLimit==ip,
+     * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
+    int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed);
+    if (matchLength <= minLen) return match;   /* nothing strictly longer than minLen was found */
+    if (favorDecSpeed) {
+        if ((matchLength>18) & (matchLength<=36)) matchLength=18;   /* favor shortcut : lengths in (18,36] are truncated to 18 */
+    }
+    match.len = matchLength;
+    match.off = (int)(ip-matchPtr);   /* backward distance from ip to the match */
+    return match;
+}
+
+
+static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
+                                    const char* const source,
+                                    char* dst,
+                                    int* srcSizePtr,
+                                    int dstCapacity,
+                                    int const nbSearches,
+                                    size_t sufficient_len,
+                                    const limitedOutput_directive limit,
+                                    int const fullUpdate,
+                                    const dictCtx_directive dict,
+                                    const HCfavor_e favorDecSpeed)
+{
+#define TRAILING_LITERALS 3
+    LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS];   /* ~64 KB, which is a bit large for stack... */
+
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* anchor = ip;
+    const BYTE* const iend = ip + *srcSizePtr;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = iend - LASTLITERALS;
+    BYTE* op = (BYTE*) dst;
+    BYTE* opSaved = (BYTE*) dst;
+    BYTE* oend = op + dstCapacity;
+
+    /* init */
+    DEBUGLOG(5, "LZ4HC_compress_optimal");
+    *srcSizePtr = 0;
+    if (limit == limitedDestSize) oend -= LASTLITERALS;   /* Hack for support LZ4 format restriction */
+    if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;
+
+    /* Main Loop */
+    assert(ip - anchor < LZ4_MAX_INPUT_SIZE);
+    while (ip <= mflimit) {
+         int const llen = (int)(ip - anchor);
+         int best_mlen, best_off;
+         int cur, last_match_pos = 0;
+
+         LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed);
+         if (firstMatch.len==0) { ip++; continue; }
+
+         if ((size_t)firstMatch.len > sufficient_len) {
+             /* good enough solution : immediate encoding */
+             int const firstML = firstMatch.len;
+             const BYTE* const matchPos = ip - firstMatch.off;
+             opSaved = op;
+             if ( LZ4HC_encodeSequence(&ip, &op, &anchor, firstML, matchPos, limit, oend) )   /* updates ip, op and anchor */
+                 goto _dest_overflow;
+             continue;
+         }
+
+         /* set prices for first positions (literals) */
+         {   int rPos;
+             for (rPos = 0 ; rPos < MINMATCH ; rPos++) {
+                 int const cost = LZ4HC_literalsPrice(llen + rPos);
+                 opt[rPos].mlen = 1;
+                 opt[rPos].off = 0;
+                 opt[rPos].litlen = llen + rPos;
+                 opt[rPos].price = cost;
+                 DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
+                             rPos, cost, opt[rPos].litlen);
+         }   }
+         /* set prices using initial match */
+         {   int mlen = MINMATCH;
+             int const matchML = firstMatch.len;   /* necessarily < sufficient_len < LZ4_OPT_NUM */
+             int const offset = firstMatch.off;
+             assert(matchML < LZ4_OPT_NUM);
+             for ( ; mlen <= matchML ; mlen++) {
+                 int const cost = LZ4HC_sequencePrice(llen, mlen);
+                 opt[mlen].mlen = mlen;
+                 opt[mlen].off = offset;
+                 opt[mlen].litlen = llen;
+                 opt[mlen].price = cost;
+                 DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup",
+                             mlen, cost, mlen);
+         }   }
+         last_match_pos = firstMatch.len;
+         {   int addLit;
+             for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
+                 opt[last_match_pos+addLit].mlen = 1; /* literal */
+                 opt[last_match_pos+addLit].off = 0;
+                 opt[last_match_pos+addLit].litlen = addLit;
+                 opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
+                 DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
+                             last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
+         }   }
+
+         /* check further positions */
+         for (cur = 1; cur < last_match_pos; cur++) {
+             const BYTE* const curPtr = ip + cur;
+             LZ4HC_match_t newMatch;
+
+             if (curPtr > mflimit) break;
+             DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u",
+                     cur, opt[cur].price, opt[cur+1].price, cur+1);
+             if (fullUpdate) {
+                 /* not useful to search here if next position has same (or lower) cost */
+                 if ( (opt[cur+1].price <= opt[cur].price)
+                   /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */
+                   && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) )
+                     continue;
+             } else {
+                 /* not useful to search here if next position has same (or lower) cost */
+                 if (opt[cur+1].price <= opt[cur].price) continue;
+             }
+
+             DEBUGLOG(7, "search at rPos:%u", cur);
+             if (fullUpdate)
+                 newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed);
+             else
+                 /* only test matches of minimum length; slightly faster, but misses a few bytes */
+                 newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches, dict, favorDecSpeed);
+             if (!newMatch.len) continue;
+
+             if ( ((size_t)newMatch.len > sufficient_len)
+               || (newMatch.len + cur >= LZ4_OPT_NUM) ) {
+                 /* immediate encoding */
+                 best_mlen = newMatch.len;
+                 best_off = newMatch.off;
+                 last_match_pos = cur + 1;
+                 goto encode;
+             }
+
+             /* before match : set price with literals at beginning */
+             {   int const baseLitlen = opt[cur].litlen;
+                 int litlen;
+                 for (litlen = 1; litlen < MINMATCH; litlen++) {
+                     int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen);
+                     int const pos = cur + litlen;
+                     if (price < opt[pos].price) {
+                         opt[pos].mlen = 1; /* literal */
+                         opt[pos].off = 0;
+                         opt[pos].litlen = baseLitlen+litlen;
+                         opt[pos].price = price;
+                         DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)",
+                                     pos, price, opt[pos].litlen);
+             }   }   }
+
+             /* set prices using match at position = cur */
+             {   int const matchML = newMatch.len;
+                 int ml = MINMATCH;
+
+                 assert(cur + newMatch.len < LZ4_OPT_NUM);
+                 for ( ; ml <= matchML ; ml++) {
+                     int const pos = cur + ml;
+                     int const offset = newMatch.off;
+                     int price;
+                     int ll;
+                     DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)",
+                                 pos, last_match_pos);
+                     if (opt[cur].mlen == 1) {
+                         ll = opt[cur].litlen;
+                         price = ((cur > ll) ? opt[cur - ll].price : 0)
+                               + LZ4HC_sequencePrice(ll, ml);
+                     } else {
+                         ll = 0;
+                         price = opt[cur].price + LZ4HC_sequencePrice(0, ml);
+                     }
+
+                    assert((U32)favorDecSpeed <= 1);
+                     if (pos > last_match_pos+TRAILING_LITERALS
+                      || price <= opt[pos].price - (int)favorDecSpeed) {
+                         DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)",
+                                     pos, price, ml);
+                         assert(pos < LZ4_OPT_NUM);
+                         if ( (ml == matchML)  /* last pos of last match */
+                           && (last_match_pos < pos) )
+                             last_match_pos = pos;
+                         opt[pos].mlen = ml;
+                         opt[pos].off = offset;
+                         opt[pos].litlen = ll;
+                         opt[pos].price = price;
+             }   }   }
+             /* complete following positions with literals */
+             {   int addLit;
+                 for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
+                     opt[last_match_pos+addLit].mlen = 1; /* literal */
+                     opt[last_match_pos+addLit].off = 0;
+                     opt[last_match_pos+addLit].litlen = addLit;
+                     opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
+                     DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
+             }   }
+         }  /* for (cur = 1; cur <= last_match_pos; cur++) */
+
+         best_mlen = opt[last_match_pos].mlen;
+         best_off = opt[last_match_pos].off;
+         cur = last_match_pos - best_mlen;
+
+ encode: /* cur, last_match_pos, best_mlen, best_off must be set */
+         assert(cur < LZ4_OPT_NUM);
+         assert(last_match_pos >= 1);  /* == 1 when only one candidate */
+         DEBUGLOG(6, "reverse traversal, looking for shortest path (last_match_pos=%i)", last_match_pos);
+         {   int candidate_pos = cur;
+             int selected_matchLength = best_mlen;
+             int selected_offset = best_off;
+             while (1) {  /* from end to beginning */
+                 int const next_matchLength = opt[candidate_pos].mlen;  /* can be 1, means literal */
+                 int const next_offset = opt[candidate_pos].off;
+                 DEBUGLOG(7, "pos %i: sequence length %i", candidate_pos, selected_matchLength);
+                 opt[candidate_pos].mlen = selected_matchLength;
+                 opt[candidate_pos].off = selected_offset;
+                 selected_matchLength = next_matchLength;
+                 selected_offset = next_offset;
+                 if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */
+                 assert(next_matchLength > 0);  /* can be 1, means literal */
+                 candidate_pos -= next_matchLength;
+         }   }
+
+         /* encode all recorded sequences in order */
+         {   int rPos = 0;  /* relative position (to ip) */
+             while (rPos < last_match_pos) {
+                 int const ml = opt[rPos].mlen;
+                 int const offset = opt[rPos].off;
+                 if (ml == 1) { ip++; rPos++; continue; }  /* literal; note: can end up with several literals, in which case, skip them */
+                 rPos += ml;
+                 assert(ml >= MINMATCH);
+                 assert((offset >= 1) && (offset <= MAX_DISTANCE));
+                 opSaved = op;
+                 if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) )   /* updates ip, op and anchor */
+                     goto _dest_overflow;
+         }   }
+     }  /* while (ip <= mflimit) */
+
+ _last_literals:
+     /* Encode Last Literals */
+     {   size_t lastRunSize = (size_t)(iend - anchor);  /* literals */
+         size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255;
+         size_t const totalSize = 1 + litLength + lastRunSize;
+         if (limit == limitedDestSize) oend += LASTLITERALS;  /* restore correct value */
+         if (limit && (op + totalSize > oend)) {
+             if (limit == limitedOutput) return 0;  /* Check output limit */
+             /* adapt lastRunSize to fill 'dst' */
+             lastRunSize  = (size_t)(oend - op) - 1;
+             litLength = (lastRunSize + 255 - RUN_MASK) / 255;
+             lastRunSize -= litLength;
+         }
+         ip = anchor + lastRunSize;
+
+         if (lastRunSize >= RUN_MASK) {
+             size_t accumulator = lastRunSize - RUN_MASK;
+             *op++ = (RUN_MASK << ML_BITS);
+             for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255;
+             *op++ = (BYTE) accumulator;
+         } else {
+             *op++ = (BYTE)(lastRunSize << ML_BITS);
+         }
+         memcpy(op, anchor, lastRunSize);
+         op += lastRunSize;
+     }
+
+     /* End */
+     *srcSizePtr = (int) (((const char*)ip) - source);
+     return (int) ((char*)op-dst);
+
+ _dest_overflow:
+     if (limit == limitedDestSize) {
+         op = opSaved;  /* restore correct out pointer */
+         goto _last_literals;
+     }
+     return 0;
+ }
diff --git a/lib/lz4hc.h b/lib/lz4hc.h
index 1036fd0..970fa39 100644
--- a/lib/lz4hc.h
+++ b/lib/lz4hc.h
@@ -1,7 +1,7 @@
 /*
    LZ4 HC - High Compression Mode of LZ4
    Header File
-   Copyright (C) 2011-2016, Yann Collet.
+   Copyright (C) 2011-2017, Yann Collet.
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
    Redistribution and use in source and binary forms, with or without
@@ -39,14 +39,14 @@
 #endif
 
 /* --- Dependency --- */
-/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */
+/* note : lz4hc requires lz4.h/lz4.c for compilation */
 #include "lz4.h"   /* stddef, LZ4LIB_API, LZ4_DEPRECATED */
 
 
 /* --- Useful constants --- */
 #define LZ4HC_CLEVEL_MIN         3
 #define LZ4HC_CLEVEL_DEFAULT     9
-#define LZ4HC_CLEVEL_OPT_MIN    11
+#define LZ4HC_CLEVEL_OPT_MIN    10
 #define LZ4HC_CLEVEL_MAX        12
 
 
@@ -54,12 +54,12 @@
  *  Block Compression
  **************************************/
 /*! LZ4_compress_HC() :
- * Compress data from `src` into `dst`, using the more powerful but slower "HC" algorithm.
+ *  Compress data from `src` into `dst`, using the more powerful but slower "HC" algorithm.
  * `dst` must be already allocated.
- * Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h")
- * Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
- * `compressionLevel` : Recommended values are between 4 and 9, although any value between 1 and LZ4HC_MAX_CLEVEL will work.
- *                      Values >LZ4HC_MAX_CLEVEL behave the same as LZ4HC_MAX_CLEVEL.
+ *  Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h")
+ *  Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
+ * `compressionLevel` : any value between 1 and LZ4HC_CLEVEL_MAX will work.
+ *                      Values > LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX.
  * @return : the number of bytes written into 'dst'
  *           or 0 if compression fails.
  */
@@ -72,12 +72,12 @@
 
 
 /*! LZ4_compress_HC_extStateHC() :
- * Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`.
+ *  Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`.
  * `state` size is provided by LZ4_sizeofStateHC().
- * Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() will do properly).
+ *  Memory segment must be aligned on 8-byte boundaries (which a normal malloc() should do properly).
  */
-LZ4LIB_API int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
 LZ4LIB_API int LZ4_sizeofStateHC(void);
+LZ4LIB_API int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
 
 
 /*-************************************
@@ -87,10 +87,10 @@
  typedef union LZ4_streamHC_u LZ4_streamHC_t;   /* incomplete type (defined later) */
 
 /*! LZ4_createStreamHC() and LZ4_freeStreamHC() :
- * These functions create and release memory for LZ4 HC streaming state.
- * Newly created states are automatically initialized.
- * Existing states can be re-used several times, using LZ4_resetStreamHC().
- * These methods are API and ABI stable, they can be used in combination with a DLL.
+ *  These functions create and release memory for LZ4 HC streaming state.
+ *  Newly created states are automatically initialized.
+ *  Existing states can be re-used several times, using LZ4_resetStreamHC().
+ *  These methods are API and ABI stable, they can be used in combination with a DLL.
  */
 LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void);
 LZ4LIB_API int             LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
@@ -113,25 +113,23 @@
 
   Then, use LZ4_compress_HC_continue() to compress each successive block.
   Previous memory blocks (including initial dictionary when present) must remain accessible and unmodified during compression.
-  'dst' buffer should be sized to handle worst case scenarios, using LZ4_compressBound(), to ensure operation success.
+  'dst' buffer should be sized to handle worst case scenarios (see LZ4_compressBound()), to ensure operation success.
+  This matters because, in case of failure, the API does not guarantee context recovery, and the context will have to be reset.
+  If `dst` buffer budget cannot be >= LZ4_compressBound(), consider using LZ4_compress_HC_continue_destSize() instead.
 
-  If, for any reason, previous data blocks can't be preserved unmodified in memory during next compression block,
-  you must save it to a safer memory space, using LZ4_saveDictHC().
+  If, for any reason, previous data block can't be preserved unmodified in memory for next compression block,
+  you can save it to a more stable memory space, using LZ4_saveDictHC().
   Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer'.
 */
 
 
-/*-******************************************
- * !!!!!   STATIC LINKING ONLY   !!!!!
- *******************************************/
-
- /*-*************************************
+/*-**************************************************************
  * PRIVATE DEFINITIONS :
  * Do not use these definitions.
  * They are exposed to allow static allocation of `LZ4_streamHC_t`.
  * Using these definitions makes the code vulnerable to potential API break when upgrading LZ4
- **************************************/
-#define LZ4HC_DICTIONARY_LOGSIZE 17
+ ****************************************************************/
+#define LZ4HC_DICTIONARY_LOGSIZE 16
 #define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
 #define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
 
@@ -143,41 +141,43 @@
 #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
 #include <stdint.h>
 
-typedef struct
+typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal;
+struct LZ4HC_CCtx_internal
 {
     uint32_t   hashTable[LZ4HC_HASHTABLESIZE];
     uint16_t   chainTable[LZ4HC_MAXD];
-    const uint8_t* end;        /* next block here to continue on current prefix */
-    const uint8_t* base;       /* All index relative to this position */
-    const uint8_t* dictBase;   /* alternate base for extDict */
-    uint8_t* inputBuffer;      /* deprecated */
-    uint32_t   dictLimit;        /* below that point, need extDict */
-    uint32_t   lowLimit;         /* below that point, no more dict */
-    uint32_t   nextToUpdate;     /* index from which to continue dictionary update */
-    uint32_t   searchNum;        /* only for optimal parser */
-    uint32_t   compressionLevel;
-} LZ4HC_CCtx_internal;
+    const uint8_t* end;         /* next block here to continue on current prefix */
+    const uint8_t* base;        /* All index relative to this position */
+    const uint8_t* dictBase;    /* alternate base for extDict */
+    uint32_t   dictLimit;       /* below that point, need extDict */
+    uint32_t   lowLimit;        /* below that point, no more dict */
+    uint32_t   nextToUpdate;    /* index from which to continue dictionary update */
+    short      compressionLevel;
+    short      favorDecSpeed;
+    const LZ4HC_CCtx_internal* dictCtx;
+};
 
 #else
 
-typedef struct
+typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal;
+struct LZ4HC_CCtx_internal
 {
     unsigned int   hashTable[LZ4HC_HASHTABLESIZE];
-    unsigned short   chainTable[LZ4HC_MAXD];
+    unsigned short chainTable[LZ4HC_MAXD];
     const unsigned char* end;        /* next block here to continue on current prefix */
     const unsigned char* base;       /* All index relative to this position */
     const unsigned char* dictBase;   /* alternate base for extDict */
-    unsigned char* inputBuffer;      /* deprecated */
     unsigned int   dictLimit;        /* below that point, need extDict */
     unsigned int   lowLimit;         /* below that point, no more dict */
     unsigned int   nextToUpdate;     /* index from which to continue dictionary update */
-    unsigned int   searchNum;        /* only for optimal parser */
-    unsigned int   compressionLevel;
-} LZ4HC_CCtx_internal;
+    short          compressionLevel;
+    short          favorDecSpeed;
+    const LZ4HC_CCtx_internal* dictCtx;
+};
 
 #endif
 
-#define LZ4_STREAMHCSIZE       (4*LZ4HC_HASHTABLESIZE + 2*LZ4HC_MAXD + 56) /* 393268 */
+#define LZ4_STREAMHCSIZE       (4*LZ4HC_HASHTABLESIZE + 2*LZ4HC_MAXD + 56) /* 262200 */
 #define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
 union LZ4_streamHC_u {
     size_t table[LZ4_STREAMHCSIZE_SIZET];
@@ -199,26 +199,32 @@
 /* see lz4.h LZ4_DISABLE_DEPRECATE_WARNINGS to turn off deprecation warnings */
 
 /* deprecated compression functions */
-/* these functions will trigger warning messages in future releases */
-LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC               (const char* source, char* dest, int inputSize);
-LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
-LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
-LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
-LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC_withStateHC               (void* state, const char* source, char* dest, int inputSize);
-LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
-LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
-LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
-LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC_continue               (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
-LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC               (const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_withStateHC               (void* state, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_continue               (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
 
-/* Deprecated Streaming functions using older model; should no longer be used */
-LZ4_DEPRECATED("use LZ4_createStreamHC() instead") void* LZ4_createHC (char* inputBuffer);
-LZ4_DEPRECATED("use LZ4_saveDictHC() instead")     char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
-LZ4_DEPRECATED("use LZ4_freeStreamHC() instead")   int   LZ4_freeHC (void* LZ4HC_Data);
-LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
-LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
-LZ4_DEPRECATED("use LZ4_createStreamHC() instead") int   LZ4_sizeofStreamStateHC(void);
-LZ4_DEPRECATED("use LZ4_resetStreamHC() instead")  int   LZ4_resetStreamStateHC(void* state, char* inputBuffer);
+/* Obsolete streaming functions; degraded functionality; do not use!
+ *
+ * In order to perform streaming compression, these functions depended on data
+ * that is no longer tracked in the state. They have been preserved as well as
+ * possible: using them will still produce a correct output. However, use of
+ * LZ4_slideInputBufferHC() will truncate the history of the stream, rather
+ * than preserve a window-sized chunk of history.
+ */
+LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API void* LZ4_createHC (const char* inputBuffer);
+LZ4_DEPRECATED("use LZ4_saveDictHC() instead") LZ4LIB_API     char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
+LZ4_DEPRECATED("use LZ4_freeStreamHC() instead") LZ4LIB_API   int   LZ4_freeHC (void* LZ4HC_Data);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API int   LZ4_sizeofStreamStateHC(void);
+LZ4_DEPRECATED("use LZ4_resetStreamHC() instead") LZ4LIB_API  int   LZ4_resetStreamStateHC(void* state, char* inputBuffer);
 
 
 #if defined (__cplusplus)
@@ -226,3 +232,124 @@
 #endif
 
 #endif /* LZ4_HC_H_19834876238432 */
+
+
+/*-**************************************************
+ * !!!!!     STATIC LINKING ONLY     !!!!!
+ * Following definitions are considered experimental.
+ * They should not be linked from DLL,
+ * as there is no guarantee of API stability yet.
+ * Prototypes will be promoted to "stable" status
+ * after successful usage in real-life scenarios.
+ ***************************************************/
+#ifdef LZ4_HC_STATIC_LINKING_ONLY   /* protection macro */
+#ifndef LZ4_HC_SLO_098092834
+#define LZ4_HC_SLO_098092834
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*! LZ4_compress_HC_destSize() : v1.8.0 (experimental)
+ *  Will try to compress as much data from `src` as possible
+ *  that can fit into `targetDstSize` budget.
+ *  Result is provided in 2 parts :
+ * @return : the number of bytes written into 'dst'
+ *           or 0 if compression fails.
+ * `srcSizePtr` : value will be updated to indicate how many bytes were read from `src`
+ */
+int LZ4_compress_HC_destSize(void* LZ4HC_Data,
+                             const char* src, char* dst,
+                             int* srcSizePtr, int targetDstSize,
+                             int compressionLevel);
+
+/*! LZ4_compress_HC_continue_destSize() : v1.8.0 (experimental)
+ *  Similar to LZ4_compress_HC_continue(),
+ *  but will read a variable nb of bytes from `src`
+ *  to fit into `targetDstSize` budget.
+ *  Result is provided in 2 parts :
+ * @return : the number of bytes written into 'dst'
+ *           or 0 if compression fails.
+ * `srcSizePtr` : value will be updated to indicate how many bytes were read from `src`.
+ */
+int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr,
+                            const char* src, char* dst,
+                            int* srcSizePtr, int targetDstSize);
+
+/*! LZ4_setCompressionLevel() : v1.8.0 (experimental)
+ *  It's possible to change compression level between 2 invocations of LZ4_compress_HC_continue*()
+ */
+void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
+
+/*! LZ4_favorDecompressionSpeed() : v1.8.2 (experimental)
+ *  Parser will select decisions favoring decompression over compression ratio.
+ *  Only works at highest compression settings (level >= LZ4HC_CLEVEL_OPT_MIN)
+ */
+void LZ4_favorDecompressionSpeed(LZ4_streamHC_t* LZ4_streamHCPtr, int favor);
+
+/*! LZ4_resetStreamHC_fast() :
+ *  When an LZ4_streamHC_t is known to be in an internally coherent state,
+ *  it can often be prepared for a new compression with almost no work, only
+ *  sometimes falling back to the full, expensive reset that is always required
+ *  when the stream is in an indeterminate state (i.e., the reset performed by
+ *  LZ4_resetStreamHC()).
+ *
+ *  LZ4_streamHCs are guaranteed to be in a valid state when:
+ *  - returned from LZ4_createStreamHC()
+ *  - reset by LZ4_resetStreamHC()
+ *  - memset(stream, 0, sizeof(LZ4_streamHC_t))
+ *  - the stream was in a valid state and was reset by LZ4_resetStreamHC_fast()
+ *  - the stream was in a valid state and was then used in any compression call
+ *    that returned success
+ *  - the stream was in an indeterminate state and was used in a compression
+ *    call that fully reset the state (LZ4_compress_HC_extStateHC()) and that
+ *    returned success
+ */
+void LZ4_resetStreamHC_fast(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
+
+/*! LZ4_compress_HC_extStateHC_fastReset() :
+ *  A variant of LZ4_compress_HC_extStateHC().
+ *
+ *  Using this variant avoids an expensive initialization step. It is only safe
+ *  to call if the state buffer is known to be correctly initialized already
+ *  (see above comment on LZ4_resetStreamHC_fast() for a definition of
+ *  "correctly initialized"). From a high level, the difference is that this
+ *  function initializes the provided state with a call to
+ *  LZ4_resetStreamHC_fast() while LZ4_compress_HC_extStateHC() starts with a
+ *  call to LZ4_resetStreamHC().
+ */
+int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel);
+
+/*! LZ4_attach_HC_dictionary() :
+ *  This is an experimental API that allows for the efficient use of a
+ *  static dictionary many times.
+ *
+ *  Rather than re-loading the dictionary buffer into a working context before
+ *  each compression, or copying a pre-loaded dictionary's LZ4_streamHC_t into a
+ *  working LZ4_streamHC_t, this function introduces a no-copy setup mechanism,
+ *  in which the working stream references the dictionary stream in-place.
+ *
+ *  Several assumptions are made about the state of the dictionary stream.
+ *  Currently, only streams which have been prepared by LZ4_loadDictHC() should
+ *  be expected to work.
+ *
+ *  Alternatively, the provided dictionary stream pointer may be NULL, in which
+ *  case any existing dictionary stream is unset.
+ *
+ *  A dictionary should only be attached to a stream without any history (i.e.,
+ *  a stream that has just been reset).
+ *
+ *  The dictionary will remain attached to the working stream only for the
+ *  current stream session. Calls to LZ4_resetStreamHC(_fast) will remove the
+ *  dictionary context association from the working stream. The dictionary
+ *  stream (and source buffer) must remain in-place / accessible / unchanged
+ *  through the lifetime of the stream session.
+ */
+LZ4LIB_API void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream);
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* LZ4_HC_SLO_098092834 */
+#endif   /* LZ4_HC_STATIC_LINKING_ONLY */
diff --git a/lib/lz4opt.h b/lib/lz4opt.h
deleted file mode 100644
index d1913fe..0000000
--- a/lib/lz4opt.h
+++ /dev/null
@@ -1,363 +0,0 @@
-/*
-    lz4opt.h - Optimal Mode of LZ4
-    Copyright (C) 2015-2016, Przemyslaw Skibinski <inikep@gmail.com>
-    Note : this file is intended to be included within lz4hc.c
-
-    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
-    Redistribution and use in source and binary forms, with or without
-    modification, are permitted provided that the following conditions are
-    met:
-
-    * Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above
-    copyright notice, this list of conditions and the following disclaimer
-    in the documentation and/or other materials provided with the
-    distribution.
-
-    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-    You can contact the author at :
-       - LZ4 source repository : https://github.com/lz4/lz4
-       - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-
-#define LZ4_OPT_NUM   (1<<12)
-
-
-typedef struct
-{
-    int off;
-    int len;
-} LZ4HC_match_t;
-
-typedef struct
-{
-    int price;
-    int off;
-    int mlen;
-    int litlen;
-} LZ4HC_optimal_t;
-
-
-/* price in bits */
-FORCE_INLINE size_t LZ4HC_literalsPrice(size_t litlen)
-{
-    size_t price = 8*litlen;
-    if (litlen >= (size_t)RUN_MASK) price+=8*(1+(litlen-RUN_MASK)/255);
-    return price;
-}
-
-
-/* requires mlen >= MINMATCH */
-FORCE_INLINE size_t LZ4HC_sequencePrice(size_t litlen, size_t mlen)
-{
-    size_t price = 16 + 8; /* 16-bit offset + token */
-
-    price += LZ4HC_literalsPrice(litlen);
-
-    mlen -= MINMATCH;
-    if (mlen >= (size_t)ML_MASK) price+=8*(1+(mlen-ML_MASK)/255);
-
-    return price;
-}
-
-
-/*-*************************************
-*  Binary Tree search
-***************************************/
-FORCE_INLINE int LZ4HC_BinTree_InsertAndGetAllMatches (
-    LZ4HC_CCtx_internal* ctx,
-    const BYTE* const ip,
-    const BYTE* const iHighLimit,
-    size_t best_mlen,
-    LZ4HC_match_t* matches,
-    int* matchNum)
-{
-    U16* const chainTable = ctx->chainTable;
-    U32* const HashTable = ctx->hashTable;
-    const BYTE* const base = ctx->base;
-    const U32 dictLimit = ctx->dictLimit;
-    const U32 current = (U32)(ip - base);
-    const U32 lowLimit = (ctx->lowLimit + MAX_DISTANCE > current) ? ctx->lowLimit : current - (MAX_DISTANCE - 1);
-    const BYTE* const dictBase = ctx->dictBase;
-    const BYTE* match;
-    int nbAttempts = ctx->searchNum;
-    int mnum = 0;
-    U16 *ptr0, *ptr1, delta0, delta1;
-    U32 matchIndex;
-    size_t matchLength = 0;
-    U32* HashPos;
-
-    if (ip + MINMATCH > iHighLimit) return 1;
-
-    /* HC4 match finder */
-    HashPos = &HashTable[LZ4HC_hashPtr(ip)];
-    matchIndex = *HashPos;
-    *HashPos = current;
-
-    ptr0 = &DELTANEXTMAXD(current*2+1);
-    ptr1 = &DELTANEXTMAXD(current*2);
-    delta0 = delta1 = (U16)(current - matchIndex);
-
-    while ((matchIndex < current) && (matchIndex>=lowLimit) && (nbAttempts)) {
-        nbAttempts--;
-        if (matchIndex >= dictLimit) {
-            match = base + matchIndex;
-            matchLength = LZ4_count(ip, match, iHighLimit);
-        } else {
-            const BYTE* vLimit = ip + (dictLimit - matchIndex);
-            match = dictBase + matchIndex;
-            if (vLimit > iHighLimit) vLimit = iHighLimit;
-            matchLength = LZ4_count(ip, match, vLimit);
-            if ((ip+matchLength == vLimit) && (vLimit < iHighLimit))
-                matchLength += LZ4_count(ip+matchLength, base+dictLimit, iHighLimit);
-        }
-
-        if (matchLength > best_mlen) {
-            best_mlen = matchLength;
-            if (matches) {
-                if (matchIndex >= dictLimit)
-                    matches[mnum].off = (int)(ip - match);
-                else
-                    matches[mnum].off = (int)(ip - (base + matchIndex)); /* virtual matchpos */
-                matches[mnum].len = (int)matchLength;
-                mnum++;
-            }
-            if (best_mlen > LZ4_OPT_NUM) break;
-        }
-
-        if (ip+matchLength >= iHighLimit)   /* equal : no way to know if inf or sup */
-            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
-
-        if (*(ip+matchLength) < *(match+matchLength)) {
-            *ptr0 = delta0;
-            ptr0 = &DELTANEXTMAXD(matchIndex*2);
-            if (*ptr0 == (U16)-1) break;
-            delta0 = *ptr0;
-            delta1 += delta0;
-            matchIndex -= delta0;
-        } else {
-            *ptr1 = delta1;
-            ptr1 = &DELTANEXTMAXD(matchIndex*2+1);
-            if (*ptr1 == (U16)-1) break;
-            delta1 = *ptr1;
-            delta0 += delta1;
-            matchIndex -= delta1;
-        }
-    }
-
-    *ptr0 = (U16)-1;
-    *ptr1 = (U16)-1;
-    if (matchNum) *matchNum = mnum;
-  /*  if (best_mlen > 8) return best_mlen-8; */
-    if (!matchNum) return 1;
-    return 1;
-}
-
-
-FORCE_INLINE void LZ4HC_updateBinTree(LZ4HC_CCtx_internal* ctx, const BYTE* const ip, const BYTE* const iHighLimit)
-{
-    const BYTE* const base = ctx->base;
-    const U32 target = (U32)(ip - base);
-    U32 idx = ctx->nextToUpdate;
-    while(idx < target)
-        idx += LZ4HC_BinTree_InsertAndGetAllMatches(ctx, base+idx, iHighLimit, 8, NULL, NULL);
-}
-
-
-/** Tree updater, providing best match */
-FORCE_INLINE int LZ4HC_BinTree_GetAllMatches (
-                        LZ4HC_CCtx_internal* ctx,
-                        const BYTE* const ip, const BYTE* const iHighLimit,
-                        size_t best_mlen, LZ4HC_match_t* matches, const int fullUpdate)
-{
-    int mnum = 0;
-    if (ip < ctx->base + ctx->nextToUpdate) return 0;   /* skipped area */
-    if (fullUpdate) LZ4HC_updateBinTree(ctx, ip, iHighLimit);
-    best_mlen = LZ4HC_BinTree_InsertAndGetAllMatches(ctx, ip, iHighLimit, best_mlen, matches, &mnum);
-    ctx->nextToUpdate = (U32)(ip - ctx->base + best_mlen);
-    return mnum;
-}
-
-
-#define SET_PRICE(pos, mlen, offset, litlen, price)    \
-{                                                      \
-    while (last_pos < pos)  { opt[last_pos+1].price = 1<<30; last_pos++; } \
-    opt[pos].mlen = (int)mlen;                         \
-    opt[pos].off = (int)offset;                        \
-    opt[pos].litlen = (int)litlen;                     \
-    opt[pos].price = (int)price;                       \
-}
-
-
-static int LZ4HC_compress_optimal (
-    LZ4HC_CCtx_internal* ctx,
-    const char* const source,
-    char* dest,
-    int inputSize,
-    int maxOutputSize,
-    limitedOutput_directive limit,
-    const size_t sufficient_len,
-    const int fullUpdate
-    )
-{
-    LZ4HC_optimal_t opt[LZ4_OPT_NUM + 1];
-    LZ4HC_match_t matches[LZ4_OPT_NUM + 1];
-    const BYTE *inr = NULL;
-    size_t res, cur, cur2;
-    size_t i, llen, litlen, mlen, best_mlen, price, offset, best_off, match_num, last_pos;
-
-    const BYTE* ip = (const BYTE*) source;
-    const BYTE* anchor = ip;
-    const BYTE* const iend = ip + inputSize;
-    const BYTE* const mflimit = iend - MFLIMIT;
-    const BYTE* const matchlimit = (iend - LASTLITERALS);
-    BYTE* op = (BYTE*) dest;
-    BYTE* const oend = op + maxOutputSize;
-
-    /* init */
-    ctx->end += inputSize;
-    ip++;
-
-    /* Main Loop */
-    while (ip < mflimit) {
-        memset(opt, 0, sizeof(LZ4HC_optimal_t));
-        last_pos = 0;
-        llen = ip - anchor;
-        match_num = LZ4HC_BinTree_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate);
-        if (!match_num) { ip++; continue; }
-
-        if ((size_t)matches[match_num-1].len > sufficient_len) {
-            best_mlen = matches[match_num-1].len;
-            best_off = matches[match_num-1].off;
-            cur = 0;
-            last_pos = 1;
-            goto encode;
-        }
-
-        /* set prices using matches at position = 0 */
-        for (i = 0; i < match_num; i++) {
-           mlen = (i>0) ? (size_t)matches[i-1].len+1 : MINMATCH;
-           best_mlen = (matches[i].len < LZ4_OPT_NUM) ? matches[i].len : LZ4_OPT_NUM;
-           while (mlen <= best_mlen) {
-                litlen = 0;
-                price = LZ4HC_sequencePrice(llen + litlen, mlen) - LZ4HC_literalsPrice(llen);
-                SET_PRICE(mlen, mlen, matches[i].off, litlen, price);
-                mlen++;
-           }
-        }
-
-        if (last_pos < MINMATCH) { ip++; continue; }
-
-        /* check further positions */
-        opt[0].mlen = opt[1].mlen = 1;
-        for (cur = 1; cur <= last_pos; cur++) {
-            inr = ip + cur;
-
-            if (opt[cur-1].mlen == 1) {
-                litlen = opt[cur-1].litlen + 1;
-                if (cur != litlen) {
-                    price = opt[cur - litlen].price + LZ4HC_literalsPrice(litlen);
-                } else {
-                    price = LZ4HC_literalsPrice(llen + litlen) - LZ4HC_literalsPrice(llen);
-                }
-            } else {
-                litlen = 1;
-                price = opt[cur - 1].price + LZ4HC_literalsPrice(litlen);
-            }
-
-            mlen = 1;
-            best_mlen = 0;
-            if (cur > last_pos || price < (size_t)opt[cur].price)
-                SET_PRICE(cur, mlen, best_mlen, litlen, price);
-
-            if (cur == last_pos || inr >= mflimit) break;
-
-            match_num = LZ4HC_BinTree_GetAllMatches(ctx, inr, matchlimit, MINMATCH-1, matches, fullUpdate);
-            if (match_num > 0 && (size_t)matches[match_num-1].len > sufficient_len) {
-                best_mlen = matches[match_num-1].len;
-                best_off = matches[match_num-1].off;
-                last_pos = cur + 1;
-                goto encode;
-            }
-
-            /* set prices using matches at position = cur */
-            for (i = 0; i < match_num; i++) {
-                mlen = (i>0) ? (size_t)matches[i-1].len+1 : MINMATCH;
-                cur2 = cur;
-                best_mlen = (cur2 + matches[i].len < LZ4_OPT_NUM) ? (size_t)matches[i].len : LZ4_OPT_NUM - cur2;
-
-                while (mlen <= best_mlen) {
-                    if (opt[cur2].mlen == 1) {
-                        litlen = opt[cur2].litlen;
-
-                        if (cur2 != litlen)
-                            price = opt[cur2 - litlen].price + LZ4HC_sequencePrice(litlen, mlen);
-                        else
-                            price = LZ4HC_sequencePrice(llen + litlen, mlen) - LZ4HC_literalsPrice(llen);
-                    } else {
-                        litlen = 0;
-                        price = opt[cur2].price + LZ4HC_sequencePrice(litlen, mlen);
-                    }
-
-                    if (cur2 + mlen > last_pos || price < (size_t)opt[cur2 + mlen].price) { // || (((int)price == opt[cur2 + mlen].price) && (opt[cur2 + mlen-1].mlen == 1))) {
-                        SET_PRICE(cur2 + mlen, mlen, matches[i].off, litlen, price);
-                    }
-                    mlen++;
-                }
-            }
-        } /* for (cur = 1; cur <= last_pos; cur++) */
-
-        best_mlen = opt[last_pos].mlen;
-        best_off = opt[last_pos].off;
-        cur = last_pos - best_mlen;
-
-encode: /* cur, last_pos, best_mlen, best_off have to be set */
-        opt[0].mlen = 1;
-        while (1) {
-            mlen = opt[cur].mlen;
-            offset = opt[cur].off;
-            opt[cur].mlen = (int)best_mlen;
-            opt[cur].off = (int)best_off;
-            best_mlen = mlen;
-            best_off = offset;
-            if (mlen > cur) break;
-            cur -= mlen;
-        }
-
-        cur = 0;
-        while (cur < last_pos) {
-            mlen = opt[cur].mlen;
-            if (mlen == 1) { ip++; cur++; continue; }
-            offset = opt[cur].off;
-            cur += mlen;
-
-            res = LZ4HC_encodeSequence(&ip, &op, &anchor, (int)mlen, ip - offset, limit, oend);
-            if (res) return 0;
-        }
-    }
-
-    /* Encode Last Literals */
-    {   int lastRun = (int)(iend - anchor);
-        if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0;  /* Check output limit */
-        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
-        else *op++ = (BYTE)(lastRun<<ML_BITS);
-        memcpy(op, anchor, iend - anchor);
-        op += iend-anchor;
-    }
-
-    /* End */
-    return (int) ((char*)op-dest);
-}
diff --git a/lib/xxhash.c b/lib/xxhash.c
index e9ff2d4..3fc97fd 100644
--- a/lib/xxhash.c
+++ b/lib/xxhash.c
@@ -52,7 +52,7 @@
 #ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
 #  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
 #    define XXH_FORCE_MEMORY_ACCESS 2
-#  elif defined(__INTEL_COMPILER) || \
+#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
   (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
 #    define XXH_FORCE_MEMORY_ACCESS 1
 #  endif
@@ -113,19 +113,24 @@
 ***************************************/
 #ifdef _MSC_VER    /* Visual Studio */
 #  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
-#  define FORCE_INLINE static __forceinline
-#else
-#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
-#    ifdef __GNUC__
-#      define FORCE_INLINE static inline __attribute__((always_inline))
-#    else
-#      define FORCE_INLINE static inline
-#    endif
-#  else
-#    define FORCE_INLINE static
-#  endif /* __STDC_VERSION__ */
 #endif
 
+#ifndef XXH_FORCE_INLINE
+#  ifdef _MSC_VER    /* Visual Studio */
+#    define XXH_FORCE_INLINE static __forceinline
+#  else
+#    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#      ifdef __GNUC__
+#        define XXH_FORCE_INLINE static inline __attribute__((always_inline))
+#      else
+#        define XXH_FORCE_INLINE static inline
+#      endif
+#    else
+#      define XXH_FORCE_INLINE static
+#    endif /* __STDC_VERSION__ */
+#  endif  /* _MSC_VER */
+#endif /* XXH_FORCE_INLINE */
+
 
 /* *************************************
 *  Basic Types
@@ -218,7 +223,7 @@
 *****************************/
 typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
 
-FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+XXH_FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
 {
     if (align==XXH_unaligned)
         return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
@@ -226,7 +231,7 @@
         return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
 }
 
-FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
+XXH_FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
 {
     return XXH_readLE32_align(ptr, endian, XXH_unaligned);
 }
@@ -261,7 +266,7 @@
     return seed;
 }
 
-FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
+XXH_FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
 {
     const BYTE* p = (const BYTE*)input;
     const BYTE* bEnd = p + len;
@@ -376,7 +381,7 @@
 }
 
 
-FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
+XXH_FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
 {
     const BYTE* p = (const BYTE*)input;
     const BYTE* const bEnd = p + len;
@@ -446,7 +451,7 @@
 
 
 
-FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
+XXH_FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
 {
     const BYTE * p = (const BYTE*)state->mem32;
     const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize;
@@ -578,7 +583,7 @@
 }
 #endif
 
-FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+XXH_FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
 {
     if (align==XXH_unaligned)
         return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
@@ -586,7 +591,7 @@
         return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
 }
 
-FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
+XXH_FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
 {
     return XXH_readLE64_align(ptr, endian, XXH_unaligned);
 }
@@ -621,7 +626,7 @@
     return acc;
 }
 
-FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
+XXH_FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
 {
     const BYTE* p = (const BYTE*)input;
     const BYTE* const bEnd = p + len;
@@ -745,7 +750,7 @@
     return XXH_OK;
 }
 
-FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
+XXH_FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
 {
     const BYTE* p = (const BYTE*)input;
     const BYTE* const bEnd = p + len;
@@ -810,7 +815,7 @@
         return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
 }
 
-FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
+XXH_FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
 {
     const BYTE * p = (const BYTE*)state->mem64;
     const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize;
diff --git a/programs/Makefile b/programs/Makefile
index 060ce21..bd33d9b 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -1,6 +1,8 @@
 # ##########################################################################
 # LZ4 programs - Makefile
-# Copyright (C) Yann Collet 2011-2016
+# Copyright (C) Yann Collet 2011-2017
+#
+# This Makefile is validated for Linux, macOS, *BSD, Hurd, Solaris, MSYS2 targets
 #
 # GPL v2 License
 #
@@ -28,7 +30,8 @@
 # ##########################################################################
 
 # Version numbers
-LIBVER_SRC := ../lib/lz4.h
+LZ4DIR   := ../lib
+LIBVER_SRC := $(LZ4DIR)/lz4.h
 LIBVER_MAJOR_SCRIPT:=`sed -n '/define LZ4_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)`
 LIBVER_MINOR_SCRIPT:=`sed -n '/define LZ4_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)`
 LIBVER_PATCH_SCRIPT:=`sed -n '/define LZ4_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)`
@@ -36,33 +39,31 @@
 LIBVER_MAJOR := $(shell echo $(LIBVER_MAJOR_SCRIPT))
 LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT))
 LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
-LIBVER  := $(shell echo $(LIBVER_SCRIPT))
+LIBVER   := $(shell echo $(LIBVER_SCRIPT))
 
-LZ4DIR  := ../lib
+SRCFILES := $(sort $(wildcard $(LZ4DIR)/*.c) $(wildcard *.c))
+OBJFILES := $(SRCFILES:.c=.o)
 
-SRCFILES := $(wildcard $(LZ4DIR)/*.c) $(wildcard *.c)
-OBJFILES := $(patsubst %.c,%.o,$(SRCFILES))
-
-CPPFLAGS+= -I$(LZ4DIR) -DXXH_NAMESPACE=LZ4_
-CFLAGS  ?= -O3
-DEBUGFLAGS:=-g -Wall -Wextra -Wundef -Wcast-qual -Wcast-align -Wshadow \
-           -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes \
-           -Wpointer-arith -Wstrict-aliasing=1
-CFLAGS  += $(MOREFLAGS)
-FLAGS    = $(CFLAGS) $(CPPFLAGS) $(LDFLAGS)
+CPPFLAGS += -I$(LZ4DIR) -DXXH_NAMESPACE=LZ4_
+CFLAGS   ?= -O3
+DEBUGFLAGS:=-Wall -Wextra -Wundef -Wcast-qual -Wcast-align -Wshadow \
+            -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes \
+            -Wpointer-arith -Wstrict-aliasing=1
+CFLAGS   += $(DEBUGFLAGS) $(MOREFLAGS)
+FLAGS     = $(CFLAGS) $(CPPFLAGS) $(LDFLAGS)
 
 LZ4_VERSION=$(LIBVER)
-MD2ROFF  =ronn
-MD2ROFF_FLAGS  = --roff --warnings --manual="User Commands" --organization="lz4 $(LZ4_VERSION)"
+MD2ROFF   = ronn
+MD2ROFF_FLAGS = --roff --warnings --manual="User Commands" --organization="lz4 $(LZ4_VERSION)"
 
 
 # Define *.exe as extension for Windows systems
 ifneq (,$(filter Windows%,$(OS)))
-VOID := nul
 EXT  :=.exe
+VOID := nul
 else
-VOID := /dev/null
 EXT  :=
+VOID := /dev/null
 endif
 
 
@@ -74,30 +75,21 @@
 all32: CFLAGS+=-m32
 all32: all
 
-lz4: CFLAGS += $(DEBUGFLAGS)
 lz4: $(OBJFILES)
 	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
 lz4-release: DEBUGFLAGS=
 lz4-release: lz4
 
-lz4c  : CFLAGS += $(DEBUGFLAGS)
-lz4c  : $(SRCFILES)
-	$(CC) $(FLAGS) -DENABLE_LZ4C_LEGACY_OPTIONS $^ -o $@$(EXT)
+lz4c: lz4   # alias of lz4; -f replaces an existing link instead of failing with "File exists"
+	ln -sf lz4$(EXT) lz4c$(EXT)
 
-lz4c32: CFLAGS += -m32 $(DEBUGFLAGS)
-lz4c32: $(SRCFILES)
+lz4c32: CFLAGS += -m32
+lz4c32 : $(SRCFILES)
 	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
-clean:
-	@$(MAKE) -C $(LZ4DIR) $@ > $(VOID)
-	@$(RM) core *.o *.test tmp* \
-           lz4$(EXT) lz4c$(EXT) lz4c32$(EXT) unlz4 lz4cat
-	@echo Cleaning completed
-
-
-lz4.1: lz4.1.md
-	cat $^ | $(MD2ROFF) $(MD2ROFF_FLAGS) | sed -n '/^\.\\\".*/!p' > $@
+lz4.1: lz4.1.md $(LIBVER_SRC)
+	cat $< | $(MD2ROFF) $(MD2ROFF_FLAGS) | sed -n '/^\.\\\".*/!p' > $@
 
 man: lz4.1
 
@@ -107,18 +99,40 @@
 preview-man: clean-man man
 	man ./lz4.1
 
+clean:
+	@$(MAKE) -C $(LZ4DIR) $@ > $(VOID)
+	@$(RM) core *.o *.test tmp* \
+           lz4$(EXT) lz4c$(EXT) lz4c32$(EXT) unlz4$(EXT) lz4cat$(EXT)
+	@echo Cleaning completed
+
 
 #-----------------------------------------------------------------------------
 # make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets
 #-----------------------------------------------------------------------------
-ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS))
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD))
 
 unlz4: lz4
-	ln -s lz4 unlz4
+	ln -sf lz4$(EXT) unlz4$(EXT)
 
 lz4cat: lz4
-	ln -s lz4 lz4cat
+	ln -sf lz4$(EXT) lz4cat$(EXT)
 
+DESTDIR     ?=
+# directory variables : GNU conventions prefer lowercase
+# see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html
+# support both lower and uppercase (BSD), use lowercase in script
+PREFIX      ?= /usr/local
+prefix      ?= $(PREFIX)
+EXEC_PREFIX ?= $(prefix)
+exec_prefix ?= $(EXEC_PREFIX)
+BINDIR      ?= $(exec_prefix)/bin
+bindir      ?= $(BINDIR)
+DATAROOTDIR ?= $(prefix)/share
+datarootdir ?= $(DATAROOTDIR)
+MANDIR      ?= $(datarootdir)/man
+mandir      ?= $(MANDIR)
+MAN1DIR     ?= $(mandir)/man1
+man1dir     ?= $(MAN1DIR)
 
 ifneq (,$(filter $(shell uname),SunOS))
 INSTALL ?= ginstall
@@ -126,44 +140,33 @@
 INSTALL ?= install
 endif
 
-PREFIX  ?= /usr/local
-DESTDIR ?=
-BINDIR  ?= $(PREFIX)/bin
-
-ifneq (,$(filter $(shell uname),OpenBSD FreeBSD NetBSD DragonFly SunOS))
-MANDIR  ?= $(PREFIX)/man/man1
-else
-MANDIR  ?= $(PREFIX)/share/man/man1
-endif
-
 INSTALL_PROGRAM ?= $(INSTALL) -m 755
-INSTALL_SCRIPT  ?= $(INSTALL) -m 755
-INSTALL_MAN     ?= $(INSTALL) -m 644
+INSTALL_DATA    ?= $(INSTALL) -m 644
 
 
-install: lz4$(EXT) lz4c$(EXT)
+install: lz4
 	@echo Installing binaries
-	@$(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)/ $(DESTDIR)$(MANDIR)/
-	@$(INSTALL_PROGRAM) lz4 $(DESTDIR)$(BINDIR)/lz4
-	@ln -sf lz4 $(DESTDIR)$(BINDIR)/lz4cat
-	@ln -sf lz4 $(DESTDIR)$(BINDIR)/unlz4
-	@$(INSTALL_PROGRAM) lz4c$(EXT) $(DESTDIR)$(BINDIR)/lz4c
+	@$(INSTALL) -d -m 755 $(DESTDIR)$(bindir)/ $(DESTDIR)$(man1dir)/
+	@$(INSTALL_PROGRAM) lz4$(EXT) $(DESTDIR)$(bindir)/lz4$(EXT)
+	@ln -sf lz4$(EXT) $(DESTDIR)$(bindir)/lz4c$(EXT)
+	@ln -sf lz4$(EXT) $(DESTDIR)$(bindir)/lz4cat$(EXT)
+	@ln -sf lz4$(EXT) $(DESTDIR)$(bindir)/unlz4$(EXT)
 	@echo Installing man pages
-	@$(INSTALL_MAN) -m 644 lz4.1 $(DESTDIR)$(MANDIR)/lz4.1
-	@ln -sf lz4.1 $(DESTDIR)$(MANDIR)/lz4c.1
-	@ln -sf lz4.1 $(DESTDIR)$(MANDIR)/lz4cat.1
-	@ln -sf lz4.1 $(DESTDIR)$(MANDIR)/unlz4.1
+	@$(INSTALL_DATA) lz4.1 $(DESTDIR)$(man1dir)/lz4.1
+	@ln -sf lz4.1 $(DESTDIR)$(man1dir)/lz4c.1
+	@ln -sf lz4.1 $(DESTDIR)$(man1dir)/lz4cat.1
+	@ln -sf lz4.1 $(DESTDIR)$(man1dir)/unlz4.1
 	@echo lz4 installation completed
 
 uninstall:
-	@$(RM) $(DESTDIR)$(BINDIR)/lz4cat
-	@$(RM) $(DESTDIR)$(BINDIR)/unlz4
-	@$(RM) $(DESTDIR)$(BINDIR)/lz4
-	@$(RM) $(DESTDIR)$(BINDIR)/lz4c
-	@$(RM) $(DESTDIR)$(MANDIR)/lz4.1
-	@$(RM) $(DESTDIR)$(MANDIR)/lz4c.1
-	@$(RM) $(DESTDIR)$(MANDIR)/lz4cat.1
-	@$(RM) $(DESTDIR)$(MANDIR)/unlz4.1
+	@$(RM) $(DESTDIR)$(bindir)/lz4cat$(EXT)
+	@$(RM) $(DESTDIR)$(bindir)/unlz4$(EXT)
+	@$(RM) $(DESTDIR)$(bindir)/lz4$(EXT)
+	@$(RM) $(DESTDIR)$(bindir)/lz4c$(EXT)
+	@$(RM) $(DESTDIR)$(man1dir)/lz4.1
+	@$(RM) $(DESTDIR)$(man1dir)/lz4c.1
+	@$(RM) $(DESTDIR)$(man1dir)/lz4cat.1
+	@$(RM) $(DESTDIR)$(man1dir)/unlz4.1
 	@echo lz4 programs successfully uninstalled
 
 endif
diff --git a/programs/bench.c b/programs/bench.c
index 8311503..11bf044 100644
--- a/programs/bench.c
+++ b/programs/bench.c
@@ -41,6 +41,7 @@
 #include <string.h>      /* memset */
 #include <stdio.h>       /* fprintf, fopen, ftello */
 #include <time.h>        /* clock_t, clock, CLOCKS_PER_SEC */
+#include <assert.h>      /* assert */
 
 #include "datagen.h"     /* RDG_genBuffer */
 #include "xxhash.h"
@@ -48,7 +49,10 @@
 
 #include "lz4.h"
 #define COMPRESSOR0 LZ4_compress_local
-static int LZ4_compress_local(const char* src, char* dst, int srcSize, int dstSize, int clevel) { (void)clevel; return LZ4_compress_default(src, dst, srcSize, dstSize); }
+static int LZ4_compress_local(const char* src, char* dst, int srcSize, int dstSize, int clevel) {
+  int const acceleration = (clevel < 0) ? -clevel + 1 : 1;   /* negative level -N => acceleration N+1; any other level => 1 */
+  return LZ4_compress_fast(src, dst, srcSize, dstSize, acceleration);
+}
 #include "lz4hc.h"
 #define COMPRESSOR1 LZ4_compress_HC
 #define DEFAULTCOMPRESSOR COMPRESSOR0
@@ -66,9 +70,10 @@
 
 #define NBSECONDS             3
 #define TIMELOOP_MICROSEC     1*1000000ULL /* 1 second */
+#define TIMELOOP_NANOSEC      (1*1000000000ULL) /* 1 second; parenthesized so it is safe as a divisor/operand */
 #define ACTIVEPERIOD_MICROSEC 70*1000000ULL /* 70 seconds */
 #define COOLPERIOD_SEC        10
-#define DECOMP_MULT           2 /* test decompression DECOMP_MULT times longer than compression */
+#define DECOMP_MULT           1 /* test decompression DECOMP_MULT times longer than compression */
 
 #define KB *(1 <<10)
 #define MB *(1 <<20)
@@ -117,21 +122,21 @@
 static U32 g_nbSeconds = NBSECONDS;
 static size_t g_blockSize = 0;
 int g_additionalParam = 0;
+int g_benchSeparately = 0;
 
 void BMK_setNotificationLevel(unsigned level) { g_displayLevel=level; }
 
 void BMK_setAdditionalParam(int additionalParam) { g_additionalParam=additionalParam; }
 
-void BMK_SetNbSeconds(unsigned nbSeconds)
+void BMK_setNbSeconds(unsigned nbSeconds)
 {
     g_nbSeconds = nbSeconds;
     DISPLAYLEVEL(3, "- test >= %u seconds per compression / decompression -\n", g_nbSeconds);
 }
 
-void BMK_SetBlockSize(size_t blockSize)
-{
-    g_blockSize = blockSize;
-}
+void BMK_setBlockSize(size_t blockSize) { g_blockSize = blockSize; }
+
+void BMK_setBenchSeparately(int separate) { g_benchSeparately = (separate!=0); }
 
 
 /* ********************************************************
@@ -166,7 +171,6 @@
     void* const compressedBuffer = malloc(maxCompressedSize);
     void* const resultBuffer = malloc(srcSize);
     U32 nbBlocks;
-    UTIL_time_t ticksPerSecond;
     struct compressionParameters compP;
     int cfunctionId;
 
@@ -176,7 +180,6 @@
 
     /* init */
     if (strlen(displayName)>17) displayName += strlen(displayName)-17;   /* can only display 17 characters */
-    UTIL_initTimer(&ticksPerSecond);
 
     /* Init */
     if (cLevel < LZ4HC_CLEVEL_MIN) cfunctionId = 0; else cfunctionId = 1;
@@ -220,7 +223,9 @@
     {   U64 fastestC = (U64)(-1LL), fastestD = (U64)(-1LL);
         U64 const crcOrig = XXH64(srcBuffer, srcSize, 0);
         UTIL_time_t coolTime;
-        U64 const maxTime = (g_nbSeconds * TIMELOOP_MICROSEC) + 100;
+        U64 const maxTime = (g_nbSeconds * TIMELOOP_NANOSEC) + 100;
+        U32 nbCompressionLoops = (U32)((5 MB) / (srcSize+1)) + 1;  /* conservative initial compression speed estimate */
+        U32 nbDecodeLoops = (U32)((200 MB) / (srcSize+1)) + 1;  /* conservative initial decode speed estimate */
         U64 totalCTime=0, totalDTime=0;
         U32 cCompleted=0, dCompleted=0;
 #       define NB_MARKS 4
@@ -229,17 +234,14 @@
         size_t cSize = 0;
         double ratio = 0.;
 
-        UTIL_getTime(&coolTime);
+        coolTime = UTIL_getTime();
         DISPLAYLEVEL(2, "\r%79s\r", "");
-        while (!cCompleted | !dCompleted) {
-            UTIL_time_t clockStart;
-            U64 clockLoop = g_nbSeconds ? TIMELOOP_MICROSEC : 1;
-
+        while (!cCompleted || !dCompleted) {
             /* overheat protection */
-            if (UTIL_clockSpanMicro(coolTime, ticksPerSecond) > ACTIVEPERIOD_MICROSEC) {
+            if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) {
                 DISPLAYLEVEL(2, "\rcooling down ...    \r");
                 UTIL_sleep(COOLPERIOD_SEC);
-                UTIL_getTime(&coolTime);
+                coolTime = UTIL_getTime();
             }
 
             /* Compression */
@@ -247,22 +249,28 @@
             if (!cCompleted) memset(compressedBuffer, 0xE5, maxCompressedSize);  /* warm up and erase result buffer */
 
             UTIL_sleepMilli(1);  /* give processor time to other processes */
-            UTIL_waitForNextTick(ticksPerSecond);
-            UTIL_getTime(&clockStart);
+            UTIL_waitForNextTick();
 
             if (!cCompleted) {   /* still some time to do compression tests */
-                U32 nbLoops = 0;
-                do {
+                UTIL_time_t const clockStart = UTIL_getTime();
+                U32 nbLoops;
+                for (nbLoops=0; nbLoops < nbCompressionLoops; nbLoops++) {
                     U32 blockNb;
                     for (blockNb=0; blockNb<nbBlocks; blockNb++) {
                         size_t const rSize = compP.compressionFunction(blockTable[blockNb].srcPtr, blockTable[blockNb].cPtr, (int)blockTable[blockNb].srcSize, (int)blockTable[blockNb].cRoom, cLevel);
                         if (LZ4_isError(rSize)) EXM_THROW(1, "LZ4_compress() failed");
                         blockTable[blockNb].cSize = rSize;
+                }   }
+                {   U64 const clockSpan = UTIL_clockSpanNano(clockStart);
+                    if (clockSpan > 0) {
+                        if (clockSpan < fastestC * nbCompressionLoops)
+                            fastestC = clockSpan / nbCompressionLoops;
+                        assert(fastestC > 0);
+                        nbCompressionLoops = (U32)(TIMELOOP_NANOSEC / fastestC) + 1;  /* aim for ~1sec */
+                    } else {
+                        assert(nbCompressionLoops < 40000000);   /* avoid overflow */
+                        nbCompressionLoops *= 100;
                     }
-                    nbLoops++;
-                } while (UTIL_clockSpanMicro(clockStart, ticksPerSecond) < clockLoop);
-                {   U64 const clockSpan = UTIL_clockSpanMicro(clockStart, ticksPerSecond);
-                    if (clockSpan < fastestC*nbLoops) fastestC = clockSpan / nbLoops;
                     totalCTime += clockSpan;
                     cCompleted = totalCTime>maxTime;
             }   }
@@ -274,50 +282,54 @@
             markNb = (markNb+1) % NB_MARKS;
             DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s\r",
                     marks[markNb], displayName, (U32)srcSize, (U32)cSize, ratio,
-                    (double)srcSize / fastestC );
+                    ((double)srcSize / fastestC) * 1000 );
 
             (void)fastestD; (void)crcOrig;   /*  unused when decompression disabled */
 #if 1
             /* Decompression */
             if (!dCompleted) memset(resultBuffer, 0xD6, srcSize);  /* warm result buffer */
 
-            UTIL_sleepMilli(1); /* give processor time to other processes */
-            UTIL_waitForNextTick(ticksPerSecond);
-            UTIL_getTime(&clockStart);
+            UTIL_sleepMilli(5); /* give processor time to other processes */
+            UTIL_waitForNextTick();
 
             if (!dCompleted) {
-                U32 nbLoops = 0;
-                do {
+                UTIL_time_t const clockStart = UTIL_getTime();
+                U32 nbLoops;
+                for (nbLoops=0; nbLoops < nbDecodeLoops; nbLoops++) {
                     U32 blockNb;
                     for (blockNb=0; blockNb<nbBlocks; blockNb++) {
                         size_t const regenSize = LZ4_decompress_safe(blockTable[blockNb].cPtr, blockTable[blockNb].resPtr, (int)blockTable[blockNb].cSize, (int)blockTable[blockNb].srcSize);
                         if (LZ4_isError(regenSize)) {
-                            DISPLAY("LZ4_decompress_safe() failed on block %u  \n", blockNb);
-                            clockLoop = 0;   /* force immediate test end */
+                            DISPLAY("LZ4_decompress_safe() failed on block %u \n", blockNb);
                             break;
                         }
-
                         blockTable[blockNb].resSize = regenSize;
+                }   }
+                {   U64 const clockSpan = UTIL_clockSpanNano(clockStart);
+                    if (clockSpan > 0) {
+                        if (clockSpan < fastestD * nbDecodeLoops)
+                            fastestD = clockSpan / nbDecodeLoops;
+                        assert(fastestD > 0);
+                        nbDecodeLoops = (U32)(TIMELOOP_NANOSEC / fastestD) + 1;  /* aim for ~1sec */
+                    } else {
+                        assert(nbDecodeLoops < 40000000);   /* avoid overflow */
+                        nbDecodeLoops *= 100;
                     }
-                    nbLoops++;
-                } while (UTIL_clockSpanMicro(clockStart, ticksPerSecond) < DECOMP_MULT*clockLoop);
-                {   U64 const clockSpan = UTIL_clockSpanMicro(clockStart, ticksPerSecond);
-                    if (clockSpan < fastestD*nbLoops) fastestD = clockSpan / nbLoops;
                     totalDTime += clockSpan;
-                    dCompleted = totalDTime>(DECOMP_MULT*maxTime);
+                    dCompleted = totalDTime > (DECOMP_MULT*maxTime);
             }   }
 
             markNb = (markNb+1) % NB_MARKS;
             DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s ,%6.1f MB/s\r",
                     marks[markNb], displayName, (U32)srcSize, (U32)cSize, ratio,
-                    (double)srcSize / fastestC,
-                    (double)srcSize / fastestD );
+                    ((double)srcSize / fastestC) * 1000,
+                    ((double)srcSize / fastestD) * 1000);
 
             /* CRC Checking */
             {   U64 const crcCheck = XXH64(resultBuffer, srcSize, 0);
                 if (crcOrig!=crcCheck) {
                     size_t u;
-                    DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x   \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck);
+                    DISPLAY("\n!!! WARNING !!! %17s : Invalid Checksum : %x != %x   \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck);
                     for (u=0; u<srcSize; u++) {
                         if (((const BYTE*)srcBuffer)[u] != ((const BYTE*)resultBuffer)[u]) {
                             U32 segNb, bNb, pos;
@@ -341,8 +353,8 @@
         }   /* for (testNb = 1; testNb <= (g_nbSeconds + !g_nbSeconds); testNb++) */
 
         if (g_displayLevel == 1) {
-            double cSpeed = (double)srcSize / fastestC;
-            double dSpeed = (double)srcSize / fastestD;
+            double const cSpeed = ((double)srcSize / fastestC) * 1000;
+            double const dSpeed = ((double)srcSize / fastestD) * 1000;
             if (g_additionalParam)
                 DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, g_additionalParam);
             else
@@ -393,7 +405,7 @@
     if (!pch) pch = strrchr(displayName, '/'); /* Linux */
     if (pch) displayName = pch+1;
 
-    SET_HIGH_PRIORITY;
+    SET_REALTIME_PRIORITY;
 
     if (g_displayLevel == 1 && !g_additionalParam)
         DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", LZ4_VERSION_STRING, LZ4_GIT_COMMIT_STRING, (U32)benchedSize, g_nbSeconds, (U32)(g_blockSize>>10));
@@ -428,7 +440,10 @@
         f = fopen(fileNamesTable[n], "rb");
         if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]);
         DISPLAYUPDATE(2, "Loading %s...       \r", fileNamesTable[n]);
-        if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n;   /* buffer too small - stop after this file */
+        if (fileSize > bufferSize-pos) { /* buffer too small - stop after this file */
+            fileSize = bufferSize-pos;
+            nbFiles=n;
+        }
         { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
           if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]);
           pos += readSize; }
@@ -455,8 +470,13 @@
     benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3;
     if (benchedSize==0) EXM_THROW(12, "not enough memory");
     if ((U64)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad;
-    if (benchedSize < totalSizeToLoad)
-        DISPLAY("Not enough memory; testing %u MB only...\n", (U32)(benchedSize >> 20));
+    if (benchedSize > LZ4_MAX_INPUT_SIZE) {
+        benchedSize = LZ4_MAX_INPUT_SIZE;
+        DISPLAY("File(s) bigger than LZ4's max input size; testing %u MB only...\n", (U32)(benchedSize >> 20));
+    } else {
+        if (benchedSize < totalSizeToLoad)
+            DISPLAY("Not enough memory; testing %u MB only...\n", (U32)(benchedSize >> 20));
+    }
     srcBuffer = malloc(benchedSize + !benchedSize);   /* avoid alloc of zero */
     if (!srcBuffer) EXM_THROW(12, "not enough memory");
 
@@ -498,6 +518,22 @@
 }
 
 
+int BMK_benchFilesSeparately(const char** fileNamesTable, unsigned nbFiles,
+                   int cLevel, int cLevelLast)
+{   /* Benchmark every file on its own : one BMK_benchFileTable() call per name. Always returns 0. */
+    unsigned idx;
+    cLevel = (cLevel > LZ4HC_CLEVEL_MAX) ? LZ4HC_CLEVEL_MAX : cLevel;   /* clamp to LZ4HC_CLEVEL_MAX */
+    cLevelLast = (cLevelLast > LZ4HC_CLEVEL_MAX) ? LZ4HC_CLEVEL_MAX : cLevelLast;
+    cLevelLast = (cLevelLast < cLevel) ? cLevel : cLevelLast;   /* never let the range run backwards */
+    if (cLevelLast > cLevel) DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
+
+    for (idx = 0; idx != nbFiles; ++idx)   /* each call receives a one-entry table */
+        BMK_benchFileTable(&fileNamesTable[idx], 1, cLevel, cLevelLast);
+
+    return 0;
+}
+
+
 int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,
                    int cLevel, int cLevelLast)
 {
@@ -510,7 +546,11 @@
 
     if (nbFiles == 0)
         BMK_syntheticTest(cLevel, cLevelLast, compressibility);
-    else
-        BMK_benchFileTable(fileNamesTable, nbFiles, cLevel, cLevelLast);
+    else {
+        if (g_benchSeparately)
+            BMK_benchFilesSeparately(fileNamesTable, nbFiles, cLevel, cLevelLast);
+        else
+            BMK_benchFileTable(fileNamesTable, nbFiles, cLevel, cLevelLast);
+    }
     return 0;
 }
diff --git a/programs/bench.h b/programs/bench.h
index 15def93..bb67bee 100644
--- a/programs/bench.h
+++ b/programs/bench.h
@@ -29,9 +29,10 @@
                    int cLevel, int cLevelLast);
 
 /* Set Parameters */
-void BMK_SetNbSeconds(unsigned nbLoops);
-void BMK_SetBlockSize(size_t blockSize);
+void BMK_setNbSeconds(unsigned nbLoops);
+void BMK_setBlockSize(size_t blockSize);
 void BMK_setAdditionalParam(int additionalParam);
 void BMK_setNotificationLevel(unsigned level);
+void BMK_setBenchSeparately(int separate);
 
 #endif   /* BENCH_H_125623623633 */
diff --git a/programs/datagen.c b/programs/datagen.c
index a61afc0..24a2da2 100644
--- a/programs/datagen.c
+++ b/programs/datagen.c
@@ -31,6 +31,7 @@
 #include <stdlib.h>    /* malloc */
 #include <stdio.h>     /* FILE, fwrite */
 #include <string.h>    /* memcpy */
+#include <assert.h>
 
 
 /**************************************
@@ -78,7 +79,10 @@
     while (u<LTSIZE) {
         U32 const weight = (U32)((double)(LTSIZE - u) * ld) + 1;
         U32 const end = MIN(u+weight, LTSIZE);
-        while (u < end) lt[u++] = character;
+        while (u < end) {
+            assert(u<LTSIZE);  /* try to ease static analyzer. u < end <= LTSIZE */
+            lt[u++] = character;
+        }
         character++;
         if (character > lastChar) character = firstChar;
     }
@@ -103,13 +107,11 @@
     U32* seed = seedPtr;
 
     /* special case */
-    while (matchProba >= 1.0)
-    {
+    while (matchProba >= 1.0) {
         size_t size0 = RDG_rand(seed) & 3;
         size0  = (size_t)1 << (16 + size0 * 2);
         size0 += RDG_rand(seed) & (size0-1);   /* because size0 is power of 2*/
-        if (buffSize < pos + size0)
-        {
+        if (buffSize < pos + size0) {
             memset(buffPtr+pos, 0, buffSize-pos);
             return;
         }
@@ -119,14 +121,15 @@
     }
 
     /* init */
-    if (pos==0) buffPtr[0] = RDG_genChar(seed, lt), pos=1;
+    if (pos==0) {
+        buffPtr[0] = RDG_genChar(seed, lt);
+        pos=1;
+    }
 
     /* Generate compressible data */
-    while (pos < buffSize)
-    {
+    while (pos < buffSize) {
         /* Select : Literal (char) or Match (within 32K) */
-        if (RDG_RAND15BITS < matchProba32)
-        {
+        if (RDG_RAND15BITS < matchProba32) {
             /* Copy (within 32K) */
             size_t match;
             size_t d;
@@ -137,9 +140,7 @@
             d = pos + length;
             if (d > buffSize) d = buffSize;
             while (pos < d) buffPtr[pos++] = buffPtr[match++];
-        }
-        else
-        {
+        } else {
             /* Literal (noise) */
             size_t d;
             size_t length = RDG_RANDLENGTH;
@@ -177,12 +178,11 @@
     RDG_genBlock(buff, RDG_DICTSIZE, 0, matchProba, lt, &seed);
 
     /* Generate compressible data */
-    while (total < size)
-    {
+    while (total < size) {
         RDG_genBlock(buff, RDG_DICTSIZE+RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, lt, &seed);
         if (size-total < RDG_BLOCKSIZE) genBlockSize = (size_t)(size-total);
         total += genBlockSize;
-        fwrite(buff, 1, genBlockSize, stdout);
+        fwrite(buff, 1, genBlockSize, stdout);  /* should check potential write error */
         /* update dict */
         memcpy(buff, buff + RDG_BLOCKSIZE, RDG_DICTSIZE);
     }
diff --git a/programs/lz4.1 b/programs/lz4.1
index 48c988a..f35e29d 100644
--- a/programs/lz4.1
+++ b/programs/lz4.1
@@ -1,5 +1,5 @@
 .
-.TH "LZ4" "1" "November 2016" "lz4 1.7.4" "User Commands"
+.TH "LZ4" "1" "September 2018" "lz4 1.8.3" "User Commands"
 .
 .SH "NAME"
 \fBlz4\fR \- lz4, unlz4, lz4cat \- Compress or decompress \.lz4 files
@@ -32,7 +32,10 @@
 \fBlz4 file1 file2\fR means : compress file1 \fIinto\fR file2
 .
 .IP "\(bu" 4
-\fBlz4\fR shows real\-time notification statistics during compression or decompression of a single file (use \fB\-q\fR to silent them)
+\fBlz4 file\.lz4\fR will default to decompression (use \fB\-z\fR to force compression)
+.
+.IP "\(bu" 4
+\fBlz4\fR shows real\-time notification statistics during compression or decompression of a single file (use \fB\-q\fR to silence them)
 .
 .IP "\(bu" 4
 If no destination name is provided, result is sent to \fBstdout\fR \fIexcept if stdout is the console\fR\.
@@ -41,7 +44,10 @@
 If no destination name is provided, \fBand\fR if \fBstdout\fR is the console, \fBfile\fR is compressed into \fBfile\.lz4\fR\.
 .
 .IP "\(bu" 4
-As a consequence of previous rules, note the following example : \fBlz4 file | consumer\fR sends compressed data to \fBconsumer\fR through \fBstdout\fR, hence it does \fInot\fR create any \fBfile\.lz4\fR\.
+As a consequence of previous rules, note the following example : \fBlz4 file | consumer\fR sends compressed data to \fBconsumer\fR through \fBstdout\fR, hence it does \fInot\fR create \fBfile\.lz4\fR\.
+.
+.IP "\(bu" 4
+Another consequence of those rules is that to run \fBlz4\fR under \fBnohup\fR, you should provide a destination file: \fBnohup lz4 file file\.lz4\fR, because \fBnohup\fR writes the specified command\'s output to a file\.
 .
 .IP "" 0
 .
@@ -49,7 +55,10 @@
 Default behaviors can be modified by opt\-in commands, detailed below\.
 .
 .IP "\(bu" 4
-\fBlz4 \-m\fR makes it possible to provide multiple input filenames, which will be compressed into files using suffix \fB\.lz4\fR\. Progress notifications are also disabled by default\. This mode has a behavior which more closely mimics \fBgzip\fR command line, with the main difference being that source files are preserved by default\.
+\fBlz4 \-m\fR makes it possible to provide multiple input filenames, which will be compressed into files using suffix \fB\.lz4\fR\. Progress notifications are also disabled by default (use \fB\-v\fR to enable them)\. This mode has a behavior which more closely mimics \fBgzip\fR command line, with the main remaining difference being that source files are preserved by default\.
+.
+.IP "\(bu" 4
+Similarly, \fBlz4 \-m \-d\fR can decompress multiple \fB*\.lz4\fR files\.
 .
 .IP "\(bu" 4
 It\'s possible to opt\-in to erase source files on successful compression or decompression, using \fB\-\-rm\fR command\.
@@ -60,13 +69,21 @@
 .IP "" 0
 .
 .SS "Concatenation of \.lz4 files"
-It is possible to concatenate \fB\.lz4\fR files as is\. \fBlz4\fR will decompress such files as if they were a single \fB\.lz4\fR file\. For example: lz4 file1 > foo\.lz4 lz4 file2 >> foo\.lz4
+It is possible to concatenate \fB\.lz4\fR files as is\. \fBlz4\fR will decompress such files as if they were a single \fB\.lz4\fR file\. For example:
+.
+.IP "" 4
+.
+.nf
+
+lz4 file1  > foo\.lz4
+lz4 file2 >> foo\.lz4
+.
+.fi
+.
+.IP "" 0
 .
 .P
-then lz4cat foo\.lz4
-.
-.P
-is equivalent to : cat file1 file2
+Then \fBlz4cat foo\.lz4\fR is equivalent to \fBcat file1 file2\fR\.
 .
 .SH "OPTIONS"
 .
@@ -98,7 +115,11 @@
 .
 .TP
 \fB\-#\fR
-Compression level, with # being any value from 1 to 16\. Higher values trade compression speed for compression ratio\. Values above 16 are considered the same as 16\. Recommended values are 1 for fast compression (default), and 9 for high compression\. Speed/compression trade\-off will vary depending on data to compress\. Decompression speed remains fast at all settings\.
+Compression level, with # being any value from 1 to 12\. Higher values trade compression speed for compression ratio\. Values above 12 are considered the same as 12\. Recommended values are 1 for fast compression (default), and 9 for high compression\. Speed/compression trade\-off will vary depending on data to compress\. Decompression speed remains fast at all settings\.
+.
+.TP
+\fB\-D dictionaryName\fR
+Compress, decompress or benchmark using dictionary \fIdictionaryName\fR\. Compression and decompression must use the same dictionary to be compatible\. Using a different dictionary during decompression will either abort due to decompression error, or generate a checksum error\.
 .
 .TP
 \fB\-f\fR \fB\-\-[no\-]force\fR
@@ -134,6 +155,10 @@
 Block Dependency (improves compression ratio on small blocks)
 .
 .TP
+\fB\-\-fast[=#]\fR
+switch to ultra\-fast compression levels\. If \fB=#\fR is not present, it defaults to \fB1\fR\. The higher the value, the faster the compression speed, at the cost of some compression ratio\. This setting overrides compression level if one was set previously\. Similarly, if a compression level is set after \fB\-\-fast\fR, it overrides it\.
+.
+.TP
 \fB\-\-[no\-]frame\-crc\fR
 Select frame checksum (default:enabled)
 .
@@ -181,6 +206,10 @@
 \fB\-\-rm\fR
 Delete source files on successful compression or decompression
 .
+.TP
+\fB\-\-\fR
+Treat all subsequent arguments as files
+.
 .SS "Benchmark mode"
 .
 .TP
@@ -193,11 +222,7 @@
 .
 .TP
 \fB\-i#\fR
-Minimum evaluation in seconds [1\-9] (default : 3)
-.
-.TP
-\fB\-r\fR
-Operate recursively on directories
+Minimum evaluation time in seconds [1\-9] (default : 3)
 .
 .SH "BUGS"
 Report bugs at: https://github\.com/lz4/lz4/issues
diff --git a/programs/lz4.1.md b/programs/lz4.1.md
index 48f3152..12b8e29 100644
--- a/programs/lz4.1.md
+++ b/programs/lz4.1.md
@@ -37,7 +37,7 @@
   * `lz4 file.lz4` will default to decompression (use `-z` to force compression)
   * `lz4` shows real-time notification statistics
      during compression or decompression of a single file
-     (use `-q` to silent them)
+     (use `-q` to silence them)
   * If no destination name is provided, result is sent to `stdout`
     _except if stdout is the console_.
   * If no destination name is provided, __and__ if `stdout` is the console,
@@ -45,6 +45,9 @@
   * As a consequence of previous rules, note the following example :
     `lz4 file | consumer` sends compressed data to `consumer` through `stdout`,
     hence it does _not_ create `file.lz4`.
+  * Another consequence of those rules is that to run `lz4` under `nohup`,
+    you should provide a destination file: `nohup lz4 file file.lz4`,
+    because `nohup` writes the specified command's output to a file.
 
 Default behaviors can be modified by opt-in commands, detailed below.
 
@@ -63,15 +66,11 @@
 It is possible to concatenate `.lz4` files as is.
 `lz4` will decompress such files as if they were a single `.lz4` file.
 For example:
+
     lz4 file1  > foo.lz4
     lz4 file2 >> foo.lz4
 
-then
-    lz4cat foo.lz4
-
-is equivalent to :
-    cat file1 file2
-
+Then `lz4cat foo.lz4` is equivalent to `cat file1 file2`.
 
 OPTIONS
 -------
@@ -118,14 +117,27 @@
 ### Operation modifiers
 
 * `-#`:
-  Compression level, with # being any value from 1 to 16.
+  Compression level, with # being any value from 1 to 12.
   Higher values trade compression speed for compression ratio.
-  Values above 16 are considered the same as 16.
+  Values above 12 are considered the same as 12.
   Recommended values are 1 for fast compression (default),
   and 9 for high compression.
   Speed/compression trade-off will vary depending on data to compress.
   Decompression speed remains fast at all settings.
 
+* `--fast[=#]`:
+  switch to ultra-fast compression levels.
+  The higher the value, the faster the compression speed, at the cost of some compression ratio.
+  If `=#` is not present, it defaults to `1`.
+  This setting overrides compression level if one was set previously.
+  Similarly, if a compression level is set after `--fast`, it overrides it.
+
+* `-D dictionaryName`:
+  Compress, decompress or benchmark using dictionary _dictionaryName_.
+  Compression and decompression must use the same dictionary to be compatible.
+  Using a different dictionary during decompression will either
+  abort due to decompression error, or generate a checksum error.
+
 * `-f` `--[no-]force`:
   This option has several effects:
 
@@ -194,6 +206,9 @@
 * `--rm` :
   Delete source files on successful compression or decompression
 
+* `--` :
+  Treat all subsequent arguments as files
+
 
 ### Benchmark mode
 
@@ -204,7 +219,7 @@
   Benchmark multiple compression levels, from b# to e# (included)
 
 * `-i#`:
-  Minimum evaluation in seconds \[1-9\] (default : 3)
+  Minimum evaluation time in seconds \[1-9\] (default : 3)
 
 
 BUGS
diff --git a/programs/lz4cli.c b/programs/lz4cli.c
index cf91a99..26a8089 100644
--- a/programs/lz4cli.c
+++ b/programs/lz4cli.c
@@ -30,14 +30,6 @@
   The license of this compression CLI program is GPLv2.
 */
 
-/**************************************
-*  Tuning parameters
-***************************************/
-/* ENABLE_LZ4C_LEGACY_OPTIONS :
-   Control the availability of -c0, -c1 and -hc legacy arguments
-   Default : Legacy options are disabled */
-/* #define ENABLE_LZ4C_LEGACY_OPTIONS */
-
 
 /****************************
 *  Includes
@@ -49,7 +41,7 @@
 #include <string.h>   /* strcmp, strlen */
 #include "bench.h"    /* BMK_benchFile, BMK_SetNbIterations, BMK_SetBlocksize, BMK_SetPause */
 #include "lz4io.h"    /* LZ4IO_compressFilename, LZ4IO_decompressFilename, LZ4IO_compressMultipleFilenames */
-#include "lz4hc.h"    /* LZ4HC_DEFAULT_CLEVEL */
+#include "lz4hc.h"    /* LZ4HC_CLEVEL_MAX */
 #include "lz4.h"      /* LZ4_VERSION_STRING */
 
 
@@ -62,6 +54,8 @@
 #define LZ4_EXTENSION ".lz4"
 #define LZ4CAT "lz4cat"
 #define UNLZ4 "unlz4"
+#define LZ4_LEGACY "lz4c"
+static int g_lz4c_legacy_commands = 0;
 
 #define KB *(1U<<10)
 #define MB *(1U<<20)
@@ -96,9 +90,6 @@
 /*-************************************
 *  Version modifiers
 ***************************************/
-#define EXTENDED_ARGUMENTS
-#define EXTENDED_HELP
-#define EXTENDED_FORMAT
 #define DEFAULT_COMPRESSOR   LZ4IO_compressFilename
 #define DEFAULT_DECOMPRESSOR LZ4IO_decompressFilename
 int LZ4IO_compressFilename_Legacy(const char* input_filename, const char* output_filename, int compressionlevel);   /* hidden function */
@@ -119,6 +110,7 @@
     DISPLAY( " -9     : High compression \n");
     DISPLAY( " -d     : decompression (default for %s extension)\n", LZ4_EXTENSION);
     DISPLAY( " -z     : force compression \n");
+    DISPLAY( " -D FILE: use FILE as dictionary \n");
     DISPLAY( " -f     : overwrite output without prompting \n");
     DISPLAY( " -k     : preserve source files(s)  (default) \n");
     DISPLAY( "--rm    : remove source file(s) after successful de/compression \n");
@@ -144,24 +136,25 @@
     DISPLAY( " -l     : compress using Legacy format (Linux kernel compression)\n");
     DISPLAY( " -B#    : Block size [4-7] (default : 7) \n");
     DISPLAY( " -BD    : Block dependency (improve compression ratio) \n");
-    /* DISPLAY( " -BX    : enable block checksum (default:disabled)\n");   *//* Option currently inactive */
+    DISPLAY( " -BX    : enable block checksum (default:disabled) \n");
     DISPLAY( "--no-frame-crc : disable stream checksum (default:enabled) \n");
     DISPLAY( "--content-size : compressed frame includes original size (default:not present)\n");
     DISPLAY( "--[no-]sparse  : sparse mode (default:enabled on file, disabled on stdout)\n");
+    DISPLAY( "--favor-decSpeed: compressed files decompress faster, but are less compressed \n");
+    DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1);
     DISPLAY( "Benchmark arguments : \n");
     DISPLAY( " -b#    : benchmark file(s), using # compression level (default : 1) \n");
     DISPLAY( " -e#    : test all compression levels from -bX to # (default : 1)\n");
     DISPLAY( " -i#    : minimum evaluation time in seconds (default : 3s) \n");
     DISPLAY( " -B#    : cut file into independent blocks of size # bytes [32+] \n");
     DISPLAY( "                     or predefined block size [4-7] (default: 7) \n");
-#if defined(ENABLE_LZ4C_LEGACY_OPTIONS)
-    DISPLAY( "Legacy arguments : \n");
-    DISPLAY( " -c0    : fast compression \n");
-    DISPLAY( " -c1    : high compression \n");
-    DISPLAY( " -hc    : high compression \n");
-    DISPLAY( " -y     : overwrite output without prompting \n");
-#endif /* ENABLE_LZ4C_LEGACY_OPTIONS */
-    EXTENDED_HELP;
+    if (g_lz4c_legacy_commands) {
+        DISPLAY( "Legacy arguments : \n");
+        DISPLAY( " -c0    : fast compression \n");
+        DISPLAY( " -c1    : high compression \n");
+        DISPLAY( " -c2,-hc: very high compression \n");
+        DISPLAY( " -y     : overwrite output without prompting \n");
+    }
     return 0;
 }
 
@@ -212,17 +205,17 @@
     DISPLAY( "-------------------------------------\n");
     DISPLAY( "3 : compress data stream from 'generator', send result to 'consumer'\n");
     DISPLAY( "          generator | %s | consumer \n", exeName);
-#if defined(ENABLE_LZ4C_LEGACY_OPTIONS)
-    DISPLAY( "\n");
-    DISPLAY( "***** Warning  *****\n");
-    DISPLAY( "Legacy arguments take precedence. Therefore : \n");
-    DISPLAY( "---------------------------------\n");
-    DISPLAY( "          %s -hc filename\n", exeName);
-    DISPLAY( "means 'compress filename in high compression mode'\n");
-    DISPLAY( "It is not equivalent to :\n");
-    DISPLAY( "          %s -h -c filename\n", exeName);
-    DISPLAY( "which would display help text and exit\n");
-#endif /* ENABLE_LZ4C_LEGACY_OPTIONS */
+    if (g_lz4c_legacy_commands) {
+        DISPLAY( "\n");
+        DISPLAY( "***** Warning  ***** \n");
+        DISPLAY( "Legacy arguments take precedence. Therefore : \n");
+        DISPLAY( "--------------------------------- \n");
+        DISPLAY( "          %s -hc filename \n", exeName);
+        DISPLAY( "means 'compress filename in high compression mode' \n");
+        DISPLAY( "It is not equivalent to : \n");
+        DISPLAY( "          %s -h -c filename \n", exeName);
+        DISPLAY( "which displays help text and exits \n");
+    }
     return 0;
 }
 
@@ -242,21 +235,54 @@
 
 static const char* lastNameFromPath(const char* path)
 {
-    const char* name = strrchr(path, '/');
-    if (name==NULL) name = strrchr(path, '\\');   /* windows */
-    if (name==NULL) return path;
-    return name+1;
+    const char* const slash = strrchr(path, '/');
+    const char* const afterSlash = slash ? slash + 1 : path;   /* keep what follows the last '/' */
+    const char* const backslash = strrchr(afterSlash, '\\');   /* windows : searched in the remainder only */
+    return backslash ? backslash + 1 : afterSlash;
+}
+
+/*! exeNameMatch() :
+    @return : non-zero when exeName is exactly test, or test immediately followed by an extension ('.')
+   */
+static int exeNameMatch(const char* exeName, const char* test)
+{
+    size_t const testLen = strlen(test);
+    return (strncmp(exeName, test, testLen) == 0) && ((exeName[testLen] == '\0') || (exeName[testLen] == '.'));
 }
 
 /*! readU32FromChar() :
-    @return : unsigned integer value reach from input in `char` format
+    @return : unsigned integer value read from input in `char` format
+    Accepts an optional K, KB, KiB, M, MB or MiB suffix : K multiplies by 1024, M by 1024*1024.
     Will also modify `*stringPtr`, advancing it to position where it stopped reading.
-    Note : this function can overflow if result > MAX_UINT */
+    Note : result silently wraps around if the value (suffix multiplier included) exceeds UINT_MAX */
 static unsigned readU32FromChar(const char** stringPtr)
 {
     unsigned result = 0;
-    while ((**stringPtr >='0') && (**stringPtr <='9'))
-        result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
+    while ((**stringPtr >='0') && (**stringPtr <='9')) {   /* accumulate decimal digits */
+        result *= 10;
+        result += **stringPtr - '0';
+        (*stringPtr)++ ;
+    }
+    if ((**stringPtr=='K') || (**stringPtr=='M')) {   /* optional size suffix */
+        result <<= 10;   /* K : x 1024 */
+        if (**stringPtr=='M') result <<= 10;   /* M : x 1024 once more */
+        (*stringPtr)++ ;   /* step over 'K' or 'M' */
+        if (**stringPtr=='i') (*stringPtr)++;   /* optional 'i', as in KiB / MiB */
+        if (**stringPtr=='B') (*stringPtr)++;   /* optional trailing 'B' */
+    }
+    return result;
+}
+
+/** longCommandWArg() :
+ *  check if *stringPtr begins with longCommand (prefix match : characters after it are not examined).
+ *  If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
+ * @return 0 and doesn't modify *stringPtr otherwise.
+ */
+static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
+{
+    size_t const comSize = strlen(longCommand);
+    int const result = !strncmp(*stringPtr, longCommand, comSize);   /* 1 when the first comSize chars match */
+    if (result) *stringPtr += comSize;   /* leave the caller positioned on what follows, e.g. "=value" */
     return result;
 }
 
@@ -266,15 +292,17 @@
 {
     int i,
         cLevel=1,
-        cLevelLast=1,
+        cLevelLast=-10000,
         legacy_format=0,
         forceStdout=0,
         main_pause=0,
         multiple_inputs=0,
+        all_arguments_are_files=0,
         operationResult=0;
     operationMode_e mode = om_auto;
     const char* input_filename = NULL;
     const char* output_filename= NULL;
+    const char* dictionary_filename = NULL;
     char* dynNameSpace = NULL;
     const char** inFileNames = (const char**) calloc(argc, sizeof(char*));
     unsigned ifnIdx=0;
@@ -296,8 +324,8 @@
     inFileNames[0] = stdinmark;
     LZ4IO_setOverwrite(0);
 
-    /* lz4cat predefined behavior */
-    if (!strcmp(exeName, LZ4CAT)) {
+    /* predefined behaviors, based on binary/link name */
+    if (exeNameMatch(exeName, LZ4CAT)) {
         mode = om_decompress;
         LZ4IO_setOverwrite(1);
         LZ4IO_setRemoveSrcFile(0);
@@ -306,7 +334,8 @@
         displayLevel=1;
         multiple_inputs=1;
     }
-    if (!strcmp(exeName, UNLZ4)) { mode = om_decompress; }
+    if (exeNameMatch(exeName, UNLZ4)) { mode = om_decompress; }
+    if (exeNameMatch(exeName, LZ4_LEGACY)) { g_lz4c_legacy_commands=1; }
 
     /* command switches */
     for(i=1; i<argc; i++) {
@@ -315,7 +344,7 @@
         if(!argument) continue;   /* Protection if argument empty */
 
         /* Short commands (note : aggregated short commands are allowed) */
-        if (argument[0]=='-') {
+        if (!all_arguments_are_files && argument[0]=='-') {
             /* '-' means stdin/stdout */
             if (argument[1]==0) {
                 if (!input_filename) input_filename=stdinmark;
@@ -325,6 +354,7 @@
 
             /* long commands (--long-word) */
             if (argument[1]=='-') {
+                if (!strcmp(argument,  "--")) { all_arguments_are_files = 1; continue; }
                 if (!strcmp(argument,  "--compress")) { mode = om_compress; continue; }
                 if ((!strcmp(argument, "--decompress"))
                     || (!strcmp(argument, "--uncompress"))) { mode = om_decompress; continue; }
@@ -340,24 +370,45 @@
                 if (!strcmp(argument,  "--no-content-size")) { LZ4IO_setContentSize(0); continue; }
                 if (!strcmp(argument,  "--sparse")) { LZ4IO_setSparseFile(2); continue; }
                 if (!strcmp(argument,  "--no-sparse")) { LZ4IO_setSparseFile(0); continue; }
+                if (!strcmp(argument,  "--favor-decSpeed")) { LZ4IO_favorDecSpeed(1); continue; }
                 if (!strcmp(argument,  "--verbose")) { displayLevel++; continue; }
                 if (!strcmp(argument,  "--quiet")) { if (displayLevel) displayLevel--; continue; }
                 if (!strcmp(argument,  "--version")) { DISPLAY(WELCOME_MESSAGE); return 0; }
                 if (!strcmp(argument,  "--help")) { usage_advanced(exeName); goto _cleanup; }
                 if (!strcmp(argument,  "--keep")) { LZ4IO_setRemoveSrcFile(0); continue; }   /* keep source file (default) */
                 if (!strcmp(argument,  "--rm")) { LZ4IO_setRemoveSrcFile(1); continue; }
+                if (longCommandWArg(&argument, "--fast")) {
+                        /* Parse optional acceleration factor */
+                        if (*argument == '=') {
+                            U32 fastLevel;
+                            ++argument;
+                            fastLevel = readU32FromChar(&argument);
+                            if (fastLevel) {
+                              cLevel = -(int)fastLevel;
+                            } else {
+                              badusage(exeName);
+                            }
+                        } else if (*argument != 0) {
+                            /* Invalid character following --fast */
+                            badusage(exeName);
+                        } else {
+                            cLevel = -1;  /* default for --fast */
+                        }
+                        continue;
+                    }
             }
 
             while (argument[1]!=0) {
                 argument ++;
 
-#if defined(ENABLE_LZ4C_LEGACY_OPTIONS)
-                /* Legacy arguments (-c0, -c1, -hc, -y, -s) */
-                if ((argument[0]=='c') && (argument[1]=='0')) { cLevel=0; argument++; continue; }  /* -c0 (fast compression) */
-                if ((argument[0]=='c') && (argument[1]=='1')) { cLevel=9; argument++; continue; }  /* -c1 (high compression) */
-                if ((argument[0]=='h') && (argument[1]=='c')) { cLevel=9; argument++; continue; }  /* -hc (high compression) */
-                if (*argument=='y') { LZ4IO_setOverwrite(1); continue; }                           /* -y (answer 'yes' to overwrite permission) */
-#endif /* ENABLE_LZ4C_LEGACY_OPTIONS */
+                if (g_lz4c_legacy_commands) {
+                    /* Legacy commands (-c0, -c1, -hc, -y) */
+                    if (!strcmp(argument,  "c0")) { cLevel=0; argument++; continue; }  /* -c0 (fast compression) */
+                    if (!strcmp(argument,  "c1")) { cLevel=9; argument++; continue; }  /* -c1 (high compression) */
+                    if (!strcmp(argument,  "c2")) { cLevel=12; argument++; continue; } /* -c2 (very high compression) */
+                    if (!strcmp(argument,  "hc")) { cLevel=12; argument++; continue; } /* -hc (very high compression) */
+                    if (!strcmp(argument,  "y"))  { LZ4IO_setOverwrite(1); continue; } /* -y (answer 'yes' to overwrite permission) */
+                }
 
                 if ((*argument>='0') && (*argument<='9')) {
                     cLevel = readU32FromChar(&argument);
@@ -382,6 +433,22 @@
                     /* Compression (default) */
                 case 'z': mode = om_compress; break;
 
+                case 'D':
+                    if (argument[1] == '\0') {
+                        /* path is next arg */
+                        if (i + 1 == argc) {
+                            /* there is no next arg */
+                            badusage(exeName);
+                        }
+                        dictionary_filename = argv[++i];
+                    } else {
+                        /* path follows immediately */
+                        dictionary_filename = argument + 1;
+                    }
+                    /* skip to end of argument so that we jump to parsing next argument */
+                    argument += strlen(argument) - 1;
+                    break;
+
                     /* Use Legacy format (ex : Linux kernel compression) */
                 case 'l': legacy_format = 1; blockSize = 8 MB; break;
 
@@ -426,11 +493,11 @@
                                 if (B < 4) badusage(exeName);
                                 if (B <= 7) {
                                     blockSize = LZ4IO_setBlockSizeID(B);
-                                    BMK_SetBlockSize(blockSize);
+                                    BMK_setBlockSize(blockSize);
                                     DISPLAYLEVEL(2, "using blocks of size %u KB \n", (U32)(blockSize>>10));
                                 } else {
                                     if (B < 32) badusage(exeName);
-                                    BMK_SetBlockSize(B);
+                                    BMK_setBlockSize(B);
                                     if (B >= 1024) {
                                         DISPLAYLEVEL(2, "bench: using blocks of size %u KB \n", (U32)(B>>10));
                                     } else {
@@ -448,10 +515,15 @@
                 case 'b': mode = om_bench; multiple_inputs=1;
                     break;
 
+                    /* hidden command : benchmark files, but do not fuse result */
+                case 'S': BMK_setBenchSeparately(1);
+                    break;
+
 #ifdef UTIL_HAS_CREATEFILELIST
                     /* recursive */
-                case 'r': recursive=1;  /* without break */
+                case 'r': recursive=1;
 #endif
+                    /* fall-through */
                     /* Treat non-option args as input files.  See https://code.google.com/p/lz4/issues/detail?id=151 */
                 case 'm': multiple_inputs=1;
                     break;
@@ -463,16 +535,13 @@
                         iters = readU32FromChar(&argument);
                         argument--;
                         BMK_setNotificationLevel(displayLevel);
-                        BMK_SetNbSeconds(iters);   /* notification if displayLevel >= 3 */
+                        BMK_setNbSeconds(iters);   /* notification if displayLevel >= 3 */
                     }
                     break;
 
                     /* Pause at the end (hidden option) */
                 case 'p': main_pause=1; break;
 
-                    /* Specific commands for customized versions */
-                EXTENDED_ARGUMENTS;
-
                     /* Unrecognised command */
                 default : badusage(exeName);
                 }
@@ -510,7 +579,8 @@
 #ifdef _FILE_OFFSET_BITS
     DISPLAYLEVEL(4, "_FILE_OFFSET_BITS defined: %ldL\n", (long) _FILE_OFFSET_BITS);
 #endif
-    if ((mode == om_compress) || (mode == om_bench)) DISPLAYLEVEL(4, "Blocks size : %i KB\n", (U32)(blockSize>>10));
+    if ((mode == om_compress) || (mode == om_bench))
+        DISPLAYLEVEL(4, "Blocks size : %u KB\n", (U32)(blockSize>>10));
 
     if (multiple_inputs) {
         input_filename = inFileNames[0];
@@ -523,8 +593,7 @@
                 free((void*)inFileNames);
                 inFileNames = extendedFileList;
                 ifnIdx = fileNamesNb;
-            }
-        }
+        }   }
 #endif
     }
 
@@ -541,6 +610,14 @@
         mode = om_decompress;   /* defer to decompress */
     }
 
+    if (dictionary_filename) {
+        if (!strcmp(dictionary_filename, stdinmark) && IS_CONSOLE(stdin)) {
+            DISPLAYLEVEL(1, "refusing to read from a console\n");
+            exit(1);
+        }
+        LZ4IO_setDictionaryFilename(dictionary_filename);
+    }
+
     /* compress or decompress */
     if (!input_filename) input_filename = stdinmark;
     /* Check if input is defined as console; trigger an error in this case */
@@ -608,7 +685,7 @@
             operationResult = DEFAULT_DECOMPRESSOR(input_filename, output_filename);
     } else {   /* compression is default action */
         if (legacy_format) {
-            DISPLAYLEVEL(3, "! Generating compressed LZ4 using Legacy format (deprecated) ! \n");
+            DISPLAYLEVEL(3, "! Generating LZ4 Legacy format (deprecated) ! \n");
             LZ4IO_compressFilename_Legacy(input_filename, output_filename, cLevel);
         } else {
             if (multiple_inputs)
@@ -620,12 +697,13 @@
 
 _cleanup:
     if (main_pause) waitEnter();
-    if (dynNameSpace) free(dynNameSpace);
+    free(dynNameSpace);
 #ifdef UTIL_HAS_CREATEFILELIST
-    if (extendedFileList)
+    if (extendedFileList) {
         UTIL_freeFileList(extendedFileList, fileNamesBuf);
-    else
+        inFileNames = NULL;
+    }
 #endif
-        free((void*)inFileNames);
+    free((void*)inFileNames);
     return operationResult;
 }
diff --git a/programs/lz4io.c b/programs/lz4io.c
index 640c76d..28d6537 100644
--- a/programs/lz4io.c
+++ b/programs/lz4io.c
@@ -1,6 +1,6 @@
 /*
   LZ4io.c - LZ4 File/Stream Interface
-  Copyright (C) Yann Collet 2011-2016
+  Copyright (C) Yann Collet 2011-2017
 
   GPL v2 License
 
@@ -56,20 +56,10 @@
 #include "lz4io.h"
 #include "lz4.h"       /* still required for legacy format */
 #include "lz4hc.h"     /* still required for legacy format */
+#define LZ4F_STATIC_LINKING_ONLY
 #include "lz4frame.h"
 
 
-/* **************************************
-*  Compiler Options
-****************************************/
-#if defined(_MSC_VER) && (_MSC_VER >= 1400)            /* Avoid MSVC fseek()'s 2GiB barrier */
-#  define fseek _fseeki64
-#endif
-#if !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
-#  define fseek fseeko
-#endif
-
-
 /*****************************
 *  Constants
 *****************************/
@@ -93,6 +83,7 @@
 #define LEGACY_BLOCKSIZE   (8 MB)
 #define MIN_STREAM_BUFSIZE (192 KB)
 #define LZ4IO_BLOCKSIZEID_DEFAULT 7
+#define LZ4_MAX_DICT_SIZE (64 KB)
 
 
 /**************************************
@@ -103,9 +94,12 @@
 static int g_displayLevel = 0;   /* 0 : no display  ; 1: errors  ; 2 : + result + interaction + warnings ; 3 : + progression; 4 : + information */
 
 #define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
-            if (((clock_t)(g_time - clock()) > refreshRate) || (g_displayLevel>=4)) \
-            { g_time = clock(); DISPLAY(__VA_ARGS__); \
-            if (g_displayLevel>=4) fflush(stderr); } }
+            if ( ((clock() - g_time) > refreshRate)    \
+              || (g_displayLevel>=4) ) {               \
+                g_time = clock();                      \
+                DISPLAY(__VA_ARGS__);                  \
+                if (g_displayLevel>=4) fflush(stderr); \
+        }   }
 static const clock_t refreshRate = CLOCKS_PER_SEC / 6;
 static clock_t g_time = 0;
 
@@ -121,6 +115,9 @@
 static int g_blockIndependence = 1;
 static int g_sparseFileSupport = 1;
 static int g_contentSizeFlag = 0;
+static int g_useDictionary = 0;
+static unsigned g_favorDecSpeed = 0;
+static const char* g_dictionaryFilename = NULL;
 
 
 /**************************************
@@ -153,6 +150,12 @@
 /* ****************** Parameters ******************** */
 /* ************************************************** */
 
+int LZ4IO_setDictionaryFilename(const char* dictionaryFilename) {   /* records the dictionary path ; a NULL path turns dictionary use off */
+    g_dictionaryFilename = dictionaryFilename;   /* the pointer is stored as-is, the string is not copied */
+    g_useDictionary = dictionaryFilename != NULL;
+    return g_useDictionary;   /* 1 when a dictionary filename is now set, 0 otherwise */
+}
+
 /* Default setting : overwrite = 1; return : overwrite mode (0/1) */
 int LZ4IO_setOverwrite(int yes)
 {
@@ -184,17 +187,17 @@
     return g_blockIndependence;
 }
 
-/* Default setting : no checksum */
-int LZ4IO_setBlockChecksumMode(int xxhash)
+/* Default setting : no block checksum ; returns the block checksum mode now in effect (0/1) */
+int LZ4IO_setBlockChecksumMode(int enable)
 {
-    g_blockChecksum = (xxhash != 0);
+    g_blockChecksum = (enable != 0);   /* any non-zero value is normalized to 1 */
     return g_blockChecksum;
 }
 
 /* Default setting : checksum enabled */
-int LZ4IO_setStreamChecksumMode(int xxhash)
+int LZ4IO_setStreamChecksumMode(int enable)   /* returns the stream checksum mode now in effect (0/1) */
 {
-    g_streamChecksum = (xxhash != 0);
+    g_streamChecksum = (enable != 0);   /* any non-zero value is normalized to 1 */
     return g_streamChecksum;
 }
 
@@ -219,6 +222,12 @@
     return g_contentSizeFlag;
 }
 
+/* Default setting : 0 (disabled) ; any non-zero `favor` is normalized to 1 */
+void LZ4IO_favorDecSpeed(int favor)
+{
+    g_favorDecSpeed = (favor!=0);   /* later copied into prefs.favorDecSpeed when the frame preferences are filled in */
+}
+
 static U32 g_removeSrcFile = 0;
 void LZ4IO_setRemoveSrcFile(unsigned flag) { g_removeSrcFile = (flag>0); }
 
@@ -229,11 +238,13 @@
 ** ************************************************************************ */
 
 static int LZ4IO_GetBlockSize_FromBlockId (int id) { return (1 << (8 + (2 * id))); }
-static int LZ4IO_isSkippableMagicNumber(unsigned int magic) { return (magic & LZ4IO_SKIPPABLEMASK) == LZ4IO_SKIPPABLE0; }
+static int LZ4IO_isSkippableMagicNumber(unsigned int magic) {   /* non-zero when `magic`, once masked with LZ4IO_SKIPPABLEMASK, equals LZ4IO_SKIPPABLE0 */
+    return (magic & LZ4IO_SKIPPABLEMASK) == LZ4IO_SKIPPABLE0;
+}
 
 
 /** LZ4IO_openSrcFile() :
- * condition : `dstFileName` must be non-NULL.
+ * condition : `srcFileName` must be non-NULL.
  * @result : FILE* to `dstFileName`, or NULL if it fails */
 static FILE* LZ4IO_openSrcFile(const char* srcFileName)
 {
@@ -302,7 +313,7 @@
 /* unoptimized version; solves endianess & alignment issues */
 static void LZ4IO_writeLE32 (void* p, unsigned value32)
 {
-    unsigned char* dstPtr = (unsigned char*)p;
+    unsigned char* const dstPtr = (unsigned char*)p;
     dstPtr[0] = (unsigned char)value32;
     dstPtr[1] = (unsigned char)(value32 >> 8);
     dstPtr[2] = (unsigned char)(value32 >> 16);
@@ -328,11 +339,11 @@
     const int outBuffSize = LZ4_compressBound(LEGACY_BLOCKSIZE);
     FILE* finput;
     FILE* foutput;
-    clock_t end;
+    clock_t clockEnd;
 
     /* Init */
-    clock_t const start = clock();
-    if (compressionlevel < 3) compressionFunction = LZ4IO_LZ4_compress; else compressionFunction = LZ4_compress_HC;
+    clock_t const clockStart = clock();
+    compressionFunction = (compressionlevel < 3) ? LZ4IO_LZ4_compress : LZ4_compress_HC;
 
     finput = LZ4IO_openSrcFile(input_filename);
     if (finput == NULL) EXM_THROW(20, "%s : open file error ", input_filename);
@@ -347,7 +358,7 @@
     /* Write Archive Header */
     LZ4IO_writeLE32(out_buff, LEGACY_MAGICNUMBER);
     { size_t const sizeCheck = fwrite(out_buff, 1, MAGICNUMBER_SIZE, foutput);
-      if (sizeCheck!=MAGICNUMBER_SIZE) EXM_THROW(22, "Write error : cannot write header"); }
+      if (sizeCheck != MAGICNUMBER_SIZE) EXM_THROW(22, "Write error : cannot write header"); }
 
     /* Main Loop */
     while (1) {
@@ -361,24 +372,27 @@
         /* Compress Block */
         outSize = compressionFunction(in_buff, out_buff+4, (int)inSize, outBuffSize, compressionlevel);
         compressedfilesize += outSize+4;
-        DISPLAYUPDATE(2, "\rRead : %i MB  ==> %.2f%%   ", (int)(filesize>>20), (double)compressedfilesize/filesize*100);
+        DISPLAYUPDATE(2, "\rRead : %i MB  ==> %.2f%%   ",
+                (int)(filesize>>20), (double)compressedfilesize/filesize*100);
 
         /* Write Block */
         LZ4IO_writeLE32(out_buff, outSize);
         {   size_t const sizeCheck = fwrite(out_buff, 1, outSize+4, foutput);
-            if (sizeCheck!=(size_t)(outSize+4)) EXM_THROW(24, "Write error : cannot write compressed block");
+            if (sizeCheck!=(size_t)(outSize+4))
+                EXM_THROW(24, "Write error : cannot write compressed block");
     }   }
     if (ferror(finput)) EXM_THROW(25, "Error while reading %s ", input_filename);
 
     /* Status */
-    end = clock();
-    if (end==start) end+=1;  /* avoid division by zero (speed) */
+    clockEnd = clock();
+    if (clockEnd==clockStart) clockEnd+=1;  /* avoid division by zero (speed) */
     filesize += !filesize;   /* avoid division by zero (ratio) */
     DISPLAYLEVEL(2, "\r%79s\r", "");   /* blank line */
     DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
         filesize, compressedfilesize, (double)compressedfilesize / filesize * 100);
-    {   double const seconds = (double)(end - start) / CLOCKS_PER_SEC;
-        DISPLAYLEVEL(4,"Done in %.2f s ==> %.2f MB/s\n", seconds, (double)filesize / seconds / 1024 / 1024);
+    {   double const seconds = (double)(clockEnd - clockStart) / CLOCKS_PER_SEC;
+        DISPLAYLEVEL(4,"Done in %.2f s ==> %.2f MB/s\n", seconds,
+                        (double)filesize / seconds / 1024 / 1024);
     }
 
     /* Close & Free */
@@ -401,8 +415,80 @@
     void*  dstBuffer;
     size_t dstBufferSize;
     LZ4F_compressionContext_t ctx;
+    LZ4F_CDict* cdict;
 } cRess_t;
 
+static void* LZ4IO_createDict(const char* dictFilename, size_t *dictSize) {   /* returns a malloc'ed copy of the last (up to) LZ4_MAX_DICT_SIZE bytes read from dictFilename ; its length is written to *dictSize */
+    size_t readSize;
+    size_t dictEnd = 0;   /* next write offset inside circularBuf (wraps around) */
+    size_t dictLen = 0;   /* bytes read so far ; clamped to LZ4_MAX_DICT_SIZE after the read loop */
+    size_t dictStart;     /* offset of the oldest retained byte inside circularBuf */
+    size_t circularBufSize = LZ4_MAX_DICT_SIZE;
+    char* circularBuf;
+    char* dictBuf;
+    FILE* dictFile;
+
+    if (!dictFilename) EXM_THROW(25, "Dictionary error : no filename provided");
+
+    circularBuf = (char *) malloc(circularBufSize);
+    if (!circularBuf) EXM_THROW(25, "Allocation error : not enough memory");
+
+    dictFile = LZ4IO_openSrcFile(dictFilename);
+    if (!dictFile) EXM_THROW(25, "Dictionary error : could not open dictionary file");
+
+    /* opportunistically seek to the part of the file we care about. If this */
+    /* fails it's not a problem since we'll just read everything anyways.    */
+    if (strcmp(dictFilename, stdinmark)) {   /* the seek is attempted only when dictFilename differs from stdinmark */
+        (void)UTIL_fseek(dictFile, -LZ4_MAX_DICT_SIZE, SEEK_END);
+    }
+
+    do {   /* keep reading until fread() returns 0 ; once the buffer wraps, new data overwrites the oldest data */
+        readSize = fread(circularBuf + dictEnd, 1, circularBufSize - dictEnd, dictFile);
+        dictEnd = (dictEnd + readSize) % circularBufSize;
+        dictLen += readSize;
+    } while (readSize>0);   /* NOTE(review): a read error also ends this loop ; there is no ferror() check here */
+
+    if (dictLen > LZ4_MAX_DICT_SIZE) {   /* the buffer never holds more than its own size */
+        dictLen = LZ4_MAX_DICT_SIZE;
+    }
+
+    *dictSize = dictLen;
+
+    dictStart = (circularBufSize + dictEnd - dictLen) % circularBufSize;
+
+    if (dictStart == 0) {
+        /* We're in the simple case where the dict starts at the beginning of our circular buffer. */
+        dictBuf = circularBuf;
+        circularBuf = NULL;   /* the buffer is handed over to dictBuf ; the free() below then receives NULL */
+    } else {
+        /* Otherwise, we will alloc a new buffer and copy our dict into that. */
+        dictBuf = (char *) malloc(dictLen ? dictLen : 1);
+        if (!dictBuf) EXM_THROW(25, "Allocation error : not enough memory");
+
+        memcpy(dictBuf, circularBuf + dictStart, circularBufSize - dictStart);   /* older part : from dictStart up to the end of the buffer */
+        memcpy(dictBuf + circularBufSize - dictStart, circularBuf, dictLen - (circularBufSize - dictStart));   /* newer part : the wrapped-around bytes at the buffer start */
+    }
+
+    fclose(dictFile);
+    free(circularBuf);
+
+    return dictBuf;
+}
+
+static LZ4F_CDict* LZ4IO_createCDict(void) {   /* builds the compression dictionary from g_dictionaryFilename ; returns NULL when no dictionary was requested */
+    size_t dictionarySize;
+    void* dictionaryBuffer;
+    LZ4F_CDict* cdict;
+    if (!g_useDictionary) {
+        return NULL;
+    }
+    dictionaryBuffer = LZ4IO_createDict(g_dictionaryFilename, &dictionarySize);
+    if (!dictionaryBuffer) EXM_THROW(25, "Dictionary error : could not create dictionary");
+    cdict = LZ4F_createCDict(dictionaryBuffer, dictionarySize);   /* NOTE(review): the result is returned unchecked - confirm that a NULL here is acceptable to callers */
+    free(dictionaryBuffer);   /* raw buffer released right after creation - presumably LZ4F_createCDict() keeps its own copy ; confirm */
+    return cdict;
+}
+
 static cRess_t LZ4IO_createCResources(void)
 {
     const size_t blockSize = (size_t)LZ4IO_GetBlockSize_FromBlockId (g_blockSizeId);
@@ -418,6 +504,8 @@
     ress.dstBuffer = malloc(ress.dstBufferSize);
     if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(31, "Allocation error : not enough memory");
 
+    ress.cdict = LZ4IO_createCDict();
+
     return ress;
 }
 
@@ -425,6 +513,10 @@
 {
     free(ress.srcBuffer);
     free(ress.dstBuffer);
+
+    LZ4F_freeCDict(ress.cdict);
+    ress.cdict = NULL;
+
     { LZ4F_errorCode_t const errorCode = LZ4F_freeCompressionContext(ress.ctx);
       if (LZ4F_isError(errorCode)) EXM_THROW(38, "Error : can't free LZ4F context resource : %s", LZ4F_getErrorName(errorCode)); }
 }
@@ -461,7 +553,9 @@
     prefs.compressionLevel = compressionLevel;
     prefs.frameInfo.blockMode = (LZ4F_blockMode_t)g_blockIndependence;
     prefs.frameInfo.blockSizeID = (LZ4F_blockSizeID_t)g_blockSizeId;
+    prefs.frameInfo.blockChecksumFlag = (LZ4F_blockChecksum_t)g_blockChecksum;
     prefs.frameInfo.contentChecksumFlag = (LZ4F_contentChecksum_t)g_streamChecksum;
+    prefs.favorDecSpeed = g_favorDecSpeed;
     if (g_contentSizeFlag) {
       U64 const fileSize = UTIL_getFileSize(srcFileName);
       prefs.frameInfo.contentSize = fileSize;   /* == 0 if input == stdin */
@@ -477,7 +571,7 @@
     /* single-block file */
     if (readSize < blockSize) {
         /* Compress in single pass */
-        size_t const cSize = LZ4F_compressFrame(dstBuffer, dstBufferSize, srcBuffer, readSize, &prefs);
+        size_t cSize = LZ4F_compressFrame_usingCDict(ctx, dstBuffer, dstBufferSize, srcBuffer, readSize, ress.cdict, &prefs);
         if (LZ4F_isError(cSize)) EXM_THROW(31, "Compression failed : %s", LZ4F_getErrorName(cSize));
         compressedfilesize = cSize;
         DISPLAYUPDATE(2, "\rRead : %u MB   ==> %.2f%%   ",
@@ -493,7 +587,7 @@
     /* multiple-blocks file */
     {
         /* Write Archive Header */
-        size_t headerSize = LZ4F_compressBegin(ctx, dstBuffer, dstBufferSize, &prefs);
+        size_t headerSize = LZ4F_compressBegin_usingCDict(ctx, dstBuffer, dstBufferSize, ress.cdict, &prefs);
         if (LZ4F_isError(headerSize)) EXM_THROW(33, "File header generation failed : %s", LZ4F_getErrorName(headerSize));
         { size_t const sizeCheck = fwrite(dstBuffer, 1, headerSize, dstFile);
           if (sizeCheck!=headerSize) EXM_THROW(34, "Write error : cannot write header"); }
@@ -534,16 +628,23 @@
 
     /* Copy owner, file permissions and modification time */
     {   stat_t statbuf;
-        if (strcmp (srcFileName, stdinmark) && strcmp (dstFileName, stdoutmark) && UTIL_getFileStat(srcFileName, &statbuf))
+        if (strcmp (srcFileName, stdinmark)
+         && strcmp (dstFileName, stdoutmark)
+         && strcmp (dstFileName, nulmark)
+         && UTIL_getFileStat(srcFileName, &statbuf)) {
             UTIL_setFileStat(dstFileName, &statbuf);
-    }
+    }   }
 
-    if (g_removeSrcFile) { if (remove(srcFileName)) EXM_THROW(40, "Remove error : %s: %s", srcFileName, strerror(errno)); } /* remove source file : --rm */
+    if (g_removeSrcFile) {  /* remove source file : --rm */
+        if (remove(srcFileName))
+            EXM_THROW(40, "Remove error : %s: %s", srcFileName, strerror(errno));
+    }
 
     /* Final Status */
     DISPLAYLEVEL(2, "\r%79s\r", "");
     DISPLAYLEVEL(2, "Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
-        filesize, compressedfilesize, (double)compressedfilesize/(filesize + !filesize)*100);   /* avoid division by zero */
+                    filesize, compressedfilesize,
+                    (double)compressedfilesize / (filesize + !filesize /* avoid division by zero */ ) * 100);
 
     return 0;
 }
@@ -551,21 +652,25 @@
 
 int LZ4IO_compressFilename(const char* srcFileName, const char* dstFileName, int compressionLevel)
 {
-    clock_t const start = clock();
+    UTIL_time_t const timeStart = UTIL_getTime();   /* reference point for the elapsed time reported below */
+    clock_t const cpuStart = clock();               /* reference point for the processor time reported below */
     cRess_t const ress = LZ4IO_createCResources();
 
-    int const issueWithSrcFile = LZ4IO_compressFilename_extRess(ress, srcFileName, dstFileName, compressionLevel);
+    int const result = LZ4IO_compressFilename_extRess(ress, srcFileName, dstFileName, compressionLevel);   /* returned unchanged to the caller */
 
     /* Free resources */
     LZ4IO_freeCResources(ress);
 
     /* Final Status */
-    {   clock_t const end = clock();
-        double const seconds = (double)(end - start) / CLOCKS_PER_SEC;
-        DISPLAYLEVEL(4, "Completed in %.2f sec \n", seconds);
+    {   clock_t const cpuEnd = clock();
+        double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC;
+        U64 const timeLength_ns = UTIL_clockSpanNano(timeStart);
+        double const timeLength_s = (double)(timeLength_ns + !timeLength_ns) / 1000000000;   /* avoid division by zero (cpu load) */
+        DISPLAYLEVEL(4, "Completed in %.2f sec  (cpu load : %.0f%%)\n",
+                        timeLength_s, (cpuLoad_s / timeLength_s) * 100);
     }
 
-    return issueWithSrcFile;
+    return result;
 }
 
 
@@ -614,16 +719,16 @@
     return value32;
 }
 
-#define sizeT sizeof(size_t)
-#define maskT (sizeT - 1)
 
 static unsigned LZ4IO_fwriteSparse(FILE* file, const void* buffer, size_t bufferSize, unsigned storedSkips)
 {
+    const size_t sizeT = sizeof(size_t);
+    const size_t maskT = sizeT -1 ;
     const size_t* const bufferT = (const size_t*)buffer;   /* Buffer is supposed malloc'ed, hence aligned on size_t */
     const size_t* ptrT = bufferT;
     size_t bufferSizeT = bufferSize / sizeT;
     const size_t* const bufferTEnd = bufferT + bufferSizeT;
-    static const size_t segmentSizeT = (32 KB) / sizeT;
+    const size_t segmentSizeT = (32 KB) / sizeT;
 
     if (!g_sparseFileSupport) {  /* normal write */
         size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file);
@@ -633,7 +738,7 @@
 
     /* avoid int overflow */
     if (storedSkips > 1 GB) {
-        int const seekResult = fseek(file, 1 GB, SEEK_CUR);
+        int const seekResult = UTIL_fseek(file, 1 GB, SEEK_CUR);
         if (seekResult != 0) EXM_THROW(71, "1 GB skip error (sparse file support)");
         storedSkips -= 1 GB;
     }
@@ -650,7 +755,7 @@
 
         if (nb0T != seg0SizeT) {   /* not all 0s */
             errno = 0;
-            {   int const seekResult = fseek(file, storedSkips, SEEK_CUR);
+            {   int const seekResult = UTIL_fseek(file, storedSkips, SEEK_CUR);
                 if (seekResult) EXM_THROW(72, "Sparse skip error(%d): %s ; try --no-sparse", (int)errno, strerror(errno));
             }
             storedSkips = 0;
@@ -670,7 +775,7 @@
         for (; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ;
         storedSkips += (unsigned) (restPtr - restStart);
         if (restPtr != restEnd) {
-            int const seekResult = fseek(file, storedSkips, SEEK_CUR);
+            int const seekResult = UTIL_fseek(file, storedSkips, SEEK_CUR);
             if (seekResult) EXM_THROW(74, "Sparse skip error ; try --no-sparse");
             storedSkips = 0;
             {   size_t const sizeCheck = fwrite(restPtr, 1, restEnd - restPtr, file);
@@ -684,7 +789,7 @@
 static void LZ4IO_fwriteSparseEnd(FILE* file, unsigned storedSkips)
 {
     if (storedSkips>0) {   /* implies g_sparseFileSupport>0 */
-        int const seekResult = fseek(file, storedSkips-1, SEEK_CUR);
+        int const seekResult = UTIL_fseek(file, storedSkips-1, SEEK_CUR);
         if (seekResult != 0) EXM_THROW(69, "Final skip error (sparse file)\n");
         {   const char lastZeroByte[1] = { 0 };
             size_t const sizeCheck = fwrite(lastZeroByte, 1, 1, file);
@@ -693,22 +798,19 @@
 }
 
 
-static unsigned g_magicRead = 0;
+static unsigned g_magicRead = 0;   /* out-parameter of LZ4IO_decodeLegacyStream() */
 static unsigned long long LZ4IO_decodeLegacyStream(FILE* finput, FILE* foutput)
 {
-    unsigned long long filesize = 0;
-    char* in_buff;
-    char* out_buff;
+    unsigned long long streamSize = 0;
     unsigned storedSkips = 0;
 
     /* Allocate Memory */
-    in_buff = (char*)malloc(LZ4_compressBound(LEGACY_BLOCKSIZE));
-    out_buff = (char*)malloc(LEGACY_BLOCKSIZE);
+    char* const in_buff  = (char*)malloc(LZ4_compressBound(LEGACY_BLOCKSIZE));
+    char* const out_buff = (char*)malloc(LEGACY_BLOCKSIZE);
     if (!in_buff || !out_buff) EXM_THROW(51, "Allocation error : not enough memory");
 
     /* Main Loop */
     while (1) {
-        int decodeSize;
         unsigned int blockSize;
 
         /* Block Size */
@@ -727,13 +829,12 @@
           if (sizeCheck!=blockSize) EXM_THROW(52, "Read error : cannot access compressed block !"); }
 
         /* Decode Block */
-        decodeSize = LZ4_decompress_safe(in_buff, out_buff, blockSize, LEGACY_BLOCKSIZE);
-        if (decodeSize < 0) EXM_THROW(53, "Decoding Failed ! Corrupted input detected !");
-        filesize += decodeSize;
-
-        /* Write Block */
-        storedSkips = LZ4IO_fwriteSparse(foutput, out_buff, decodeSize, storedSkips);
-    }
+        {   int const decodeSize = LZ4_decompress_safe(in_buff, out_buff, blockSize, LEGACY_BLOCKSIZE);
+            if (decodeSize < 0) EXM_THROW(53, "Decoding Failed ! Corrupted input detected !");
+            streamSize += decodeSize;
+            /* Write Block */
+            storedSkips = LZ4IO_fwriteSparse(foutput, out_buff, decodeSize, storedSkips); /* success or die */
+    }   }
     if (ferror(finput)) EXM_THROW(54, "Read error : ferror");
 
     LZ4IO_fwriteSparseEnd(foutput, storedSkips);
@@ -742,7 +843,7 @@
     free(in_buff);
     free(out_buff);
 
-    return filesize;
+    return streamSize;
 }
 
 
@@ -754,8 +855,21 @@
     size_t dstBufferSize;
     FILE*  dstFile;
     LZ4F_decompressionContext_t dCtx;
+    void*  dictBuffer;
+    size_t dictBufferSize;
 } dRess_t;
 
+static void LZ4IO_loadDDict(dRess_t* ress) {   /* fills ress->dictBuffer and ress->dictBufferSize for decompression */
+    if (!g_useDictionary) {   /* no dictionary requested : store an empty (NULL, 0) dictionary */
+        ress->dictBuffer = NULL;
+        ress->dictBufferSize = 0;
+        return;
+    }
+
+    ress->dictBuffer = LZ4IO_createDict(g_dictionaryFilename, &ress->dictBufferSize);   /* heap buffer ; released elsewhere in this file with free(ress.dictBuffer) */
+    if (!ress->dictBuffer) EXM_THROW(25, "Dictionary error : could not create dictionary");
+}
+
 static const size_t LZ4IO_dBufferSize = 64 KB;
 static dRess_t LZ4IO_createDResources(void)
 {
@@ -772,6 +886,8 @@
     ress.dstBuffer = malloc(ress.dstBufferSize);
     if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(61, "Allocation error : not enough memory");
 
+    LZ4IO_loadDDict(&ress);
+
     ress.dstFile = NULL;
     return ress;
 }
@@ -782,6 +898,7 @@
     if (LZ4F_isError(errorCode)) EXM_THROW(69, "Error : can't free LZ4F context resource : %s", LZ4F_getErrorName(errorCode));
     free(ress.srcBuffer);
     free(ress.dstBuffer);
+    free(ress.dictBuffer);
 }
 
 
@@ -795,7 +912,7 @@
     {   size_t inSize = MAGICNUMBER_SIZE;
         size_t outSize= 0;
         LZ4IO_writeLE32(ress.srcBuffer, LZ4IO_MAGICNUMBER);
-        nextToLoad = LZ4F_decompress(ress.dCtx, ress.dstBuffer, &outSize, ress.srcBuffer, &inSize, NULL);
+        nextToLoad = LZ4F_decompress_usingDict(ress.dCtx, ress.dstBuffer, &outSize, ress.srcBuffer, &inSize, ress.dictBuffer, ress.dictBufferSize, NULL);
         if (LZ4F_isError(nextToLoad)) EXM_THROW(62, "Header error : %s", LZ4F_getErrorName(nextToLoad));
     }
 
@@ -814,7 +931,7 @@
             /* Decode Input (at least partially) */
             size_t remaining = readSize - pos;
             decodedBytes = ress.dstBufferSize;
-            nextToLoad = LZ4F_decompress(ress.dCtx, ress.dstBuffer, &decodedBytes, (char*)(ress.srcBuffer)+pos, &remaining, NULL);
+            nextToLoad = LZ4F_decompress_usingDict(ress.dCtx, ress.dstBuffer, &decodedBytes, (char*)(ress.srcBuffer)+pos, &remaining, ress.dictBuffer, ress.dictBufferSize, NULL);
             if (LZ4F_isError(nextToLoad)) EXM_THROW(66, "Decompression error : %s", LZ4F_getErrorName(nextToLoad));
             pos += remaining;
 
@@ -856,7 +973,7 @@
         total += readBytes;
         storedSkips = LZ4IO_fwriteSparse(foutput, buffer, readBytes, storedSkips);
     }
-    if (ferror(finput)) EXM_THROW(51, "Read Error")
+    if (ferror(finput)) EXM_THROW(51, "Read Error");
 
     LZ4IO_fwriteSparseEnd(foutput, storedSkips);
     return total;
@@ -873,7 +990,7 @@
     while (offset > 0) {
         unsigned s = offset;
         if (s > stepMax) s = stepMax;
-        errorNb = fseek(fp, (long) s, SEEK_CUR);
+        errorNb = UTIL_fseek(fp, (long) s, SEEK_CUR);
         if (errorNb != 0) break;
         offset -= s;
     }
@@ -885,22 +1002,24 @@
 {
     unsigned char MNstore[MAGICNUMBER_SIZE];
     unsigned magicNumber;
-    static unsigned nbCalls = 0;
+    static unsigned nbFrames = 0;
 
     /* init */
-    nbCalls++;
+    nbFrames++;
 
     /* Check Archive Header */
     if (g_magicRead) {  /* magic number already read from finput (see legacy frame)*/
-      magicNumber = g_magicRead;
-      g_magicRead = 0;
+        magicNumber = g_magicRead;
+        g_magicRead = 0;
     } else {
-      size_t const nbReadBytes = fread(MNstore, 1, MAGICNUMBER_SIZE, finput);
-      if (nbReadBytes==0) { nbCalls = 0; return ENDOFSTREAM; }   /* EOF */
-      if (nbReadBytes != MAGICNUMBER_SIZE) EXM_THROW(40, "Unrecognized header : Magic Number unreadable");
-      magicNumber = LZ4IO_readLE32(MNstore);   /* Little Endian format */
+        size_t const nbReadBytes = fread(MNstore, 1, MAGICNUMBER_SIZE, finput);
+        if (nbReadBytes==0) { nbFrames = 0; return ENDOFSTREAM; }   /* EOF */
+        if (nbReadBytes != MAGICNUMBER_SIZE)
+          EXM_THROW(40, "Unrecognized header : Magic Number unreadable");
+        magicNumber = LZ4IO_readLE32(MNstore);   /* Little Endian format */
     }
-    if (LZ4IO_isSkippableMagicNumber(magicNumber)) magicNumber = LZ4IO_SKIPPABLE0;  /* fold skippable magic numbers */
+    if (LZ4IO_isSkippableMagicNumber(magicNumber))
+        magicNumber = LZ4IO_SKIPPABLE0;   /* fold skippable magic numbers */
 
     switch(magicNumber)
     {
@@ -911,22 +1030,32 @@
         return LZ4IO_decodeLegacyStream(finput, foutput);
     case LZ4IO_SKIPPABLE0:
         DISPLAYLEVEL(4, "Skipping detected skippable area \n");
-        { size_t const nbReadBytes = fread(MNstore, 1, 4, finput);
-          if (nbReadBytes != 4) EXM_THROW(42, "Stream error : skippable size unreadable"); }
-        { unsigned const size = LZ4IO_readLE32(MNstore);     /* Little Endian format */
-          int const errorNb = fseek_u32(finput, size, SEEK_CUR);
-          if (errorNb != 0) EXM_THROW(43, "Stream error : cannot skip skippable area"); }
+        {   size_t const nbReadBytes = fread(MNstore, 1, 4, finput);
+            if (nbReadBytes != 4)
+                EXM_THROW(42, "Stream error : skippable size unreadable");
+        }
+        {   unsigned const size = LZ4IO_readLE32(MNstore);
+            int const errorNb = fseek_u32(finput, size, SEEK_CUR);
+            if (errorNb != 0)
+                EXM_THROW(43, "Stream error : cannot skip skippable area");
+        }
         return 0;
     EXTENDED_FORMAT;  /* macro extension for custom formats */
     default:
-        if (nbCalls == 1) {  /* just started */
+        if (nbFrames == 1) {  /* just started */
+            /* Wrong magic number at the beginning of 1st stream */
             if (!g_testMode && g_overwrite) {
-                nbCalls = 0;
+                nbFrames = 0;
                 return LZ4IO_passThrough(finput, foutput, MNstore);
             }
-            EXM_THROW(44,"Unrecognized header : file cannot be decoded");   /* Wrong magic number at the beginning of 1st stream */
+            EXM_THROW(44,"Unrecognized header : file cannot be decoded");
         }
-        DISPLAYLEVEL(2, "Stream followed by undecodable data\n");
+        {   long int const position = ftell(finput);  /* only works for files < 2 GB */
+            DISPLAYLEVEL(2, "Stream followed by undecodable data ");
+            if (position != -1L)
+                DISPLAYLEVEL(2, "at position %i ", (int)position);
+            DISPLAYLEVEL(2, "\n");
+        }
         return ENDOFSTREAM;
     }
 }
@@ -935,24 +1064,26 @@
 static int LZ4IO_decompressSrcFile(dRess_t ress, const char* input_filename, const char* output_filename)
 {
     FILE* const foutput = ress.dstFile;
-    unsigned long long filesize = 0, decodedSize=0;
-    FILE* finput;
+    unsigned long long filesize = 0;
 
     /* Init */
-    finput = LZ4IO_openSrcFile(input_filename);
+    FILE* const finput = LZ4IO_openSrcFile(input_filename);
     if (finput==NULL) return 1;
 
     /* Loop over multiple streams */
-    do {
-        decodedSize = selectDecoder(ress, finput, foutput);
-        if (decodedSize != ENDOFSTREAM)
-            filesize += decodedSize;
-    } while (decodedSize != ENDOFSTREAM);
+    for ( ; ; ) {  /* endless loop, see break condition */
+        unsigned long long const decodedSize =
+                        selectDecoder(ress, finput, foutput);
+        if (decodedSize == ENDOFSTREAM) break;
+        filesize += decodedSize;
+    }
 
-    /* Close */
+    /* Close input */
     fclose(finput);
-
-    if (g_removeSrcFile) { if (remove(input_filename)) EXM_THROW(45, "Remove error : %s: %s", input_filename, strerror(errno)); }  /* remove source file : --rm */
+    if (g_removeSrcFile) {  /* --rm */
+        if (remove(input_filename))
+            EXM_THROW(45, "Remove error : %s: %s", input_filename, strerror(errno));
+    }
 
     /* Final Status */
     DISPLAYLEVEL(2, "\r%79s\r", "");
@@ -965,21 +1096,26 @@
 
 static int LZ4IO_decompressDstFile(dRess_t ress, const char* input_filename, const char* output_filename)
 {
-    FILE* foutput;
-
-    /* Init */
-    foutput = LZ4IO_openDstFile(output_filename);
+    stat_t statbuf;
+    int stat_result = 0;
+    FILE* const foutput = LZ4IO_openDstFile(output_filename);
     if (foutput==NULL) return 1;   /* failure */
 
+    if ( strcmp(input_filename, stdinmark)
+      && UTIL_getFileStat(input_filename, &statbuf))
+        stat_result = 1;
+
     ress.dstFile = foutput;
     LZ4IO_decompressSrcFile(ress, input_filename, output_filename);
 
     fclose(foutput);
 
     /* Copy owner, file permissions and modification time */
-    {   stat_t statbuf;
-        if (strcmp (input_filename, stdinmark) && strcmp (output_filename, stdoutmark) && UTIL_getFileStat(input_filename, &statbuf))
-            UTIL_setFileStat(output_filename, &statbuf);
+    if ( stat_result != 0
+      && strcmp (output_filename, stdoutmark)
+      && strcmp (output_filename, nulmark)) {
+        UTIL_setFileStat(output_filename, &statbuf);
+        /* should return value be read ? or is silent fail good enough ? */
     }
 
     return 0;
@@ -993,10 +1129,9 @@
 
     int const missingFiles = LZ4IO_decompressDstFile(ress, input_filename, output_filename);
 
-    {   clock_t const end = clock();
-        double const seconds = (double)(end - start) / CLOCKS_PER_SEC;
-        DISPLAYLEVEL(4, "Done in %.2f sec  \n", seconds);
-    }
+    clock_t const end = clock();
+    double const seconds = (double)(end - start) / CLOCKS_PER_SEC;
+    DISPLAYLEVEL(4, "Done in %.2f sec  \n", seconds);
 
     LZ4IO_freeDResources(ress);
     return missingFiles;
diff --git a/programs/lz4io.h b/programs/lz4io.h
index 6190f00..22c5e3e 100644
--- a/programs/lz4io.h
+++ b/programs/lz4io.h
@@ -64,6 +64,8 @@
 /* ****************** Parameters ******************** */
 /* ************************************************** */
 
+int LZ4IO_setDictionaryFilename(const char* dictionaryFilename);
+
 /* Default setting : overwrite = 1;
    return : overwrite mode (0/1) */
 int LZ4IO_setOverwrite(int yes);
@@ -92,10 +94,15 @@
 /* Default setting : 0 (disabled) */
 int LZ4IO_setSparseFile(int enable);
 
-/* Default setting : 0 (disabled) */
+/* Default setting : 0 == no content size present in frame header */
 int LZ4IO_setContentSize(int enable);
 
+/* Default setting : 0 == src file preserved */
 void LZ4IO_setRemoveSrcFile(unsigned flag);
 
+/* Default setting : 0 == favor compression ratio
+ * Note : 1 only works for high compression levels (10+) */
+void LZ4IO_favorDecSpeed(int favor);
+
 
 #endif  /* LZ4IO_H_237902873 */
diff --git a/programs/platform.h b/programs/platform.h
index f1040c0..c0b3840 100644
--- a/programs/platform.h
+++ b/programs/platform.h
@@ -30,10 +30,10 @@
 *  Compiler Options
 ****************************************/
 #if defined(_MSC_VER)
-#  define _CRT_SECURE_NO_WARNINGS   /* Disable Visual Studio warning messages for fopen, strncpy, strerror */
-#  define _CRT_SECURE_NO_DEPRECATE  /* VS2005 - must be declared before <io.h> and <windows.h> */ 
-#  if (_MSC_VER <= 1800)            /* (1800 = Visual Studio 2013) */
-#    define snprintf sprintf_s      /* snprintf unsupported by Visual <= 2013 */
+#  define _CRT_SECURE_NO_WARNINGS    /* Disable Visual Studio warning messages for fopen, strncpy, strerror */
+#  if (_MSC_VER <= 1800)             /* (1800 = Visual Studio 2013) */
+#    define _CRT_SECURE_NO_DEPRECATE /* VS2005 - must be declared before <io.h> and <windows.h> */
+#    define snprintf sprintf_s       /* snprintf unsupported by Visual <= 2013 */
 #  endif
 #endif
 
@@ -59,8 +59,8 @@
 /* *********************************************************
 *  Turn on Large Files support (>4GB) for 32-bit Linux/Unix
 ***********************************************************/
-#if !defined(__64BIT__)                               /* No point defining Large file for 64 bit */
-#  if !defined(_FILE_OFFSET_BITS)   
+#if !defined(__64BIT__) || defined(__MINGW32__)       /* No point defining Large file for 64 bit but MinGW-w64 requires it */
+#  if !defined(_FILE_OFFSET_BITS)
 #    define _FILE_OFFSET_BITS 64                      /* turn off_t into a 64-bit type for ftello, fseeko */
 #  endif
 #  if !defined(_LARGEFILE_SOURCE)                     /* obsolete macro, replaced with _FILE_OFFSET_BITS */
@@ -81,11 +81,13 @@
 #if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)) /* UNIX-like OS */ \
    || defined(__midipix__) || defined(__VMS))
 #  if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1–2001 (SUSv3) conformant */ \
-     || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)  /* BSD distros */
+     || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)  || defined(__MidnightBSD__) /* BSD distros */
 #    define PLATFORM_POSIX_VERSION 200112L
 #  else
 #    if defined(__linux__) || defined(__linux)
-#      define _POSIX_C_SOURCE 200112L  /* use feature test macro */
+#      ifndef _POSIX_C_SOURCE
+#        define _POSIX_C_SOURCE 200112L  /* use feature test macro */
+#      endif
 #    endif
 #    include <unistd.h>  /* declares _POSIX_VERSION */
 #    if defined(_POSIX_VERSION)  /* POSIX compliant */
@@ -106,9 +108,18 @@
 #if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 1)) || (PLATFORM_POSIX_VERSION >= 200112L) || defined(__DJGPP__)
 #  include <unistd.h>   /* isatty */
 #  define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
-#elif defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
+#elif defined(MSDOS) || defined(OS2) || defined(__CYGWIN__)
 #  include <io.h>       /* _isatty */
 #  define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
+#elif defined(WIN32) || defined(_WIN32)
+#  include <io.h>      /* _isatty */
+#  include <windows.h> /* GetConsoleMode, HANDLE, DWORD */
+#  include <stdio.h>   /* FILE */
+static __inline int IS_CONSOLE(FILE* stdStream)
+{
+    DWORD dummy;   /* receives the console mode; its value is never read */
+    return _isatty(_fileno(stdStream)) && GetConsoleMode((HANDLE)_get_osfhandle(_fileno(stdStream)), &dummy);
+}
 #else
 #  define IS_CONSOLE(stdStream) 0
 #endif
diff --git a/programs/util.h b/programs/util.h
index f3ff1b2..d74db0d 100644
--- a/programs/util.h
+++ b/programs/util.h
@@ -30,8 +30,9 @@
 *  Dependencies
 ******************************************/
 #include "platform.h"     /* PLATFORM_POSIX_VERSION */
-#include <stdlib.h>       /* malloc */
 #include <stddef.h>       /* size_t, ptrdiff_t */
+#include <stdlib.h>       /* malloc */
+#include <string.h>       /* strlen, strncpy */
 #include <stdio.h>        /* fprintf */
 #include <sys/types.h>    /* stat, utime */
 #include <sys/stat.h>     /* stat */
@@ -70,12 +71,26 @@
 #endif
 
 
+/* ************************************************************
+* Avoid fseek()'s 2GiB barrier with MSVC, MacOS, *BSD, MinGW
+***************************************************************/
+#if defined(_MSC_VER) && (_MSC_VER >= 1400)
+#   define UTIL_fseek _fseeki64
+#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
+#  define UTIL_fseek fseeko
+#elif defined(__MINGW32__) && defined(__MSVCRT__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS)
+#   define UTIL_fseek fseeko64
+#else
+#   define UTIL_fseek fseek
+#endif
+
+
 /*-****************************************
 *  Sleep functions: Windows - Posix - others
 ******************************************/
 #if defined(_WIN32)
 #  include <windows.h>
-#  define SET_HIGH_PRIORITY SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS)
+#  define SET_REALTIME_PRIORITY SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS)
 #  define UTIL_sleep(s) Sleep(1000*s)
 #  define UTIL_sleepMilli(milli) Sleep(milli)
 #elif PLATFORM_POSIX_VERSION >= 0 /* Unix-like operating system */
@@ -83,9 +98,9 @@
 #  include <sys/resource.h> /* setpriority */
 #  include <time.h>         /* clock_t, nanosleep, clock, CLOCKS_PER_SEC */
 #  if defined(PRIO_PROCESS)
-#    define SET_HIGH_PRIORITY setpriority(PRIO_PROCESS, 0, -20)
+#    define SET_REALTIME_PRIORITY setpriority(PRIO_PROCESS, 0, -20)
 #  else
-#    define SET_HIGH_PRIORITY /* disabled */
+#    define SET_REALTIME_PRIORITY /* disabled */
 #  endif
 #  define UTIL_sleep(s) sleep(s)
 #  if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 199309L)) || (PLATFORM_POSIX_VERSION >= 200112L)  /* nanosleep requires POSIX.1-2001 */
@@ -94,7 +109,7 @@
 #      define UTIL_sleepMilli(milli) /* disabled */
 #  endif
 #else
-#  define SET_HIGH_PRIORITY      /* disabled */
+#  define SET_REALTIME_PRIORITY      /* disabled */
 #  define UTIL_sleep(s)          /* disabled */
 #  define UTIL_sleepMilli(milli) /* disabled */
 #endif
@@ -126,37 +141,129 @@
 /*-****************************************
 *  Time functions
 ******************************************/
-#if !defined(_WIN32)
-   typedef clock_t UTIL_time_t;
-   UTIL_STATIC void UTIL_initTimer(UTIL_time_t* ticksPerSecond) { *ticksPerSecond=0; }
-   UTIL_STATIC void UTIL_getTime(UTIL_time_t* x) { *x = clock(); }
-   UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { (void)ticksPerSecond; return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
-   UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { (void)ticksPerSecond; return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
-#else
-   typedef LARGE_INTEGER UTIL_time_t;
-   UTIL_STATIC void UTIL_initTimer(UTIL_time_t* ticksPerSecond) { if (!QueryPerformanceFrequency(ticksPerSecond)) fprintf(stderr, "ERROR: QueryPerformance not present\n"); }
-   UTIL_STATIC void UTIL_getTime(UTIL_time_t* x) { QueryPerformanceCounter(x); }
-   UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; }
-   UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; }
+#if defined(_WIN32)   /* Windows */
+
+    typedef LARGE_INTEGER UTIL_time_t;
+    UTIL_STATIC UTIL_time_t UTIL_getTime(void) { UTIL_time_t x; QueryPerformanceCounter(&x); return x; }
+    UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
+    {
+        static LARGE_INTEGER ticksPerSecond;   /* counter frequency, kept across calls */
+        static int init = 0;
+        if (!init) {   /* query the frequency on the first call only */
+            if (!QueryPerformanceFrequency(&ticksPerSecond))
+                fprintf(stderr, "ERROR: QueryPerformanceFrequency() failure\n");
+            init = 1;
+        }
+        return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;   /* NOTE(review): still divides by ticksPerSecond when the query above failed */
+    }
+    UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd)
+    {
+        static LARGE_INTEGER ticksPerSecond;   /* counter frequency, kept across calls */
+        static int init = 0;
+        if (!init) {   /* query the frequency on the first call only */
+            if (!QueryPerformanceFrequency(&ticksPerSecond))
+                fprintf(stderr, "ERROR: QueryPerformanceFrequency() failure\n");
+            init = 1;
+        }
+        return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;   /* NOTE(review): still divides by ticksPerSecond when the query above failed */
+    }
+
+#elif defined(__APPLE__) && defined(__MACH__)
+
+    #include <mach/mach_time.h>
+    typedef U64 UTIL_time_t;
+    UTIL_STATIC UTIL_time_t UTIL_getTime(void) { return mach_absolute_time(); }
+    UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
+    {
+        static mach_timebase_info_data_t rate;
+        static int init = 0;
+        if (!init) {
+            mach_timebase_info(&rate);
+            init = 1;
+        }
+        return (((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom)) / 1000ULL;
+    }
+    UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd)
+    {
+        static mach_timebase_info_data_t rate;
+        static int init = 0;
+        if (!init) {
+            mach_timebase_info(&rate);
+            init = 1;
+        }
+        return ((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom);
+    }
+
+#elif (PLATFORM_POSIX_VERSION >= 200112L) && (defined __UCLIBC__ || (defined(__GLIBC__) && ((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 17) || __GLIBC__ > 2) ) )
+
+    #include <time.h>
+    typedef struct timespec UTIL_time_t;
+    UTIL_STATIC UTIL_time_t UTIL_getTime(void)
+    {
+        UTIL_time_t now;
+        if (clock_gettime(CLOCK_MONOTONIC, &now))
+            fprintf(stderr, "ERROR: Failed to get time\n");   /* we could also exit() */
+        return now;
+    }
+    UTIL_STATIC UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end)
+    {
+        UTIL_time_t diff;   /* end - begin, expressed as a struct timespec */
+        if (end.tv_nsec < begin.tv_nsec) {   /* nanosecond part would go negative : borrow one second */
+            diff.tv_sec = (end.tv_sec - 1) - begin.tv_sec;
+            diff.tv_nsec = (end.tv_nsec + 1000000000ULL) - begin.tv_nsec;
+        } else {   /* no borrow needed : subtract field by field */
+            diff.tv_sec = end.tv_sec - begin.tv_sec;
+            diff.tv_nsec = end.tv_nsec - begin.tv_nsec;
+        }
+        return diff;
+    }
+    UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t begin, UTIL_time_t end)
+    {
+        UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
+        U64 micro = 0;
+        micro += 1000000ULL * diff.tv_sec;
+        micro += diff.tv_nsec / 1000ULL;
+        return micro;
+    }
+    UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t begin, UTIL_time_t end)
+    {
+        UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
+        U64 nano = 0;
+        nano += 1000000000ULL * diff.tv_sec;
+        nano += diff.tv_nsec;
+        return nano;
+    }
+
+#else   /* relies on standard C (note : clock_t measurements can be wrong when using multi-threading) */
+
+    typedef clock_t UTIL_time_t;
+    UTIL_STATIC UTIL_time_t UTIL_getTime(void) { return clock(); }
+    UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
+    UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
 #endif
 
 
 /* returns time span in microseconds */
-UTIL_STATIC U64 UTIL_clockSpanMicro( UTIL_time_t clockStart, UTIL_time_t ticksPerSecond )
+UTIL_STATIC U64 UTIL_clockSpanMicro(UTIL_time_t clockStart)
 {
-    UTIL_time_t clockEnd;
-    UTIL_getTime(&clockEnd);
-    return UTIL_getSpanTimeMicro(ticksPerSecond, clockStart, clockEnd);
+    UTIL_time_t const clockEnd = UTIL_getTime();
+    return UTIL_getSpanTimeMicro(clockStart, clockEnd);
 }
 
-
-UTIL_STATIC void UTIL_waitForNextTick(UTIL_time_t ticksPerSecond)
+/* returns time span in nanoseconds */
+UTIL_STATIC U64 UTIL_clockSpanNano(UTIL_time_t clockStart)
 {
-    UTIL_time_t clockStart, clockEnd;
-    UTIL_getTime(&clockStart);
+    UTIL_time_t const clockEnd = UTIL_getTime();
+    return UTIL_getSpanTimeNano(clockStart, clockEnd);
+}
+
+UTIL_STATIC void UTIL_waitForNextTick(void)
+{
+    UTIL_time_t const clockStart = UTIL_getTime();
+    UTIL_time_t clockEnd;
     do {
-        UTIL_getTime(&clockEnd);
-    } while (UTIL_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) == 0);
+        clockEnd = UTIL_getTime();
+    } while (UTIL_getSpanTimeNano(clockStart, clockEnd) == 0);
 }
 
 
@@ -165,21 +272,27 @@
 *  File functions
 ******************************************/
 #if defined(_MSC_VER)
-	#define chmod _chmod
-	typedef struct _stat64 stat_t;
+    #define chmod _chmod
+    typedef struct __stat64 stat_t;
 #else
     typedef struct stat stat_t;
 #endif
 
 
+UTIL_STATIC int UTIL_isRegFile(const char* infilename);
+
+
 UTIL_STATIC int UTIL_setFileStat(const char *filename, stat_t *statbuf)
 {
     int res = 0;
     struct utimbuf timebuf;
 
-	timebuf.actime = time(NULL);
-	timebuf.modtime = statbuf->st_mtime;
-	res += utime(filename, &timebuf);  /* set access and modification times */
+    if (!UTIL_isRegFile(filename))
+        return -1;
+
+    timebuf.actime = time(NULL);
+    timebuf.modtime = statbuf->st_mtime;
+    res += utime(filename, &timebuf);  /* set access and modification times */
 
 #if !defined(_WIN32)
     res += chown(filename, statbuf->st_uid, statbuf->st_gid);  /* Copy ownership */
@@ -206,13 +319,39 @@
 }
 
 
+UTIL_STATIC int UTIL_isRegFile(const char* infilename)
+{
+    stat_t statbuf;
+    return UTIL_getFileStat(infilename, &statbuf); /* Only need to know whether it is a regular file */
+}
+
+
+UTIL_STATIC U32 UTIL_isDirectory(const char* infilename)
+{
+    int r;
+    stat_t statbuf;
+#if defined(_MSC_VER)
+    r = _stat64(infilename, &statbuf);
+    if (!r && (statbuf.st_mode & _S_IFDIR)) return 1;
+#else
+    r = stat(infilename, &statbuf);
+    if (!r && S_ISDIR(statbuf.st_mode)) return 1;
+#endif
+    return 0;
+}
+
+
 UTIL_STATIC U64 UTIL_getFileSize(const char* infilename)
 {
     int r;
 #if defined(_MSC_VER)
-    struct _stat64 statbuf;
+    struct __stat64 statbuf;
     r = _stat64(infilename, &statbuf);
     if (r || !(statbuf.st_mode & S_IFREG)) return 0;   /* No good... */
+#elif defined(__MINGW32__) && defined (__MSVCRT__)
+    struct _stati64 statbuf;
+    r = _stati64(infilename, &statbuf);
+    if (r || !(statbuf.st_mode & S_IFREG)) return 0;   /* No good... */
 #else
     struct stat statbuf;
     r = stat(infilename, &statbuf);
@@ -232,37 +371,6 @@
 }
 
 
-UTIL_STATIC int UTIL_doesFileExists(const char* infilename)
-{
-    int r;
-#if defined(_MSC_VER)
-    struct _stat64 statbuf;
-    r = _stat64(infilename, &statbuf);
-    if (r || !(statbuf.st_mode & S_IFREG)) return 0;   /* No good... */
-#else
-    struct stat statbuf;
-    r = stat(infilename, &statbuf);
-    if (r || !S_ISREG(statbuf.st_mode)) return 0;   /* No good... */
-#endif
-    return 1;
-}
-
-
-UTIL_STATIC U32 UTIL_isDirectory(const char* infilename)
-{
-    int r;
-#if defined(_MSC_VER)
-    struct _stat64 statbuf;
-    r = _stat64(infilename, &statbuf);
-    if (!r && (statbuf.st_mode & _S_IFDIR)) return 1;
-#else
-    struct stat statbuf;
-    r = stat(infilename, &statbuf);
-    if (!r && S_ISDIR(statbuf.st_mode)) return 1;
-#endif
-    return 0;
-}
-
 /*
  * A modified version of realloc().
  * If UTIL_realloc() fails the original block is freed.
diff --git a/tests/.gitignore b/tests/.gitignore
index 4c0f311..9aa42a0 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -1,5 +1,5 @@
 
-# test build artefacts
+# build artefacts
 datagen
 frametest
 frametest32
@@ -8,7 +8,12 @@
 fuzzer
 fuzzer32
 fasttest
+roundTripTest
+checkTag
 
 # test artefacts
 tmp*
 versionsTest
+
+# local tests
+afl
diff --git a/tests/Makefile b/tests/Makefile
index 97fa782..3de111b 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -1,6 +1,6 @@
 # ##########################################################################
 # LZ4 programs - Makefile
-# Copyright (C) Yann Collet 2011-2016
+# Copyright (C) Yann Collet 2011-present
 #
 # GPL v2 License
 #
@@ -28,89 +28,97 @@
 # datagen : generates synthetic data samples for tests & benchmarks
 # ##########################################################################
 
-DESTDIR ?=
-PREFIX  ?= /usr/local
-BINDIR  := $(PREFIX)/bin
-MANDIR  := $(PREFIX)/share/man/man1
-LIBDIR  := ../lib
+LZ4DIR  := ../lib
 PRGDIR  := ../programs
-VOID    := /dev/null
 TESTDIR := versionsTest
 PYTHON  ?= python3
 
+DEBUGLEVEL?= 1
+DEBUGFLAGS = -g -DLZ4_DEBUG=$(DEBUGLEVEL)
 CFLAGS  ?= -O3 # can select custom optimization flags. For example : CFLAGS=-O2 make
-CFLAGS  += -g -Wall -Wextra -Wundef -Wcast-qual -Wcast-align -Wshadow -Wswitch-enum \
-           -Wdeclaration-after-statement -Wstrict-prototypes \
+CFLAGS  += -Wall -Wextra -Wundef -Wcast-qual -Wcast-align -Wshadow \
+           -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes \
            -Wpointer-arith -Wstrict-aliasing=1
-CFLAGS  += $(MOREFLAGS)
-CPPFLAGS:= -I$(LIBDIR) -I$(PRGDIR) -DXXH_NAMESPACE=LZ4_
+CFLAGS  += $(DEBUGFLAGS) $(MOREFLAGS)
+CPPFLAGS+= -I$(LZ4DIR) -I$(PRGDIR) -DXXH_NAMESPACE=LZ4_
 FLAGS    = $(CFLAGS) $(CPPFLAGS) $(LDFLAGS)
 
 
 # Define *.exe as extension for Windows systems
 ifneq (,$(filter Windows%,$(OS)))
-EXT =.exe
+EXT  =.exe
+VOID = nul
 else
-EXT =
+EXT  =
+VOID = /dev/null
 endif
-LZ4     := $(PRGDIR)/lz4$(EXT)
+LZ4 := $(PRGDIR)/lz4$(EXT)
 
 
 # Default test parameters
 TEST_FILES   := COPYING
-FUZZER_TIME  := -T3mn
+FUZZER_TIME  := -T90s
 NB_LOOPS     ?= -i1
 
 
 default: all
 
-all: fullbench fuzzer frametest datagen fasttest
+all: fullbench fuzzer frametest roundTripTest datagen
 
 all32: CFLAGS+=-m32
 all32: all
 
 lz4:
-	$(MAKE) -C $(PRGDIR) clean $@ CFLAGS="$(CFLAGS)"
+	$(MAKE) -C $(PRGDIR) $@ CFLAGS="$(CFLAGS)"
 
-lz4c:
-	$(MAKE) -C $(PRGDIR) clean $@ CFLAGS="$(CFLAGS)"
+lib liblz4.pc:
+	$(MAKE) -C $(LZ4DIR) $@ CFLAGS="$(CFLAGS)"
+
+lz4c unlz4 lz4cat: lz4
+	ln -sf $(LZ4) $(PRGDIR)/$@
 
 lz4c32:   # create a 32-bits version for 32/64 interop tests
-	$(MAKE) -C $(PRGDIR) clean $@ CFLAGS="-m32 $(CFLAGS)"
-	cp $(LZ4) $(LZ4)c32
+	$(MAKE) -C $(PRGDIR) $@ CFLAGS="-m32 $(CFLAGS)"
 
-fullbench  : $(LIBDIR)/lz4.o $(LIBDIR)/lz4hc.o $(LIBDIR)/lz4frame.o $(LIBDIR)/xxhash.o fullbench.c
+%.o : $(LZ4DIR)/%.c $(LZ4DIR)/%.h
+	$(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@
+
+fullbench : DEBUGLEVEL=0
+fullbench : lz4.o lz4hc.o lz4frame.o xxhash.o fullbench.c
 	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
-fullbench-lib: fullbench.c $(LIBDIR)/xxhash.c
-	$(MAKE) -C $(LIBDIR) liblz4.a
-	$(CC) $(FLAGS) $^ -o $@$(EXT) $(LIBDIR)/liblz4.a
+$(LZ4DIR)/liblz4.a:
+	$(MAKE) -C $(LZ4DIR) liblz4.a
 
-fullbench-dll: fullbench.c $(LIBDIR)/xxhash.c
-	$(MAKE) -C $(LIBDIR) liblz4
-	$(CC) $(FLAGS) $^ -o $@$(EXT) -DLZ4_DLL_IMPORT=1 $(LIBDIR)/dll/liblz4.dll
-
-fuzzer  : $(LIBDIR)/lz4.o $(LIBDIR)/lz4hc.o $(LIBDIR)/xxhash.o fuzzer.c
+fullbench-lib: fullbench.c $(LZ4DIR)/liblz4.a
 	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
-frametest: $(LIBDIR)/lz4frame.o $(LIBDIR)/lz4.o $(LIBDIR)/lz4hc.o $(LIBDIR)/xxhash.o frametest.c
+fullbench-dll: fullbench.c $(LZ4DIR)/xxhash.c
+	$(MAKE) -C $(LZ4DIR) liblz4
+	$(CC) $(FLAGS) $^ -o $@$(EXT) -DLZ4_DLL_IMPORT=1 $(LZ4DIR)/dll/liblz4.dll
+
+fuzzer  : lz4.o lz4hc.o xxhash.o fuzzer.c
 	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
-fasttest: $(LIBDIR)/lz4.o fasttest.c
+frametest: lz4frame.o lz4.o lz4hc.o xxhash.o frametest.c
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
+
+roundTripTest : lz4.o lz4hc.o xxhash.o roundTripTest.c
 	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
 datagen : $(PRGDIR)/datagen.c datagencli.c
 	$(CC) $(FLAGS) -I$(PRGDIR) $^ -o $@$(EXT)
 
 clean:
-	@$(MAKE) -C $(LIBDIR) $@ > $(VOID)
+	@$(MAKE) -C $(LZ4DIR) $@ > $(VOID)
 	@$(MAKE) -C $(PRGDIR) $@ > $(VOID)
 	@$(RM) core *.o *.test tmp* \
         fullbench-dll$(EXT) fullbench-lib$(EXT) \
         fullbench$(EXT) fullbench32$(EXT) \
         fuzzer$(EXT) fuzzer32$(EXT) \
         frametest$(EXT) frametest32$(EXT) \
-        fasttest$(EXT) datagen$(EXT)
+        fasttest$(EXT) roundTripTest$(EXT) \
+        datagen$(EXT) checkTag$(EXT)
 	@rm -fR $(TESTDIR)
 	@echo Cleaning completed
 
@@ -118,11 +126,14 @@
 versionsTest:
 	$(PYTHON) test-lz4-versions.py
 
+checkTag: checkTag.c $(LZ4DIR)/lz4.h
+	$(CC) $(FLAGS) $< -o $@$(EXT)
 
-#------------------------------------------------------------------------
-#make test is validated only for Linux, OSX, kFreeBSD, FreeBSD, Hurd and
-#Solaris targets
-ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS))
+
+#-----------------------------------------------------------------------------
+# validated only for Linux, OSX, BSD, Hurd and Solaris targets
+#-----------------------------------------------------------------------------
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD))
 
 MD5:=md5sum
 ifneq (,$(filter $(shell uname), Darwin ))
@@ -134,148 +145,193 @@
 DIFF:=gdiff
 endif
 
+DD:=dd
 
-test: test-lz4 test-lz4c test-fasttest test-frametest test-fullbench test-fuzzer
+
+test: test-lz4 test-lz4c test-frametest test-fullbench test-fuzzer test-install
 
 test32: CFLAGS+=-m32
 test32: test
 
+test-install: lz4 lib liblz4.pc
+	lz4_root=.. ./test_install.sh
+
 test-lz4-sparse: lz4 datagen
 	@echo "\n ---- test sparse file support ----"
-	./datagen -g5M  -P100 > tmpSrc
-	$(LZ4) -B4D tmpSrc | $(LZ4) -dv --sparse > tmpB4
-	$(DIFF) -s tmpSrc tmpB4
-	$(LZ4) -B5D tmpSrc | $(LZ4) -dv --sparse > tmpB5
-	$(DIFF) -s tmpSrc tmpB5
-	$(LZ4) -B6D tmpSrc | $(LZ4) -dv --sparse > tmpB6
-	$(DIFF) -s tmpSrc tmpB6
-	$(LZ4) -B7D tmpSrc | $(LZ4) -dv --sparse > tmpB7
-	$(DIFF) -s tmpSrc tmpB7
-	$(LZ4) tmpSrc | $(LZ4) -dv --no-sparse > tmpNoSparse
-	$(DIFF) -s tmpSrc tmpNoSparse
-	ls -ls tmp*
-	./datagen -s1 -g1200007 -P100 | $(LZ4) | $(LZ4) -dv --sparse > tmpOdd   # Odd size file (to generate non-full last block)
-	./datagen -s1 -g1200007 -P100 | $(DIFF) -s - tmpOdd
-	ls -ls tmpOdd
-	@$(RM) tmp*
+	./datagen -g5M  -P100 > tmplsdg5M
+	$(LZ4) -B4D tmplsdg5M | $(LZ4) -dv --sparse > tmplscB4
+	$(DIFF) -s tmplsdg5M tmplscB4
+	$(LZ4) -B5D tmplsdg5M | $(LZ4) -dv --sparse > tmplscB5
+	$(DIFF) -s tmplsdg5M tmplscB5
+	$(LZ4) -B6D tmplsdg5M | $(LZ4) -dv --sparse > tmplscB6
+	$(DIFF) -s tmplsdg5M tmplscB6
+	$(LZ4) -B7D tmplsdg5M | $(LZ4) -dv --sparse > tmplscB7
+	$(DIFF) -s tmplsdg5M tmplscB7
+	$(LZ4) tmplsdg5M | $(LZ4) -dv --no-sparse > tmplsnosparse
+	$(DIFF) -s tmplsdg5M tmplsnosparse
+	ls -ls tmpls*
+	./datagen -s1 -g1200007 -P100 | $(LZ4) | $(LZ4) -dv --sparse > tmplsodd   # Odd size file (to generate non-full last block)
+	./datagen -s1 -g1200007 -P100 | $(DIFF) -s - tmplsodd
+	ls -ls tmplsodd
+	@$(RM) tmpls*
 	@echo "\n Compatibility with Console :"
 	echo "Hello World 1 !" | $(LZ4) | $(LZ4) -d -c
 	echo "Hello World 2 !" | $(LZ4) | $(LZ4) -d | cat
 	echo "Hello World 3 !" | $(LZ4) --no-frame-crc | $(LZ4) -d -c
 	@echo "\n Compatibility with Append :"
-	./datagen -P100 -g1M > tmp1M
-	cat tmp1M tmp1M > tmp2M
-	$(LZ4) -B5 -v tmp1M tmpC
-	$(LZ4) -d -v tmpC tmpR
-	$(LZ4) -d -v tmpC >> tmpR
+	./datagen -P100 -g1M > tmplsdg1M
+	cat tmplsdg1M tmplsdg1M > tmpls2M
+	$(LZ4) -B5 -v tmplsdg1M tmplsc
+	$(LZ4) -d -v tmplsc tmplsr
+	$(LZ4) -d -v tmplsc >> tmplsr
 	ls -ls tmp*
-	$(DIFF) tmp2M tmpR
-	@$(RM) tmp*
+	$(DIFF) tmpls2M tmplsr
+	@$(RM) tmpls*
 
 test-lz4-contentSize: lz4 datagen
 	@echo "\n ---- test original size support ----"
-	./datagen -g15M > tmp
-	$(LZ4) -v tmp | $(LZ4) -t
-	$(LZ4) -v --content-size tmp | $(LZ4) -d > tmp2
-	$(DIFF) -s tmp tmp2
-	# test large size [2-4] GB
-	@./datagen -g3G -P100 | $(LZ4) -vv | $(LZ4) --decompress --force --sparse - tmp
-	@ls -ls tmp
-	@./datagen -g3G -P100 | $(LZ4) --quiet --content-size | $(LZ4) --verbose --decompress --force --sparse - tmp2
-	@ls -ls tmp2
-	$(DIFF) -s tmp tmp2
-	@$(RM) tmp*
+	./datagen -g15M > tmplc1
+	$(LZ4) -v tmplc1 | $(LZ4) -t
+	$(LZ4) -v --content-size tmplc1 | $(LZ4) -d > tmplc2
+	$(DIFF) -s tmplc1 tmplc2
+	@$(RM) tmplc*
 
 test-lz4-frame-concatenation: lz4 datagen
 	@echo "\n ---- test frame concatenation ----"
-	@echo -n > empty.test
-	@echo hi > nonempty.test
-	cat nonempty.test empty.test nonempty.test > orig.test
-	@$(LZ4) -zq empty.test > empty.lz4.test
-	@$(LZ4) -zq nonempty.test > nonempty.lz4.test
-	cat nonempty.lz4.test empty.lz4.test nonempty.lz4.test > concat.lz4.test
-	$(LZ4) -d concat.lz4.test > result.test
-	sdiff orig.test result.test
-	@$(RM) *.test
+	@echo -n > tmp-lfc-empty
+	@echo hi > tmp-lfc-nonempty
+	cat tmp-lfc-nonempty tmp-lfc-empty tmp-lfc-nonempty > tmp-lfc-src
+	@$(LZ4) -zq tmp-lfc-empty > tmp-lfc-empty.lz4
+	@$(LZ4) -zq tmp-lfc-nonempty > tmp-lfc-nonempty.lz4
+	cat tmp-lfc-nonempty.lz4 tmp-lfc-empty.lz4 tmp-lfc-nonempty.lz4 > tmp-lfc-concat.lz4
+	$(LZ4) -d tmp-lfc-concat.lz4 > tmp-lfc-result
+	sdiff tmp-lfc-src tmp-lfc-result
+	@$(RM) tmp-lfc-*
 	@echo frame concatenation test completed
 
 test-lz4-multiple: lz4 datagen
 	@echo "\n ---- test multiple files ----"
-	@./datagen -s1        > tmp1 2> $(VOID)
-	@./datagen -s2 -g100K > tmp2 2> $(VOID)
-	@./datagen -s3 -g1M   > tmp3 2> $(VOID)
-	$(LZ4) -f -m tmp*
-	ls -ls tmp*
-	@$(RM) tmp1 tmp2 tmp3
-	$(LZ4) -df -m *.lz4
-	ls -ls tmp*
-	$(LZ4) -f -m tmp1 notHere tmp2; echo $$?
-	@$(RM) tmp*
-
-unlz4:
-	@$(MAKE) -C $(PRGDIR) $@ CFLAGS="$(CFLAGS)"
-
-lz4cat:
-	@$(MAKE) -C $(PRGDIR) $@ CFLAGS="$(CFLAGS)"
+	@./datagen -s1        > tmp-tlm1 2> $(VOID)
+	@./datagen -s2 -g100K > tmp-tlm2 2> $(VOID)
+	@./datagen -s3 -g1M   > tmp-tlm3 2> $(VOID)
+	$(LZ4) -f -m tmp-tlm*
+	ls -ls tmp-tlm*
+	@$(RM) tmp-tlm1 tmp-tlm2 tmp-tlm3
+	$(LZ4) -df -m tmp-tlm*.lz4
+	ls -ls tmp-tlm*
+	$(LZ4) -f -m tmp-tlm1 notHere tmp-tlm2; echo $$?
+	@$(RM) tmp-tlm*
 
 test-lz4-basic: lz4 datagen unlz4 lz4cat
 	@echo "\n ---- test lz4 basic compression/decompression ----"
-	./datagen -g0     | $(LZ4) -v     | $(LZ4) -t
-	./datagen -g16KB  | $(LZ4) -9     | $(LZ4) -t
-	./datagen -g20KB > tmpSrc
-	$(LZ4) < tmpSrc   | $(LZ4) -d > tmpRes
-	$(DIFF) -q tmpSrc tmpRes
-	$(LZ4) --no-frame-crc < tmpSrc | $(LZ4) -d > tmpRes
-	$(DIFF) -q tmpSrc tmpRes
-	./datagen         | $(LZ4)        | $(LZ4) -t
-	./datagen -g6M -P99 | $(LZ4) -9BD | $(LZ4) -t
-	./datagen -g17M   | $(LZ4) -9v    | $(LZ4) -qt
-	./datagen -g33M   | $(LZ4) --no-frame-crc | $(LZ4) -t
-	./datagen -g256MB | $(LZ4) -vqB4D | $(LZ4) -t
-	@echo "hello world" > tmp
-	$(LZ4) --rm -f tmp
-	ls -ls tmp         && false || true   # must fail (--rm)
-	ls -ls tmp.lz4
-	$(PRGDIR)/lz4cat tmp.lz4              # must display hello world
-	ls -ls tmp.lz4
-	$(PRGDIR)/unlz4 --rm tmp.lz4
-	ls -ls tmp
-	ls -ls tmp.lz4     && false || true   # must fail (--rm)
-	ls -ls tmp.lz4.lz4 && false || true   # must fail (unlz4)
-	$(PRGDIR)/lz4cat tmp                  # pass-through mode
-	ls -ls tmp
-	ls -ls tmp.lz4     && false || true   # must fail (lz4cat)
-	$(LZ4) tmp                         # creates tmp.lz4
-	$(PRGDIR)/lz4cat < tmp.lz4 > tmp3  # checks lz4cat works with stdin (#285)
-	$(DIFF) -q tmp tmp3
-	$(PRGDIR)/lz4cat < tmp > tmp2      # checks lz4cat works with stdin (#285)
-	$(DIFF) -q tmp tmp2
-	@$(RM) tmp*
+	./datagen -g0       | $(LZ4) -v     | $(LZ4) -t
+	./datagen -g16KB    | $(LZ4) -9     | $(LZ4) -t
+	./datagen -g20KB > tmp-tlb-dg20k
+	$(LZ4) < tmp-tlb-dg20k | $(LZ4) -d > tmp-tlb-dec
+	$(DIFF) -q tmp-tlb-dg20k tmp-tlb-dec
+	$(LZ4) --no-frame-crc < tmp-tlb-dg20k | $(LZ4) -d > tmp-tlb-dec
+	$(DIFF) -q tmp-tlb-dg20k tmp-tlb-dec
+	./datagen           | $(LZ4)        | $(LZ4) -t
+	./datagen -g6M -P99 | $(LZ4) -9BD   | $(LZ4) -t
+	./datagen -g17M     | $(LZ4) -9v    | $(LZ4) -qt
+	./datagen -g33M     | $(LZ4) --no-frame-crc | $(LZ4) -t
+	./datagen -g256MB   | $(LZ4) -vqB4D | $(LZ4) -t
+	@echo "hello world" > tmp-tlb-hw
+	$(LZ4) --rm -f tmp-tlb-hw tmp-tlb-hw.lz4
+	test ! -f tmp-tlb-hw                      # source must be removed (--rm)
+	test   -f tmp-tlb-hw.lz4
+	$(PRGDIR)/lz4cat tmp-tlb-hw.lz4           # must display hello world
+	test   -f tmp-tlb-hw.lz4
+	$(PRGDIR)/unlz4 --rm tmp-tlb-hw.lz4 tmp-tlb-hw
+	test   -f tmp-tlb-hw
+	test ! -f tmp-tlb-hw.lz4                  # source must be removed (--rm)
+	test ! -f tmp-tlb-hw.lz4.lz4              # must not be created (unlz4)
+	$(PRGDIR)/lz4cat tmp-tlb-hw               # pass-through mode
+	test   -f tmp-tlb-hw
+	test ! -f tmp-tlb-hw.lz4                  # must not be created (lz4cat)
+	$(LZ4) tmp-tlb-hw tmp-tlb-hw.lz4          # creates tmp-tlb-hw.lz4
+	$(PRGDIR)/lz4cat < tmp-tlb-hw.lz4 > tmp-tlb3  # checks lz4cat works with stdin (#285)
+	$(DIFF) -q tmp-tlb-hw tmp-tlb3
+	$(PRGDIR)/lz4cat < tmp-tlb-hw > tmp-tlb2      # checks lz4cat works in pass-through mode
+	$(DIFF) -q tmp-tlb-hw tmp-tlb2
+	cp tmp-tlb-hw ./-d
+	$(LZ4) --rm -- -d -d.lz4               # compresses ./-d into ./-d.lz4
+	test   -f ./-d.lz4
+	test ! -f ./-d
+	mv ./-d.lz4 ./-z
+	$(LZ4) -d --rm -- -z tmp-tlb4          # uncompresses ./-z into tmp-tlb4
+	test ! -f ./-z
+	$(DIFF) -q tmp-tlb-hw tmp-tlb4
+	$(LZ4) -f tmp-tlb-hw
+	cat tmp-tlb-hw >> tmp-tlb-hw.lz4
+	$(LZ4) -f tmp-tlb-hw.lz4               # uncompress valid frame followed by invalid data
+	$(LZ4) -BX tmp-tlb-hw -c -q | $(LZ4) -tv  # test block checksum
+	# ./datagen -g20KB generates the same file every single time
+	# cannot save output of ./datagen -g20KB as input file to lz4 because the following shell commands are run before ./datagen -g20KB
+	test "$(shell ./datagen -g20KB | $(LZ4) -c --fast | wc -c)" -lt "$(shell ./datagen -g20KB | $(LZ4) -c --fast=9 | wc -c)" # -1 vs -9
+	test "$(shell ./datagen -g20KB | $(LZ4) -c -1 | wc -c)" -lt "$(shell ./datagen -g20KB| $(LZ4) -c --fast=1 | wc -c)" # 1 vs -1
+	test "$(shell ./datagen -g20KB | $(LZ4) -c --fast=1 | wc -c)" -eq "$(shell ./datagen -g20KB| $(LZ4) -c --fast| wc -c)" # checks default fast compression is -1
+	! $(LZ4) -c --fast=0 tmp-tlb-dg20k # lz4 should fail when fast=0
+	! $(LZ4) -c --fast=-1 tmp-tlb-dg20k # lz4 should fail when fast=-1
+	@$(RM) tmp-tlb*
+
+
+
+test-lz4-dict: lz4 datagen
+	@echo "\n ---- test lz4 compression/decompression with dictionary ----"
+	./datagen -g16KB > tmp-dict
+	./datagen -g32KB > tmp-dict-sample-32k
+	< tmp-dict-sample-32k $(LZ4) -D tmp-dict | $(LZ4) -dD tmp-dict | diff - tmp-dict-sample-32k
+	./datagen -g128MB > tmp-dict-sample-128m
+	< tmp-dict-sample-128m $(LZ4) -D tmp-dict | $(LZ4) -dD tmp-dict | diff - tmp-dict-sample-128m
+	touch tmp-dict-sample-0
+	< tmp-dict-sample-0 $(LZ4) -D tmp-dict | $(LZ4) -dD tmp-dict | diff - tmp-dict-sample-0
+
+	< tmp-dict-sample-32k $(LZ4) -D tmp-dict-sample-0 | $(LZ4) -dD tmp-dict-sample-0 | diff - tmp-dict-sample-32k
+	< tmp-dict-sample-0 $(LZ4) -D tmp-dict-sample-0 | $(LZ4) -dD tmp-dict-sample-0 | diff - tmp-dict-sample-0
+
+	@echo "\n ---- test lz4 dictionary loading ----"
+	./datagen -g128KB > tmp-dict-data-128KB
+	set -e; \
+	for l in 0 1 4 128 32767 32768 32769 65535 65536 65537 98303 98304 98305 131071 131072 131073; do \
+		./datagen -g$$l > tmp-dict-$$l; \
+		$(DD) if=tmp-dict-$$l of=tmp-dict-$$l-tail bs=1 count=65536 skip=$$((l > 65536 ? l - 65536 : 0)); \
+		< tmp-dict-$$l      $(LZ4) -D stdin tmp-dict-data-128KB | $(LZ4) -dD tmp-dict-$$l-tail | $(DIFF) - tmp-dict-data-128KB; \
+		< tmp-dict-$$l-tail $(LZ4) -D stdin tmp-dict-data-128KB | $(LZ4) -dD tmp-dict-$$l      | $(DIFF) - tmp-dict-data-128KB; \
+	done
+
+	@$(RM) tmp-dict*
 
 test-lz4-hugefile: lz4 datagen
 	@echo "\n ---- test huge files compression/decompression ----"
 	./datagen -g6GB   | $(LZ4) -vB5D  | $(LZ4) -qt
 	./datagen -g6GB   | $(LZ4) -v5BD  | $(LZ4) -qt
-	@$(RM) tmp*
+	# test large file size [2-4] GB
+	@./datagen -g3G -P100 | $(LZ4) -vv | $(LZ4) --decompress --force --sparse - tmphf1
+	@ls -ls tmphf1
+	@./datagen -g3G -P100 | $(LZ4) --quiet --content-size | $(LZ4) --verbose --decompress --force --sparse - tmphf2
+	@ls -ls tmphf2
+	$(DIFF) -s tmphf1 tmphf2
+	@$(RM) tmphf*
 
 test-lz4-testmode: lz4 datagen
 	@echo "\n ---- bench mode ----"
 	$(LZ4) -bi1
 	@echo "\n ---- test mode ----"
-	./datagen | $(LZ4) -t             && false || true
-	./datagen | $(LZ4) -tf            && false || true
+	! ./datagen | $(LZ4) -t
+	! ./datagen | $(LZ4) -tf
 	@echo "\n ---- pass-through mode ----"
-	./datagen | $(LZ4) -d  > $(VOID)  && false || true
+	! ./datagen | $(LZ4) -d  > $(VOID)
 	./datagen | $(LZ4) -df > $(VOID)
-	@echo "Hello World !" > tmp1
-	$(LZ4) -dcf tmp1
-	@echo "from underground..." > tmp2
-	$(LZ4) -dcfm tmp1 tmp2
-	@echo "\n ---- test cli ----"
-	$(LZ4)     file-does-not-exist    && false || true
-	$(LZ4) -f  file-does-not-exist    && false || true
-	$(LZ4) -fm file1-dne file2-dne    && false || true
-	$(LZ4) -fm file1-dne file2-dne    && false || true
+	@echo "Hello World !" > tmp-tlt1
+	$(LZ4) -dcf tmp-tlt1
+	@echo "from underground..." > tmp-tlt2
+	$(LZ4) -dcfm tmp-tlt1 tmp-tlt2
+	@echo "\n ---- non-existing source ----"
+	! $(LZ4)     file-does-not-exist
+	! $(LZ4) -f  file-does-not-exist
+	! $(LZ4) -fm file1-dne file2-dne
+	@$(RM) tmp-tlt*   # removes tmp-tlt1 and tmp-tlt2
 
 test-lz4-opt-parser: lz4 datagen
 	@echo "\n ---- test opt-parser ----"
@@ -290,12 +346,17 @@
 	./datagen -g16M -P90  | $(LZ4) -11B5    | $(LZ4) -t
 	./datagen -g32M -P10  | $(LZ4) -11B5D   | $(LZ4) -t
 
-test-lz4: lz4 datagen test-lz4-opt-parser test-lz4-basic test-lz4-multiple test-lz4-sparse \
-          test-lz4-frame-concatenation test-lz4-testmode test-lz4-contentSize \
-          test-lz4-hugefile
+test-lz4-essentials : lz4 datagen test-lz4-basic test-lz4-multiple \
+                      test-lz4-frame-concatenation test-lz4-testmode \
+                      test-lz4-contentSize test-lz4-dict  # subset of test-lz4 : opt-parser, sparse and hugefile tests are only run by test-lz4
+	@$(RM) tmp*
+
+test-lz4: lz4 datagen test-lz4-essentials test-lz4-opt-parser \
+          test-lz4-sparse test-lz4-hugefile test-lz4-dict
+	@$(RM) tmp*
 
 test-lz4c: lz4c datagen
-	@echo "\n ---- test lz4c version ----"
+	@echo "\n ---- test lz4c variant ----"
 	./datagen -g256MB | $(LZ4)c -l -v    | $(LZ4)c   -t
 
 test-lz4c32: CFLAGS+=-m32
@@ -345,25 +406,21 @@
 test-frametest32: CFLAGS += -m32
 test-frametest32: test-frametest
 
-test-fasttest: fasttest
-	./fasttest
-
 test-mem: lz4 datagen fuzzer frametest fullbench
 	@echo "\n ---- valgrind tests : memory analyzer ----"
 	valgrind --leak-check=yes --error-exitcode=1 ./datagen -g50M > $(VOID)
-	./datagen -g16KB > tmp
-	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -9 -BD -f tmp $(VOID)
-	./datagen -g16KB -s2 > tmp2
-	./datagen -g16KB -s3 > tmp3
-	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) --force --multiple tmp tmp2 tmp3
-	./datagen -g16MB > tmp
-	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -9 -B5D -f tmp tmp2
-	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -t tmp2
-	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -bi1 tmp
-	valgrind --leak-check=yes --error-exitcode=1 ./fullbench -i1 tmp tmp2
-	./datagen -g256MB > tmp
-	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -B4D -f -vq tmp $(VOID)
-	$(RM) tmp*
+	./datagen -g16KB > ftmdg16K
+	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -9 -BD -f ftmdg16K $(VOID)
+	./datagen -g16KB -s2 > ftmdg16K2
+	./datagen -g16KB -s3 > ftmdg16K3
+	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) --force --multiple ftmdg16K ftmdg16K2 ftmdg16K3
+	./datagen -g7MB > ftmdg7M
+	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -9 -B5D -f ftmdg7M ftmdg16K2
+	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -t ftmdg16K2
+	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -bi1 ftmdg7M
+	valgrind --leak-check=yes --error-exitcode=1 ./fullbench -i1 ftmdg7M ftmdg16K2
+	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -B4D -f -vq ftmdg7M $(VOID)
+	$(RM) ftm*
 	valgrind --leak-check=yes --error-exitcode=1 ./fuzzer -i64 -t1
 	valgrind --leak-check=yes --error-exitcode=1 ./frametest -i256
 
diff --git a/tests/checkTag.c b/tests/checkTag.c
new file mode 100644
index 0000000..4a33415
--- /dev/null
+++ b/tests/checkTag.c
@@ -0,0 +1,79 @@
+/*
+    checkTag.c - Version validation tool for LZ4
+    Copyright (C) Yann Collet 2018 - present
+
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+    - LZ4 homepage : http://www.lz4.org
+    - LZ4 source repo : https://github.com/lz4/lz4
+*/
+
+/* checkTag command :
+ * $ ./checkTag tag
+ * checkTag validates tags of following format : v[0-9].[0-9].[0-9]{any}
+ * The tag is then compared to LZ4 version number.
+ * They are compatible if first 3 digits are identical.
+ * Anything beyond that is free, and doesn't impact validation.
+ * Example : tag v1.8.1.2 is compatible with version 1.8.1
+ * When tag and version are not compatible, program exits with error code 1.
+ * When they are compatible, it exits with a code 0.
+ * checkTag is intended to be used in automated testing environment.
+ */
+
+#include <stdio.h>   /* printf */
+#include <string.h>  /* strlen, strncmp */
+#include "lz4.h"     /* LZ4_VERSION_STRING */
+
+
+/*  validate() : checks that tag is 'v', followed by LZ4_VERSION_STRING, followed by any (possibly empty) suffix.
+ * @return 1 if tag is compatible, 0 if not.
+ */
+static int validate(const char* const tag)
+{
+    size_t const tagLength = strlen(tag);
+    size_t const verLength = strlen(LZ4_VERSION_STRING);
+
+    if (tagLength < 2) return 0;   /* needs at least 'v' plus one more character */
+    if (tag[0] != 'v') return 0;   /* mandatory 'v' prefix */
+    if (tagLength <= verLength) return 0;   /* too short to contain 'v' + the whole version string */
+
+    if (strncmp(LZ4_VERSION_STRING, tag+1, verLength)) return 0;   /* only the first verLength characters after 'v' are compared */
+
+    return 1;   /* note : trailing characters are ignored, so "v1.8.10" is also accepted for version "1.8.1" */
+}
+
+int main(int argc, const char** argv)
+{
+    const char* const exeName = argv[0];
+    const char* const tag = argv[1];   /* read before the argc check, but only used once argc==2 is confirmed */
+    if (argc!=2) {
+        printf("incorrect usage : %s tag \n", exeName);
+        return 2;   /* usage error : exactly one argument (the tag) is expected */
+    }
+
+    printf("Version : %s \n", LZ4_VERSION_STRING);
+    printf("Tag     : %s \n", tag);
+
+    if (validate(tag)) {
+        printf("OK : tag is compatible with lz4 version \n");
+        return 0;   /* success : validate() accepted the tag */
+    }
+
+    printf("!! error : tag and versions are not compatible !! \n");
+    return 1;   /* failure : validate() rejected the tag */
+}
diff --git a/tests/fasttest.c b/tests/fasttest.c
deleted file mode 100644
index a165293..0000000
--- a/tests/fasttest.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/**************************************
- * Compiler Options
- **************************************/
-#ifdef _MSC_VER    /* Visual Studio */
-#  define _CRT_SECURE_NO_WARNINGS // for MSVC
-#  define snprintf sprintf_s
-#endif
-#ifdef __GNUC__
-#  pragma GCC diagnostic ignored "-Wmissing-braces"   /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */
-#endif
-
-
-/**************************************
- * Includes
- **************************************/
-#include <stdio.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include "lz4.h"
-
-
-/* Returns non-zero on failure. */
-int test_compress(const char *input, int inSize, char *output, int outSize)
-{
-    LZ4_stream_t lz4Stream_body = { 0 };
-    LZ4_stream_t* lz4Stream = &lz4Stream_body;
-
-    int inOffset = 0;
-    int outOffset = 0;
-
-    if (inSize & 3) return -1;
-
-    while (inOffset < inSize) {
-        const int length = inSize >> 2;
-        if (inSize > 1024) return -2;
-        if (outSize - (outOffset + 8) < LZ4_compressBound(length)) return -3;
-        {
-            const int outBytes = LZ4_compress_fast_continue(
-                lz4Stream, input + inOffset, output + outOffset + 8, length, outSize-outOffset, 1);
-            if(outBytes <= 0) return -4;
-            memcpy(output + outOffset, &length, 4); /* input length */
-            memcpy(output + outOffset + 4, &outBytes, 4); /* output length */
-            inOffset += length;
-            outOffset += outBytes + 8;
-        }
-    }
-    if (outOffset + 8 > outSize) return -5;
-    memset(output + outOffset, 0, 4);
-    memset(output + outOffset + 4, 0, 4);
-    return 0;
-}
-
-/* Returns non-zero on failure. Not a safe function. */
-int test_decompress(const char *uncompressed, const char *compressed)
-{
-    char outBufferA[1024];
-    char spacing; /* So prefixEnd != dest */
-    char outBufferB[1024];
-    char *output = outBufferA;
-    char *lastOutput = outBufferB;
-    LZ4_streamDecode_t lz4StreamDecode_body = { 0 };
-    LZ4_streamDecode_t* lz4StreamDecode = &lz4StreamDecode_body;
-    int offset = 0;
-    int unOffset = 0;
-    int lastBytes = 0;
-
-    (void)spacing;
-
-    for(;;) {
-        int32_t bytes;
-        int32_t unBytes;
-        /* Read uncompressed size and compressed size */
-        memcpy(&unBytes, compressed + offset, 4);
-        memcpy(&bytes, compressed + offset + 4, 4);
-        offset += 8;
-        /* Check if we reached end of stream or error */
-        if(bytes == 0 && unBytes == 0) return 0;
-        if(bytes <= 0 || unBytes <= 0 || unBytes > 1024) return 1;
-
-        /* Put the last output in the dictionary */
-        LZ4_setStreamDecode(lz4StreamDecode, lastOutput, lastBytes);
-        /* Decompress */
-        bytes = LZ4_decompress_fast_continue(
-            lz4StreamDecode, compressed + offset, output, unBytes);
-        if(bytes <= 0) return 2;
-        /* Check result */
-        {   int const r = memcmp(uncompressed + unOffset, output, unBytes);
-            if (r) return 3;
-        }
-        { char* const tmp = output; output = lastOutput; lastOutput = tmp; }
-        offset += bytes;
-        unOffset += unBytes;
-        lastBytes = unBytes;
-    }
-}
-
-
-int main(int argc, char **argv)
-{
-    char input[] =
-        "Hello Hello Hello Hello Hello Hello Hello Hello!"
-        "Hello Hello Hello Hello Hello Hello Hello Hello!"
-        "Hello Hello Hello Hello Hello Hello Hello Hello!"
-        "Hello Hello Hello Hello Hello Hello Hello Hello!"
-        "Hello Hello Hello Hello Hello Hello Hello Hello!"
-        "Hello Hello Hello Hello Hello Hello Hello Hello!"
-        "Hello Hello Hello Hello Hello Hello Hello Hello!"
-        "Hello Hello Hello Hello Hello Hello Hello Hello!"
-        "Hello Hello Hello Hello Hello Hello Hello Hello!"
-        "Hello Hello Hello Hello Hello Hello Hello Hello!"
-        "Hello Hello Hello Hello Hello Hello Hello Hello!"
-        "Hello Hello Hello Hello Hello Hello Hello Hello!"
-        "Hello Hello Hello Hello Hello Hello Hello Hello!"
-        "Hello Hello Hello Hello Hello Hello Hello Hello!"
-        "Hello Hello Hello Hello Hello Hello Hello Hello!"
-        "Hello Hello Hello Hello Hello Hello Hello Hello";
-    char output[LZ4_COMPRESSBOUND(4096)];
-    int r;
-
-    (void)argc;
-    (void)argv;
-
-    if ((r = test_compress(input, sizeof(input), output, sizeof(output)))) {
-        return r;
-    }
-    if ((r = test_decompress(input, output))) {
-        return r;
-    }
-    return 0;
-}
diff --git a/tests/frametest.c b/tests/frametest.c
index e2e0f86..4efeb6f 100644
--- a/tests/frametest.c
+++ b/tests/frametest.c
@@ -40,7 +40,12 @@
 #include <stdio.h>      /* fprintf */
 #include <string.h>     /* strcmp */
 #include <time.h>       /* clock_t, clock(), CLOCKS_PER_SEC */
-#include "lz4frame_static.h"
+#include <assert.h>
+#include "lz4frame.h"   /* include multiple times to test correctness/safety */
+#include "lz4frame.h"
+#define LZ4F_STATIC_LINKING_ONLY
+#include "lz4frame.h"
+#include "lz4frame.h"
 #include "lz4.h"        /* LZ4_VERSION_STRING */
 #define XXH_STATIC_LINKING_ONLY
 #include "xxhash.h"     /* XXH64 */
@@ -67,7 +72,6 @@
 #define GB *(1U<<30)
 
 static const U32 nbTestsDefault = 256 KB;
-#define COMPRESSIBLE_NOISE_LENGTH (2 MB)
 #define FUZ_COMPRESSIBILITY_DEFAULT 50
 static const U32 prime1 = 2654435761U;
 static const U32 prime2 = 2246822519U;
@@ -164,9 +168,12 @@
 /*-*******************************************************
 *  Tests
 *********************************************************/
+#define CHECK_V(v,f) v = f; if (LZ4F_isError(v)) { fprintf(stderr, "%s\n", LZ4F_getErrorName(v)); goto _output_error; }   /* stores f into v; on LZ4F error, prints the error name and jumps to _output_error (label must exist in the calling function) */
+#define CHECK(f)   { LZ4F_errorCode_t const CHECK_V(err_ , f); }   /* expands to a braced block declaring `err_ = f`, then runs the same error check; the result is not visible outside the block */
+
 int basicTests(U32 seed, double compressibility)
 {
-    int testResult = 0;
+#define COMPRESSIBLE_NOISE_LENGTH (2 MB)
     void* const CNBuffer = malloc(COMPRESSIBLE_NOISE_LENGTH);
     size_t const cBuffSize = LZ4F_compressFrameBound(COMPRESSIBLE_NOISE_LENGTH, NULL);
     void* const compressedBuffer = malloc(cBuffSize);
@@ -176,9 +183,10 @@
     LZ4F_decompressionContext_t dCtx = NULL;
     LZ4F_compressionContext_t cctx = NULL;
     U64 crcOrig;
-
+    int basicTests_error = 0;
     LZ4F_preferences_t prefs;
     memset(&prefs, 0, sizeof(prefs));
+
     if (!CNBuffer || !compressedBuffer || !decodedBuffer) {
         DISPLAY("allocation error, not enough memory to start fuzzer tests \n");
         goto _output_error;
@@ -195,47 +203,47 @@
 
     /* Special case : null-content frame */
     testSize = 0;
-    DISPLAYLEVEL(3, "LZ4F_compressFrame, compress null content : \n");
-    cSize = LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, NULL), CNBuffer, testSize, NULL);
-    if (LZ4F_isError(cSize)) goto _output_error;
-    DISPLAYLEVEL(3, "Compressed null content into a %i bytes frame \n", (int)cSize);
+    DISPLAYLEVEL(3, "LZ4F_compressFrame, compress null content : ");
+    CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, NULL), CNBuffer, testSize, NULL));
+    DISPLAYLEVEL(3, "null content encoded into a %u bytes frame \n", (unsigned)cSize);
 
     DISPLAYLEVEL(3, "LZ4F_createDecompressionContext \n");
-    { LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION);
-      if (LZ4F_isError(errorCode)) goto _output_error; }
+    CHECK ( LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION) );
 
     DISPLAYLEVEL(3, "LZ4F_getFrameInfo on null-content frame (#157) \n");
     {   size_t avail_in = cSize;
         LZ4F_frameInfo_t frame_info;
-        LZ4F_errorCode_t const errorCode = LZ4F_getFrameInfo(dCtx, &frame_info, compressedBuffer, &avail_in);
-        if (LZ4F_isError(errorCode)) goto _output_error;
+        CHECK( LZ4F_getFrameInfo(dCtx, &frame_info, compressedBuffer, &avail_in) );
     }
 
     DISPLAYLEVEL(3, "LZ4F_freeDecompressionContext \n");
-    { LZ4F_errorCode_t const errorCode = LZ4F_freeDecompressionContext(dCtx);
-      if (LZ4F_isError(errorCode)) goto _output_error; }
+    CHECK( LZ4F_freeDecompressionContext(dCtx) );
     dCtx = NULL;
 
     /* test one-pass frame compression */
     testSize = COMPRESSIBLE_NOISE_LENGTH;
-    DISPLAYLEVEL(3, "LZ4F_compressFrame, using default preferences : \n");
-    cSize = LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, NULL), CNBuffer, testSize, NULL);
-    if (LZ4F_isError(cSize)) goto _output_error;
+
+    DISPLAYLEVEL(3, "LZ4F_compressFrame, using fast level -3 : ");
+    {   LZ4F_preferences_t fastCompressPrefs;
+        memset(&fastCompressPrefs, 0, sizeof(fastCompressPrefs));
+        fastCompressPrefs.compressionLevel = -3;
+        CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, NULL), CNBuffer, testSize, &fastCompressPrefs));
+        DISPLAYLEVEL(3, "Compressed %u bytes into a %u bytes frame \n", (U32)testSize, (U32)cSize);
+    }
+
+    DISPLAYLEVEL(3, "LZ4F_compressFrame, using default preferences : ");
+    CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, NULL), CNBuffer, testSize, NULL));
     DISPLAYLEVEL(3, "Compressed %u bytes into a %u bytes frame \n", (U32)testSize, (U32)cSize);
 
     DISPLAYLEVEL(3, "Decompression test : \n");
     {   size_t decodedBufferSize = COMPRESSIBLE_NOISE_LENGTH;
         size_t compressedBufferSize = cSize;
-        BYTE* ip = (BYTE*)compressedBuffer;
-        BYTE* const iend = (BYTE*)compressedBuffer + cSize;
 
-        LZ4F_errorCode_t errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION);
-        if (LZ4F_isError(errorCode)) goto _output_error;
+        CHECK( LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION) );
 
-        DISPLAYLEVEL(3, "Single Pass decompression : \n");
-        { size_t const decompressError = LZ4F_decompress(dCtx, decodedBuffer, &decodedBufferSize, compressedBuffer, &compressedBufferSize, NULL);
-          if (LZ4F_isError(decompressError)) goto _output_error; }
-        { U64 const crcDest = XXH64(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, 1);
+        DISPLAYLEVEL(3, "Single Pass decompression : ");
+        CHECK( LZ4F_decompress(dCtx, decodedBuffer, &decodedBufferSize, compressedBuffer, &compressedBufferSize, NULL) );
+        { U64 const crcDest = XXH64(decodedBuffer, decodedBufferSize, 1);
           if (crcDest != crcOrig) goto _output_error; }
         DISPLAYLEVEL(3, "Regenerated %u bytes \n", (U32)decodedBufferSize);
 
@@ -248,8 +256,7 @@
             BYTE* const oend = (BYTE*)decodedBuffer + COMPRESSIBLE_NOISE_LENGTH;
             size_t decResult, oSize = COMPRESSIBLE_NOISE_LENGTH;
             DISPLAYLEVEL(3, "Missing last %u bytes : ", (U32)missingBytes);
-            decResult = LZ4F_decompress(dCtx, op, &oSize, cBuff, &iSize, NULL);
-            if (LZ4F_isError(decResult)) goto _output_error;
+            CHECK_V(decResult, LZ4F_decompress(dCtx, op, &oSize, cBuff, &iSize, NULL));
             if (decResult != missingBytes) {
                 DISPLAY("%u bytes missing != %u bytes requested \n", (U32)missingBytes, (U32)decResult);
                 goto _output_error;
@@ -270,72 +277,93 @@
         {   size_t oSize = 0;
             size_t iSize = 0;
             LZ4F_frameInfo_t fi;
+            const BYTE* ip = (BYTE*)compressedBuffer;
 
             DISPLAYLEVEL(3, "Start by feeding 0 bytes, to get next input size : ");
-            errorCode = LZ4F_decompress(dCtx, NULL, &oSize, ip, &iSize, NULL);
-            if (LZ4F_isError(errorCode)) goto _output_error;
-            DISPLAYLEVEL(3, " %u  \n", (unsigned)errorCode);
+            CHECK( LZ4F_decompress(dCtx, NULL, &oSize, ip, &iSize, NULL) );
+            /* DISPLAYLEVEL(3, " %u  \n", (unsigned)errorCode); */
+            DISPLAYLEVEL(3, " OK  \n");
 
-            DISPLAYLEVEL(3, "get FrameInfo on null input : ");
-            errorCode = LZ4F_getFrameInfo(dCtx, &fi, ip, &iSize);
-            if (errorCode != (size_t)-LZ4F_ERROR_frameHeader_incomplete) goto _output_error;
-            DISPLAYLEVEL(3, " correctly failed : %s \n", LZ4F_getErrorName(errorCode));
+            DISPLAYLEVEL(3, "LZ4F_getFrameInfo on zero-size input : ");
+            {   size_t nullSize = 0;
+                size_t const fiError = LZ4F_getFrameInfo(dCtx, &fi, ip, &nullSize);
+                if (LZ4F_getErrorCode(fiError) != LZ4F_ERROR_frameHeader_incomplete) {
+                    DISPLAYLEVEL(3, "incorrect error : %s != ERROR_frameHeader_incomplete \n",
+                                    LZ4F_getErrorName(fiError));
+                    goto _output_error;
+                }
+                DISPLAYLEVEL(3, " correctly failed : %s \n", LZ4F_getErrorName(fiError));
+            }
 
-            DISPLAYLEVEL(3, "get FrameInfo on not enough input : ");
-            iSize = 6;
-            errorCode = LZ4F_getFrameInfo(dCtx, &fi, ip, &iSize);
-            if (errorCode != (size_t)-LZ4F_ERROR_frameHeader_incomplete) goto _output_error;
-            DISPLAYLEVEL(3, " correctly failed : %s \n", LZ4F_getErrorName(errorCode));
-            ip += iSize;
+            DISPLAYLEVEL(3, "LZ4F_getFrameInfo on not enough input : ");
+            {   size_t inputSize = 6;
+                size_t const fiError = LZ4F_getFrameInfo(dCtx, &fi, ip, &inputSize);
+                if (LZ4F_getErrorCode(fiError) != LZ4F_ERROR_frameHeader_incomplete) {
+                    DISPLAYLEVEL(3, "incorrect error : %s != ERROR_frameHeader_incomplete \n", LZ4F_getErrorName(fiError));
+                    goto _output_error;
+                }
+                DISPLAYLEVEL(3, " correctly failed : %s \n", LZ4F_getErrorName(fiError));
+            }
 
-            DISPLAYLEVEL(3, "get FrameInfo on enough input : ");
+            DISPLAYLEVEL(3, "LZ4F_getFrameInfo on enough input : ");
             iSize = 15 - iSize;
-            errorCode = LZ4F_getFrameInfo(dCtx, &fi, ip, &iSize);
-            if (LZ4F_isError(errorCode)) goto _output_error;
+            CHECK( LZ4F_getFrameInfo(dCtx, &fi, ip, &iSize) );
             DISPLAYLEVEL(3, " correctly decoded \n");
             ip += iSize;
         }
 
-        DISPLAYLEVEL(3, "Byte after byte : \n");
+        DISPLAYLEVEL(3, "Decode a buggy input : ");
+        assert(COMPRESSIBLE_NOISE_LENGTH > 64);
+        assert(cSize > 48);
+        memcpy(decodedBuffer, (char*)compressedBuffer+16, 32);  /* save correct data */
+        memcpy((char*)compressedBuffer+16, (const char*)decodedBuffer+32, 32);  /* insert noise */
+        {   size_t dbSize = COMPRESSIBLE_NOISE_LENGTH;
+            size_t cbSize = cSize;
+            size_t const decompressError = LZ4F_decompress(dCtx, decodedBuffer, &dbSize,
+                                                               compressedBuffer, &cbSize,
+                                                               NULL);
+            if (!LZ4F_isError(decompressError)) goto _output_error;
+            DISPLAYLEVEL(3, "error detected : %s \n", LZ4F_getErrorName(decompressError));
+        }
+        memcpy((char*)compressedBuffer+16, decodedBuffer, 32);  /* restore correct data */
+
+        DISPLAYLEVEL(3, "Reset decompression context, since it's left in error state \n");
+        LZ4F_resetDecompressionContext(dCtx);   /* always successful */
+
+        DISPLAYLEVEL(3, "Byte after byte : ");
         {   BYTE* const ostart = (BYTE*)decodedBuffer;
             BYTE* op = ostart;
             BYTE* const oend = (BYTE*)decodedBuffer + COMPRESSIBLE_NOISE_LENGTH;
+            const BYTE* ip = (const BYTE*) compressedBuffer;
+            const BYTE* const iend = ip + cSize;
             while (ip < iend) {
                 size_t oSize = oend-op;
                 size_t iSize = 1;
-                errorCode = LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL);
-                if (LZ4F_isError(errorCode)) goto _output_error;
+                CHECK( LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL) );
                 op += oSize;
                 ip += iSize;
             }
             { U64 const crcDest = XXH64(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, 1);
               if (crcDest != crcOrig) goto _output_error; }
             DISPLAYLEVEL(3, "Regenerated %u/%u bytes \n", (unsigned)(op-ostart), COMPRESSIBLE_NOISE_LENGTH);
-            }
-
-        errorCode = LZ4F_freeDecompressionContext(dCtx);
-        if (LZ4F_isError(errorCode)) goto _output_error;
-        dCtx = NULL;
+        }
     }
 
-    DISPLAYLEVEL(3, "Using 64 KB block : \n");
+    DISPLAYLEVEL(3, "Using 64 KB block : ");
     prefs.frameInfo.blockSizeID = LZ4F_max64KB;
     prefs.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled;
-    cSize = LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, &prefs), CNBuffer, testSize, &prefs);
-    if (LZ4F_isError(cSize)) goto _output_error;
+    CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, &prefs), CNBuffer, testSize, &prefs));
     DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)cSize);
 
-    DISPLAYLEVEL(3, "without checksum : \n");
+    DISPLAYLEVEL(3, "without checksum : ");
     prefs.frameInfo.contentChecksumFlag = LZ4F_noContentChecksum;
-    cSize = LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, &prefs), CNBuffer, testSize, &prefs);
-    if (LZ4F_isError(cSize)) goto _output_error;
+    CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, &prefs), CNBuffer, testSize, &prefs));
     DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)cSize);
 
-    DISPLAYLEVEL(3, "Using 256 KB block : \n");
+    DISPLAYLEVEL(3, "Using 256 KB block : ");
     prefs.frameInfo.blockSizeID = LZ4F_max256KB;
     prefs.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled;
-    cSize = LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, &prefs), CNBuffer, testSize, &prefs);
-    if (LZ4F_isError(cSize)) goto _output_error;
+    CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, &prefs), CNBuffer, testSize, &prefs));
     DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)cSize);
 
     DISPLAYLEVEL(3, "Decompression test : \n");
@@ -347,17 +375,13 @@
         const BYTE* ip = (const BYTE*)compressedBuffer;
         const BYTE* const iend = (const BYTE*)compressedBuffer + cSize;
 
-        { LZ4F_errorCode_t const createError = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION);
-          if (LZ4F_isError(createError)) goto _output_error; }
-
-        DISPLAYLEVEL(3, "random segment sizes : \n");
+        DISPLAYLEVEL(3, "random segment sizes : ");
         while (ip < iend) {
             unsigned const nbBits = FUZ_rand(&randState) % maxBits;
             size_t iSize = (FUZ_rand(&randState) & ((1<<nbBits)-1)) + 1;
             size_t oSize = oend-op;
             if (iSize > (size_t)(iend-ip)) iSize = iend-ip;
-            { size_t const decompressError = LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL);
-              if (LZ4F_isError(decompressError)) goto _output_error; }
+            CHECK( LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL) );
             op += oSize;
             ip += iSize;
         }
@@ -367,99 +391,273 @@
             DISPLAYLEVEL(3, "Regenerated %u bytes \n", (U32)decodedSize);
          }
 
-        { LZ4F_errorCode_t const freeError = LZ4F_freeDecompressionContext(dCtx);
-          if (LZ4F_isError(freeError)) goto _output_error; }
+        CHECK( LZ4F_freeDecompressionContext(dCtx) );
         dCtx = NULL;
     }
 
-    DISPLAYLEVEL(3, "without checksum : \n");
+    DISPLAYLEVEL(3, "without checksum : ");
     prefs.frameInfo.contentChecksumFlag = LZ4F_noContentChecksum;
-    cSize = LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, &prefs), CNBuffer, testSize, &prefs);
-    if (LZ4F_isError(cSize)) goto _output_error;
+    CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, &prefs), CNBuffer, testSize, &prefs) );
     DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)cSize);
 
-    DISPLAYLEVEL(3, "Using 1 MB block : \n");
+    DISPLAYLEVEL(3, "Using 1 MB block : ");
     prefs.frameInfo.blockSizeID = LZ4F_max1MB;
     prefs.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled;
-    cSize = LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, &prefs), CNBuffer, testSize, &prefs);
-    if (LZ4F_isError(cSize)) goto _output_error;
+    CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, &prefs), CNBuffer, testSize, &prefs) );
     DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)cSize);
 
-    DISPLAYLEVEL(3, "without checksum : \n");
+    DISPLAYLEVEL(3, "without frame checksum : ");
     prefs.frameInfo.contentChecksumFlag = LZ4F_noContentChecksum;
-    cSize = LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, &prefs), CNBuffer, testSize, &prefs);
-    if (LZ4F_isError(cSize)) goto _output_error;
+    CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, &prefs), CNBuffer, testSize, &prefs) );
     DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)cSize);
 
-    DISPLAYLEVEL(3, "Using 4 MB block : \n");
+    DISPLAYLEVEL(3, "Using 4 MB block : ");
     prefs.frameInfo.blockSizeID = LZ4F_max4MB;
     prefs.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled;
     {   size_t const dstCapacity = LZ4F_compressFrameBound(testSize, &prefs);
-        DISPLAYLEVEL(4, "dstCapacity = %u  \n", (U32)dstCapacity)
-        cSize = LZ4F_compressFrame(compressedBuffer, dstCapacity, CNBuffer, testSize, &prefs);
-        if (LZ4F_isError(cSize)) goto _output_error;
+        DISPLAYLEVEL(4, "dstCapacity = %u  ; ", (U32)dstCapacity)
+        CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, dstCapacity, CNBuffer, testSize, &prefs) );
         DISPLAYLEVEL(3, "Compressed %u bytes into a %u bytes frame \n", (U32)testSize, (U32)cSize);
     }
 
-    DISPLAYLEVEL(3, "without checksum : \n");
+    DISPLAYLEVEL(3, "without frame checksum : ");
     prefs.frameInfo.contentChecksumFlag = LZ4F_noContentChecksum;
     {   size_t const dstCapacity = LZ4F_compressFrameBound(testSize, &prefs);
-        DISPLAYLEVEL(4, "dstCapacity = %u  \n", (U32)dstCapacity)
-        cSize = LZ4F_compressFrame(compressedBuffer, dstCapacity, CNBuffer, testSize, &prefs);
-        if (LZ4F_isError(cSize)) goto _output_error;
+        DISPLAYLEVEL(4, "dstCapacity = %u  ; ", (U32)dstCapacity)
+        CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, dstCapacity, CNBuffer, testSize, &prefs) );
         DISPLAYLEVEL(3, "Compressed %u bytes into a %u bytes frame \n", (U32)testSize, (U32)cSize);
     }
 
-    {   size_t errorCode;
+    DISPLAYLEVEL(3, "LZ4F_compressFrame with block checksum : ");
+    memset(&prefs, 0, sizeof(prefs));
+    prefs.frameInfo.blockChecksumFlag = LZ4F_blockChecksumEnabled;
+    CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, &prefs), CNBuffer, testSize, &prefs) );
+    DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)cSize);
+
+    DISPLAYLEVEL(3, "Decompress with block checksum : ");
+    {   size_t iSize = cSize;
+        size_t decodedSize = COMPRESSIBLE_NOISE_LENGTH;
+        LZ4F_decompressionContext_t dctx;
+        CHECK( LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION) );
+        CHECK( LZ4F_decompress(dctx, decodedBuffer, &decodedSize, compressedBuffer, &iSize, NULL) );
+        if (decodedSize != testSize) goto _output_error;
+        if (iSize != cSize) goto _output_error;
+        {   U64 const crcDest = XXH64(decodedBuffer, decodedSize, 1);
+            U64 const crcSrc = XXH64(CNBuffer, testSize, 1);
+            if (crcDest != crcSrc) goto _output_error;
+        }
+        DISPLAYLEVEL(3, "Regenerated %u bytes \n", (U32)decodedSize);
+
+        CHECK( LZ4F_freeDecompressionContext(dctx) );
+    }
+
+    /* frame content size tests */
+    {   size_t cErr;
         BYTE* const ostart = (BYTE*)compressedBuffer;
         BYTE* op = ostart;
-        errorCode = LZ4F_createCompressionContext(&cctx, LZ4F_VERSION);
-        if (LZ4F_isError(errorCode)) goto _output_error;
+        CHECK( LZ4F_createCompressionContext(&cctx, LZ4F_VERSION) );
 
-        DISPLAYLEVEL(3, "compress without frameSize : \n");
+        DISPLAYLEVEL(3, "compress without frameSize : ");
         memset(&(prefs.frameInfo), 0, sizeof(prefs.frameInfo));
-        errorCode = LZ4F_compressBegin(cctx, compressedBuffer, testSize, &prefs);
-        if (LZ4F_isError(errorCode)) goto _output_error;
-        op += errorCode;
-        errorCode = LZ4F_compressUpdate(cctx, op, LZ4F_compressBound(testSize, &prefs), CNBuffer, testSize, NULL);
-        if (LZ4F_isError(errorCode)) goto _output_error;
-        op += errorCode;
-        errorCode = LZ4F_compressEnd(cctx, compressedBuffer, testSize, NULL);
-        if (LZ4F_isError(errorCode)) goto _output_error;
+        CHECK_V(cErr, LZ4F_compressBegin(cctx, compressedBuffer, testSize, &prefs));
+        op += cErr;
+        CHECK_V(cErr, LZ4F_compressUpdate(cctx, op, LZ4F_compressBound(testSize, &prefs), CNBuffer, testSize, NULL));
+        op += cErr;
+        CHECK( LZ4F_compressEnd(cctx, compressedBuffer, testSize, NULL) );
         DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)(op-ostart));
 
-        DISPLAYLEVEL(3, "compress with frameSize : \n");
+        DISPLAYLEVEL(3, "compress with frameSize : ");
         prefs.frameInfo.contentSize = testSize;
         op = ostart;
-        errorCode = LZ4F_compressBegin(cctx, compressedBuffer, testSize, &prefs);
-        if (LZ4F_isError(errorCode)) goto _output_error;
-        op += errorCode;
-        errorCode = LZ4F_compressUpdate(cctx, op, LZ4F_compressBound(testSize, &prefs), CNBuffer, testSize, NULL);
-        if (LZ4F_isError(errorCode)) goto _output_error;
-        op += errorCode;
-        errorCode = LZ4F_compressEnd(cctx, compressedBuffer, testSize, NULL);
-        if (LZ4F_isError(errorCode)) goto _output_error;
+        CHECK_V(cErr, LZ4F_compressBegin(cctx, compressedBuffer, testSize, &prefs));
+        op += cErr;
+        CHECK_V(cErr, LZ4F_compressUpdate(cctx, op, LZ4F_compressBound(testSize, &prefs), CNBuffer, testSize, NULL));
+        op += cErr;
+        CHECK( LZ4F_compressEnd(cctx, compressedBuffer, testSize, NULL) );
         DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)(op-ostart));
 
-        DISPLAYLEVEL(3, "compress with wrong frameSize : \n");
+        DISPLAYLEVEL(3, "compress with wrong frameSize : ");
         prefs.frameInfo.contentSize = testSize+1;
         op = ostart;
-        errorCode = LZ4F_compressBegin(cctx, compressedBuffer, testSize, &prefs);
-        if (LZ4F_isError(errorCode)) goto _output_error;
-        op += errorCode;
-        errorCode = LZ4F_compressUpdate(cctx, op, LZ4F_compressBound(testSize, &prefs), CNBuffer, testSize, NULL);
-        if (LZ4F_isError(errorCode)) goto _output_error;
-        op += errorCode;
-        errorCode = LZ4F_compressEnd(cctx, op, testSize, NULL);
-        if (LZ4F_isError(errorCode)) { DISPLAYLEVEL(3, "Error correctly detected : %s \n", LZ4F_getErrorName(errorCode)); }
-        else
-            goto _output_error;
+        CHECK_V(cErr, LZ4F_compressBegin(cctx, compressedBuffer, testSize, &prefs));
+        op += cErr;
+        CHECK_V(cErr, LZ4F_compressUpdate(cctx, op, LZ4F_compressBound(testSize, &prefs), CNBuffer, testSize, NULL));
+        op += cErr;
+        cErr = LZ4F_compressEnd(cctx, op, testSize, NULL);
+        if (!LZ4F_isError(cErr)) goto _output_error;
+        DISPLAYLEVEL(3, "Error correctly detected : %s \n", LZ4F_getErrorName(cErr));
 
-        errorCode = LZ4F_freeCompressionContext(cctx);
-        if (LZ4F_isError(errorCode)) goto _output_error;
+        CHECK( LZ4F_freeCompressionContext(cctx) );
         cctx = NULL;
     }
 
+    /* dictID tests */
+    {   size_t cErr;
+        U32 const dictID = 0x99;
+        CHECK( LZ4F_createCompressionContext(&cctx, LZ4F_VERSION) );
+
+        DISPLAYLEVEL(3, "insert a dictID : ");
+        memset(&prefs.frameInfo, 0, sizeof(prefs.frameInfo));
+        prefs.frameInfo.dictID = dictID;
+        CHECK_V(cErr, LZ4F_compressBegin(cctx, compressedBuffer, testSize, &prefs));
+        DISPLAYLEVEL(3, "created frame header of size %i bytes  \n", (int)cErr);
+
+        DISPLAYLEVEL(3, "read a dictID : ");
+        CHECK( LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION) );
+        memset(&prefs.frameInfo, 0, sizeof(prefs.frameInfo));
+        CHECK( LZ4F_getFrameInfo(dCtx, &prefs.frameInfo, compressedBuffer, &cErr) );
+        if (prefs.frameInfo.dictID != dictID) goto _output_error;
+        DISPLAYLEVEL(3, "%u \n", (U32)prefs.frameInfo.dictID);
+
+        CHECK( LZ4F_freeDecompressionContext(dCtx) ); dCtx = NULL;
+        CHECK( LZ4F_freeCompressionContext(cctx) ); cctx = NULL;
+    }
+
+    /* Dictionary compression test */
+    {   size_t const dictSize = 63 KB;
+        size_t const dstCapacity = LZ4F_compressFrameBound(dictSize, NULL);
+        size_t cSizeNoDict, cSizeWithDict;
+        LZ4F_CDict* const cdict = LZ4F_createCDict(CNBuffer, dictSize);
+        if (cdict == NULL) goto _output_error;
+        CHECK( LZ4F_createCompressionContext(&cctx, LZ4F_VERSION) );
+        
+        DISPLAYLEVEL(3, "LZ4F_compressFrame_usingCDict, with NULL dict : ");
+        CHECK_V(cSizeNoDict,
+                LZ4F_compressFrame_usingCDict(cctx, compressedBuffer, dstCapacity,
+                                              CNBuffer, dictSize,
+                                              NULL, NULL) );
+        DISPLAYLEVEL(3, "%u bytes \n", (unsigned)cSizeNoDict);
+
+        CHECK( LZ4F_freeCompressionContext(cctx) );
+        CHECK( LZ4F_createCompressionContext(&cctx, LZ4F_VERSION) );
+        DISPLAYLEVEL(3, "LZ4F_compressFrame_usingCDict, with dict : ");
+        CHECK_V(cSizeWithDict,
+                LZ4F_compressFrame_usingCDict(cctx, compressedBuffer, dstCapacity,
+                                              CNBuffer, dictSize,
+                                              cdict, NULL) );
+        DISPLAYLEVEL(3, "compressed %u bytes into %u bytes \n",
+                        (unsigned)dictSize, (unsigned)cSizeWithDict);
+        if (cSizeWithDict >= cSizeNoDict) goto _output_error;  /* must be more efficient */
+        crcOrig = XXH64(CNBuffer, dictSize, 0);
+
+        DISPLAYLEVEL(3, "LZ4F_decompress_usingDict : ");
+        {   LZ4F_dctx* dctx;
+            size_t decodedSize = COMPRESSIBLE_NOISE_LENGTH;
+            size_t compressedSize = cSizeWithDict;
+            CHECK( LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION) );
+            CHECK( LZ4F_decompress_usingDict(dctx,
+                                        decodedBuffer, &decodedSize,
+                                        compressedBuffer, &compressedSize,
+                                        CNBuffer, dictSize,
+                                        NULL) );
+            if (compressedSize != cSizeWithDict) goto _output_error;
+            if (decodedSize != dictSize) goto _output_error;
+            { U64 const crcDest = XXH64(decodedBuffer, decodedSize, 0);
+              if (crcDest != crcOrig) goto _output_error; }
+            DISPLAYLEVEL(3, "Regenerated %u bytes \n", (U32)decodedSize);
+            CHECK( LZ4F_freeDecompressionContext(dctx) );
+        }
+
+        DISPLAYLEVEL(3, "LZ4F_compressFrame_usingCDict, with dict, negative level : ");
+        {   size_t cSizeLevelMax;
+            LZ4F_preferences_t cParams;
+            memset(&cParams, 0, sizeof(cParams));
+            cParams.compressionLevel = -3;
+            CHECK_V(cSizeLevelMax,
+                LZ4F_compressFrame_usingCDict(cctx, compressedBuffer, dstCapacity,
+                                              CNBuffer, dictSize,
+                                              cdict, &cParams) );
+            DISPLAYLEVEL(3, "%u bytes \n", (unsigned)cSizeLevelMax);
+        }
+
+        DISPLAYLEVEL(3, "LZ4F_compressFrame_usingCDict, with dict, level max : ");
+        {   size_t cSizeLevelMax;
+            LZ4F_preferences_t cParams;
+            memset(&cParams, 0, sizeof(cParams));
+            cParams.compressionLevel = LZ4F_compressionLevel_max();
+            CHECK_V(cSizeLevelMax,
+                LZ4F_compressFrame_usingCDict(cctx, compressedBuffer, dstCapacity,
+                                              CNBuffer, dictSize,
+                                              cdict, &cParams) );
+            DISPLAYLEVEL(3, "%u bytes \n", (unsigned)cSizeLevelMax);
+        }
+
+        DISPLAYLEVEL(3, "LZ4F_compressFrame_usingCDict, multiple linked blocks : ");
+        {   size_t cSizeContiguous;
+            size_t const inSize = dictSize * 3;
+            size_t const outCapacity = LZ4F_compressFrameBound(inSize, NULL);
+            LZ4F_preferences_t cParams;
+            memset(&cParams, 0, sizeof(cParams));
+            cParams.frameInfo.blockMode = LZ4F_blockLinked;
+            cParams.frameInfo.blockSizeID = LZ4F_max64KB;
+            CHECK_V(cSizeContiguous,
+                LZ4F_compressFrame_usingCDict(cctx, compressedBuffer, outCapacity,
+                                              CNBuffer, inSize,
+                                              cdict, &cParams) );
+            DISPLAYLEVEL(3, "compressed %u bytes into %u bytes \n",
+                        (unsigned)inSize, (unsigned)cSizeContiguous);
+
+            DISPLAYLEVEL(3, "LZ4F_decompress_usingDict on multiple linked blocks : ");
+            {   LZ4F_dctx* dctx;
+                size_t decodedSize = COMPRESSIBLE_NOISE_LENGTH;
+                size_t compressedSize = cSizeContiguous;
+                CHECK( LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION) );
+                CHECK( LZ4F_decompress_usingDict(dctx,
+                                            decodedBuffer, &decodedSize,
+                                            compressedBuffer, &compressedSize,
+                                            CNBuffer, dictSize,
+                                            NULL) );
+                if (compressedSize != cSizeContiguous) goto _output_error;
+                if (decodedSize != inSize) goto _output_error;
+                crcOrig = XXH64(CNBuffer, inSize, 0);
+                { U64 const crcDest = XXH64(decodedBuffer, decodedSize, 0);
+                  if (crcDest != crcOrig) goto _output_error; }
+                DISPLAYLEVEL(3, "Regenerated %u bytes \n", (U32)decodedSize);
+                CHECK( LZ4F_freeDecompressionContext(dctx) );
+            }
+        }
+
+
+        DISPLAYLEVEL(3, "LZ4F_compressFrame_usingCDict, multiple independent blocks : ");
+        {   size_t cSizeIndep;
+            size_t const inSize = dictSize * 3;
+            size_t const outCapacity = LZ4F_compressFrameBound(inSize, NULL);
+            LZ4F_preferences_t cParams;
+            memset(&cParams, 0, sizeof(cParams));
+            cParams.frameInfo.blockMode = LZ4F_blockIndependent;
+            cParams.frameInfo.blockSizeID = LZ4F_max64KB;
+            CHECK_V(cSizeIndep,
+                LZ4F_compressFrame_usingCDict(cctx, compressedBuffer, outCapacity,
+                                              CNBuffer, inSize,
+                                              cdict, &cParams) );
+            DISPLAYLEVEL(3, "compressed %u bytes into %u bytes \n",
+                        (unsigned)inSize, (unsigned)cSizeIndep);
+
+            DISPLAYLEVEL(3, "LZ4F_decompress_usingDict on multiple independent blocks : ");
+            {   LZ4F_dctx* dctx;
+                size_t decodedSize = COMPRESSIBLE_NOISE_LENGTH;
+                size_t compressedSize = cSizeIndep;
+                CHECK( LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION) );
+                CHECK( LZ4F_decompress_usingDict(dctx,
+                                            decodedBuffer, &decodedSize,
+                                            compressedBuffer, &compressedSize,
+                                            CNBuffer, dictSize,
+                                            NULL) );
+                if (compressedSize != cSizeIndep) goto _output_error;
+                if (decodedSize != inSize) goto _output_error;
+                crcOrig = XXH64(CNBuffer, inSize, 0);
+                { U64 const crcDest = XXH64(decodedBuffer, decodedSize, 0);
+                  if (crcDest != crcOrig) goto _output_error; }
+                DISPLAYLEVEL(3, "Regenerated %u bytes \n", (U32)decodedSize);
+                CHECK( LZ4F_freeDecompressionContext(dctx) );
+            }
+        }
+
+        LZ4F_freeCDict(cdict);
+        CHECK( LZ4F_freeCompressionContext(cctx) ); cctx = NULL;
+    }
+
+
     DISPLAYLEVEL(3, "Skippable frame test : \n");
     {   size_t decodedBufferSize = COMPRESSIBLE_NOISE_LENGTH;
         unsigned maxBits = FUZ_highbit((U32)decodedBufferSize);
@@ -468,8 +666,7 @@
         BYTE* ip = (BYTE*)compressedBuffer;
         BYTE* iend = (BYTE*)compressedBuffer + cSize + 8;
 
-        LZ4F_errorCode_t errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION);
-        if (LZ4F_isError(errorCode)) goto _output_error;
+        CHECK( LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION) );
 
         /* generate skippable frame */
         FUZ_writeLE32(ip, LZ4F_MAGIC_SKIPPABLE_START);
@@ -481,8 +678,7 @@
             size_t iSize = (FUZ_rand(&randState) & ((1<<nbBits)-1)) + 1;
             size_t oSize = oend-op;
             if (iSize > (size_t)(iend-ip)) iSize = iend-ip;
-            errorCode = LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL);
-            if (LZ4F_isError(errorCode)) goto _output_error;
+            CHECK( LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL) );
             op += oSize;
             ip += iSize;
         }
@@ -497,12 +693,11 @@
         iend = ip+8;
 
         while (ip < iend) {
-            unsigned nbBits = FUZ_rand(&randState) % maxBits;
+            unsigned const nbBits = FUZ_rand(&randState) % maxBits;
             size_t iSize = (FUZ_rand(&randState) & ((1<<nbBits)-1)) + 1;
             size_t oSize = oend-op;
             if (iSize > (size_t)(iend-ip)) iSize = iend-ip;
-            errorCode = LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL);
-            if (LZ4F_isError(errorCode)) goto _output_error;
+            CHECK( LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL) );
             op += oSize;
             ip += iSize;
         }
@@ -518,8 +713,7 @@
             size_t iSize = 10;
             size_t oSize = 10;
             if (iSize > (size_t)(iend-ip)) iSize = iend-ip;
-            errorCode = LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL);
-            if (LZ4F_isError(errorCode)) goto _output_error;
+            CHECK( LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL) );
             op += oSize;
             ip += iSize;
         }
@@ -533,10 +727,10 @@
     free(decodedBuffer);
     LZ4F_freeDecompressionContext(dCtx); dCtx = NULL;
     LZ4F_freeCompressionContext(cctx); cctx = NULL;
-    return testResult;
+    return basicTests_error;
 
 _output_error:
-    testResult = 1;
+    basicTests_error = 1;
     DISPLAY("Error detected ! \n");
     goto _end;
 }
@@ -544,15 +738,18 @@
 
 static void locateBuffDiff(const void* buff1, const void* buff2, size_t size, unsigned nonContiguous)
 {
+    size_t p=0;   /* index of the first differing byte; same type as `size` so the two compare without casts */
     const BYTE* b1=(const BYTE*)buff1;
     const BYTE* b2=(const BYTE*)buff2;
+    DISPLAY("locateBuffDiff: looking for error position \n");   /* diagnostic helper: prints only, returns nothing */
     if (nonContiguous) {
-        DISPLAY("Non-contiguous output test (%i bytes)\n", (int)size);
+        DISPLAY("mode %u: non-contiguous output (%zu bytes), cannot search \n", nonContiguous, size);   /* any non-zero mode: report it and give up */
         return;
     }
-    while (b1[p]==b2[p]) p++;
-    DISPLAY("Error at pos %i/%i : %02X != %02X \n", p, (int)size, b1[p], b2[p]);
+    while (p < size && b1[p]==b2[p]) p++;   /* bounded scan: stops at `size`, so identical buffers are never read past their end */
+    if (p != size) {   /* p == size means no difference was found within `size` bytes: print nothing */
+        DISPLAY("Error at pos %i/%i : %02X != %02X \n", (int)p, (int)size, b1[p], b2[p]);   /* position and the two mismatching byte values */
+    }
 }
 
 
@@ -571,6 +768,7 @@
     size_t result;
     clock_t const startClock = clock();
     clock_t const clockDuration = duration_s * CLOCKS_PER_SEC;
+#   undef CHECK
 #   define CHECK(cond, ...) if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \
                             DISPLAY(" (seed %u, test nb %u)  \n", seed, testNb); goto _output_error; }
 
@@ -607,10 +805,11 @@
         memset(&prefs, 0, sizeof(prefs));
         prefs.frameInfo.blockMode = (LZ4F_blockMode_t)(FUZ_rand(&randState) & 1);
         prefs.frameInfo.blockSizeID = (LZ4F_blockSizeID_t)(4 + (FUZ_rand(&randState) & 3));
+        prefs.frameInfo.blockChecksumFlag = (LZ4F_blockChecksum_t)(FUZ_rand(&randState) & 1);
         prefs.frameInfo.contentChecksumFlag = (LZ4F_contentChecksum_t)(FUZ_rand(&randState) & 1);
         prefs.frameInfo.contentSize = ((FUZ_rand(&randState) & 0xF) == 1) ? srcSize : 0;
         prefs.autoFlush = neverFlush ? 0 : (FUZ_rand(&randState) & 7) == 2;
-        prefs.compressionLevel = FUZ_rand(&randState) % 5;
+        prefs.compressionLevel = -5 + (int)(FUZ_rand(&randState) % 11);
         if ((FUZ_rand(&randState) & 0xF) == 1) prefsPtr = NULL;
 
         DISPLAYUPDATE(2, "\r%5u   ", testNb);
@@ -641,6 +840,8 @@
                 size_t const iSize = MIN(sampleMax, (size_t)(iend-ip));
                 size_t const oSize = LZ4F_compressBound(iSize, prefsPtr);
                 cOptions.stableSrc = ((FUZ_rand(&randState) & 3) == 1);
+                DISPLAYLEVEL(6, "Sending %zu bytes to compress (stableSrc:%u) \n",
+                                iSize, cOptions.stableSrc);   /* iSize is a size_t (unsigned): %zu is the matching conversion, %zi expects the signed counterpart */
 
                 result = LZ4F_compressUpdate(cCtx, op, oSize, ip, iSize, &cOptions);
                 CHECK(LZ4F_isError(result), "Compression failed (error %i : %s)", (int)result, LZ4F_getErrorName(result));
@@ -685,7 +886,8 @@
                 dOptions.stableDst = FUZ_rand(&randState) & 1;
                 if (nonContiguousDst==2) dOptions.stableDst = 0;   /* overwrite mode */
                 result = LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, &dOptions);
-                if (LZ4F_getErrorCode(result) == LZ4F_ERROR_contentChecksum_invalid) locateBuffDiff(srcStart, decodedBuffer, srcSize, nonContiguousDst);
+                if (LZ4F_getErrorCode(result) == LZ4F_ERROR_contentChecksum_invalid)
+                    locateBuffDiff(srcStart, decodedBuffer, srcSize, nonContiguousDst);
                 CHECK(LZ4F_isError(result), "Decompression failed (error %i:%s)", (int)result, LZ4F_getErrorName(result));
                 XXH64_update(&xxh64, op, (U32)oSize);
                 totalOut += oSize;
diff --git a/tests/fullbench.c b/tests/fullbench.c
index f489392..fd1202d 100644
--- a/tests/fullbench.c
+++ b/tests/fullbench.c
@@ -184,6 +184,11 @@
     return LZ4_compress_default(in, out, inSize, LZ4_compressBound(inSize)-1);
 }
 
+static int local_LZ4_compress_destSize(const char* in, char* out, int inSize)   /* 3-argument adapter so LZ4_compress_destSize() fits the benchmark's compressionFunction slot */
+{
+    return LZ4_compress_destSize(in, out, &inSize, LZ4_compressBound(inSize)-1);   /* destination capacity is 1 byte below LZ4_compressBound(inSize); inSize is passed by address, so the callee may modify this local copy (not visible to the caller) */
+}
+
 static int local_LZ4_compress_fast0(const char* in, char* out, int inSize)
 {
     return LZ4_compress_fast(in, out, inSize, LZ4_compressBound(inSize), 0);
@@ -215,8 +220,16 @@
 }
 
 #ifndef LZ4_DLL_IMPORT
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
 /* declare hidden function */
-int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize);
+extern int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize);
+
+#if defined (__cplusplus)
+}
+#endif
 
 static int local_LZ4_compress_forceDict(const char* in, char* out, int inSize)
 {
@@ -262,13 +275,20 @@
     return outSize;
 }
 
-static int local_LZ4_decompress_fast_usingDict(const char* in, char* out, int inSize, int outSize)
+static int local_LZ4_decompress_fast_usingDict_prefix(const char* in, char* out, int inSize, int outSize)   /* dictionary is the 65536 bytes ending exactly at `out`; decoder result is discarded and outSize is returned */
 {
     (void)inSize;
     LZ4_decompress_fast_usingDict(in, out, outSize, out - 65536, 65536);
     return outSize;
 }
 
+static int local_LZ4_decompress_fast_usingExtDict(const char* in, char* out, int inSize, int outSize)   /* same call as the _prefix variant, but with a dictionary that does not touch `out` */
+{
+    (void)inSize;   /* unused: the decoder is driven by outSize, not by the compressed size */
+    LZ4_decompress_fast_usingDict(in, out, outSize, out - 65536, 65535);   /* dict starts 65536 bytes before `out` but is 65535 long, so it ends 1 byte short of `out` -- presumably to select the external-dictionary path (per the function name; confirm in lz4.c) */
+    return outSize;   /* decoder's return value is discarded; outSize is returned unconditionally */
+}
+
 static int local_LZ4_decompress_safe_usingDict(const char* in, char* out, int inSize, int outSize)
 {
     (void)inSize;
@@ -277,7 +297,15 @@
 }
 
 #ifndef LZ4_DLL_IMPORT
-extern int LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize, const char* dict, int dictSize);
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+extern int LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize, const void* dict, size_t dictSize);
+
+#if defined (__cplusplus)
+}
+#endif
 
 static int local_LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize)
 {
@@ -289,7 +317,9 @@
 
 static int local_LZ4_decompress_safe_partial(const char* in, char* out, int inSize, int outSize)
 {
-    return LZ4_decompress_safe_partial(in, out, inSize, outSize - 5, outSize);
+    int result = LZ4_decompress_safe_partial(in, out, inSize, outSize - 5, outSize);   /* 4th argument is 5 bytes below outSize, 5th is outSize itself */
+    if (result < 0) return result;   /* negative result: hand it back to the caller unchanged */
+    return outSize;   /* otherwise report outSize rather than the decoder's own return value -- presumably so the caller's size check matches; confirm against the benchmark loop */
 }
 
 
@@ -422,20 +452,21 @@
             case 0 : DISPLAY("Compression functions : \n"); continue;
             case 1 : compressionFunction = local_LZ4_compress_default_large; compressorName = "LZ4_compress_default"; break;
             case 2 : compressionFunction = local_LZ4_compress_default_small; compressorName = "LZ4_compress_default(small dst)"; break;
-            case 3 : compressionFunction = local_LZ4_compress_fast0; compressorName = "LZ4_compress_fast(0)"; break;
-            case 4 : compressionFunction = local_LZ4_compress_fast1; compressorName = "LZ4_compress_fast(1)"; break;
-            case 5 : compressionFunction = local_LZ4_compress_fast2; compressorName = "LZ4_compress_fast(2)"; break;
-            case 6 : compressionFunction = local_LZ4_compress_fast17; compressorName = "LZ4_compress_fast(17)"; break;
-            case 7 : compressionFunction = local_LZ4_compress_fast_extState0; compressorName = "LZ4_compress_fast_extState(0)"; break;
-            case 8 : compressionFunction = local_LZ4_compress_fast_continue0; initFunction = local_LZ4_createStream; compressorName = "LZ4_compress_fast_continue(0)"; break;
+            case 3 : compressionFunction = local_LZ4_compress_destSize; compressorName = "LZ4_compress_destSize"; break;
+            case 4 : compressionFunction = local_LZ4_compress_fast0; compressorName = "LZ4_compress_fast(0)"; break;
+            case 5 : compressionFunction = local_LZ4_compress_fast1; compressorName = "LZ4_compress_fast(1)"; break;
+            case 6 : compressionFunction = local_LZ4_compress_fast2; compressorName = "LZ4_compress_fast(2)"; break;
+            case 7 : compressionFunction = local_LZ4_compress_fast17; compressorName = "LZ4_compress_fast(17)"; break;
+            case 8 : compressionFunction = local_LZ4_compress_fast_extState0; compressorName = "LZ4_compress_fast_extState(0)"; break;
+            case 9 : compressionFunction = local_LZ4_compress_fast_continue0; initFunction = local_LZ4_createStream; compressorName = "LZ4_compress_fast_continue(0)"; break;
 
             case 10: compressionFunction = local_LZ4_compress_HC; compressorName = "LZ4_compress_HC"; break;
             case 12: compressionFunction = local_LZ4_compress_HC_extStateHC; compressorName = "LZ4_compress_HC_extStateHC"; break;
             case 14: compressionFunction = local_LZ4_compress_HC_continue; initFunction = local_LZ4_resetStreamHC; compressorName = "LZ4_compress_HC_continue"; break;
 #ifndef LZ4_DLL_IMPORT
-			case 20: compressionFunction = local_LZ4_compress_forceDict; initFunction = local_LZ4_resetDictT; compressorName = "LZ4_compress_forceDict"; break;
+            case 20: compressionFunction = local_LZ4_compress_forceDict; initFunction = local_LZ4_resetDictT; compressorName = "LZ4_compress_forceDict"; break;
 #endif
-			case 30: compressionFunction = local_LZ4F_compressFrame; compressorName = "LZ4F_compressFrame";
+            case 30: compressionFunction = local_LZ4F_compressFrame; compressorName = "LZ4F_compressFrame";
                         chunkP[0].origSize = (int)benchedSize; nbChunks=1;
                         break;
             case 40: compressionFunction = local_LZ4_saveDict; compressorName = "LZ4_saveDict";
@@ -454,7 +485,7 @@
                 double averageTime;
                 clock_t clockTime;
 
-                PROGRESS("%1i- %-28.28s :%9i ->\r", loopNb, compressorName, (int)benchedSize);
+                PROGRESS("%2i-%-34.34s :%10i ->\r", loopNb, compressorName, (int)benchedSize);
                 { size_t i; for (i=0; i<benchedSize; i++) compressed_buff[i]=(char)i; }     /* warming up memory */
 
                 nb_loops = 0;
@@ -465,7 +496,8 @@
                     if (initFunction!=NULL) initFunction();
                     for (chunkNb=0; chunkNb<nbChunks; chunkNb++) {
                         chunkP[chunkNb].compressedSize = compressionFunction(chunkP[chunkNb].origBuffer, chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origSize);
-                        if (chunkP[chunkNb].compressedSize==0) DISPLAY("ERROR ! %s() = 0 !! \n", compressorName), exit(1);
+                        if (chunkP[chunkNb].compressedSize==0)
+                            DISPLAY("ERROR ! %s() = 0 !! \n", compressorName), exit(1);
                     }
                     nb_loops++;
                 }
@@ -476,13 +508,13 @@
                 if (averageTime < bestTime) bestTime = averageTime;
                 cSize=0; for (chunkNb=0; chunkNb<nbChunks; chunkNb++) cSize += chunkP[chunkNb].compressedSize;
                 ratio = (double)cSize/(double)benchedSize*100.;
-                PROGRESS("%1i- %-28.28s :%9i ->%9i (%5.2f%%),%7.1f MB/s\r", loopNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000000);
+                PROGRESS("%2i-%-34.34s :%10i ->%9i (%5.2f%%),%7.1f MB/s\r", loopNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000000);
             }
 
             if (ratio<100.)
-                DISPLAY("%2i-%-28.28s :%9i ->%9i (%5.2f%%),%7.1f MB/s\n", cAlgNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000000);
+                DISPLAY("%2i-%-34.34s :%10i ->%9i (%5.2f%%),%7.1f MB/s\n", cAlgNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000000);
             else
-                DISPLAY("%2i-%-28.28s :%9i ->%9i (%5.1f%%),%7.1f MB/s\n", cAlgNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 100000);
+                DISPLAY("%2i-%-34.34s :%10i ->%9i (%5.1f%%),%7.1f MB/s\n", cAlgNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000000);   /* only the ratio precision differs between branches; the MB/s divisor must be 1000000 in both (was 100000 here, inflating the reported speed 10x when ratio >= 100) */
         }
 
         /* Prepare layout for decompression */
@@ -503,14 +535,16 @@
         }
         for (chunkNb=0; chunkNb<nbChunks; chunkNb++) {
             chunkP[chunkNb].compressedSize = LZ4_compress_default(chunkP[chunkNb].origBuffer, chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origSize, maxCompressedChunkSize);
-            if (chunkP[chunkNb].compressedSize==0) DISPLAY("ERROR ! %s() = 0 !! \n", "LZ4_compress"), exit(1);
+            if (chunkP[chunkNb].compressedSize==0)
+                DISPLAY("ERROR ! %s() = 0 !! \n", "LZ4_compress"), exit(1);
         }
 
         /* Decompression Algorithms */
-        for (dAlgNb=0; (dAlgNb <= NB_DECOMPRESSION_ALGORITHMS) && (g_decompressionTest); dAlgNb++) {
+        for (dAlgNb=0; (dAlgNb <= NB_DECOMPRESSION_ALGORITHMS) && g_decompressionTest; dAlgNb++) {
             const char* dName;
             int (*decompressionFunction)(const char*, char*, int, int);
             double bestTime = 100000000.;
+            int checkResult = 1;
 
             if ((g_decompressionAlgo != ALL_DECOMPRESSORS) && (g_decompressionAlgo != dAlgNb)) continue;
 
@@ -518,14 +552,15 @@
             {
             case 0: DISPLAY("Decompression functions : \n"); continue;
             case 1: decompressionFunction = local_LZ4_decompress_fast; dName = "LZ4_decompress_fast"; break;
-            case 3: decompressionFunction = local_LZ4_decompress_fast_usingDict; dName = "LZ4_decompress_fast_usingDict"; break;
+            case 2: decompressionFunction = local_LZ4_decompress_fast_usingDict_prefix; dName = "LZ4_decompress_fast_usingDict(prefix)"; break;
+            case 3: decompressionFunction = local_LZ4_decompress_fast_usingExtDict; dName = "LZ4_decompress_fast_using(Ext)Dict"; break;
             case 4: decompressionFunction = LZ4_decompress_safe; dName = "LZ4_decompress_safe"; break;
             case 6: decompressionFunction = local_LZ4_decompress_safe_usingDict; dName = "LZ4_decompress_safe_usingDict"; break;
-            case 7: decompressionFunction = local_LZ4_decompress_safe_partial; dName = "LZ4_decompress_safe_partial"; break;
+            case 7: decompressionFunction = local_LZ4_decompress_safe_partial; dName = "LZ4_decompress_safe_partial"; checkResult = 0; break;
 #ifndef LZ4_DLL_IMPORT
-			case 8: decompressionFunction = local_LZ4_decompress_safe_forceExtDict; dName = "LZ4_decompress_safe_forceExtDict"; break;
+            case 8: decompressionFunction = local_LZ4_decompress_safe_forceExtDict; dName = "LZ4_decompress_safe_forceExtDict"; break;
 #endif
-			case 9: decompressionFunction = local_LZ4F_decompress; dName = "LZ4F_decompress";
+            case 9: decompressionFunction = local_LZ4F_decompress; dName = "LZ4F_decompress";
                     errorCode = LZ4F_compressFrame(compressed_buff, compressedBuffSize, orig_buff, benchedSize, NULL);
                     if (LZ4F_isError(errorCode)) {
                         DISPLAY("Error while preparing compressed frame\n");
@@ -549,7 +584,7 @@
                 clock_t clockTime;
                 U32 crcDecoded;
 
-                PROGRESS("%1i- %-29.29s :%10i ->\r", loopNb, dName, (int)benchedSize);
+                PROGRESS("%2i-%-34.34s :%10i ->\r", loopNb, dName, (int)benchedSize);
 
                 nb_loops = 0;
                 clockTime = clock();
@@ -557,9 +592,13 @@
                 clockTime = clock();
                 while(BMK_GetClockSpan(clockTime) < TIMELOOP) {
                     for (chunkNb=0; chunkNb<nbChunks; chunkNb++) {
-                        int decodedSize = decompressionFunction(chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origBuffer, chunkP[chunkNb].compressedSize, chunkP[chunkNb].origSize);
-                        if (chunkP[chunkNb].origSize != decodedSize) DISPLAY("ERROR ! %s() == %i != %i !! \n", dName, decodedSize, chunkP[chunkNb].origSize), exit(1);
-                    }
+                        int const decodedSize = decompressionFunction(chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origBuffer,
+                                                                      chunkP[chunkNb].compressedSize, chunkP[chunkNb].origSize);
+                        if (chunkP[chunkNb].origSize != decodedSize) {
+                            DISPLAY("ERROR ! %s() == %i != %i !! \n",
+                                    dName, decodedSize, chunkP[chunkNb].origSize);
+                            exit(1);
+                    }   }
                     nb_loops++;
                 }
                 clockTime = BMK_GetClockSpan(clockTime);
@@ -568,14 +607,17 @@
                 averageTime = (double)clockTime / nb_loops / CLOCKS_PER_SEC;
                 if (averageTime < bestTime) bestTime = averageTime;
 
-                PROGRESS("%1i- %-29.29s :%10i -> %7.1f MB/s\r", loopNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000);
+                PROGRESS("%2i-%-34.34s :%10i -> %7.1f MB/s\r", loopNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000);
 
                 /* CRC Checking */
                 crcDecoded = XXH32(orig_buff, (int)benchedSize, 0);
-                if (crcOriginal!=crcDecoded) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", inFileName, (unsigned)crcOriginal, (unsigned)crcDecoded); exit(1); }
-            }
+                if (checkResult && (crcOriginal!=crcDecoded)) {
+                    DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n",
+                            inFileName, (unsigned)crcOriginal, (unsigned)crcDecoded);
+                    exit(1);
+            }   }
 
-            DISPLAY("%2i-%-29.29s :%10i -> %7.1f MB/s\n", dAlgNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000);
+            DISPLAY("%2i-%-34.34s :%10i -> %7.1f MB/s\n", dAlgNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000);
         }
       }
       free(orig_buff);
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index b129c96..b29e82e 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -1,6 +1,6 @@
 /*
     fuzzer.c - Fuzzer test tool for LZ4
-    Copyright (C) Yann Collet 2012-2016
+    Copyright (C) Yann Collet 2012-2017
 
     GPL v2 License
 
@@ -32,16 +32,28 @@
 #  pragma warning(disable : 4310)    /* disable: C4310: constant char value > 127 */
 #endif
 
+#define LZ4_DISABLE_DEPRECATE_WARNINGS
+
 
 /*-************************************
 *  Dependencies
 **************************************/
+#if defined(__unix__) && !defined(_AIX)   /* must be included before platform.h for MAP_ANONYMOUS */
+#  include <sys/mman.h>   /* mmap */
+#endif
 #include "platform.h"   /* _CRT_SECURE_NO_WARNINGS */
 #include "util.h"       /* U32 */
 #include <stdlib.h>
 #include <stdio.h>      /* fgets, sscanf */
 #include <string.h>     /* strcmp */
 #include <time.h>       /* clock_t, clock, CLOCKS_PER_SEC */
+#include <assert.h>
+#if defined(__unix__) && defined(_AIX)
+#  include <sys/mman.h>   /* mmap */
+#endif
+
+#define LZ4_STATIC_LINKING_ONLY
+#define LZ4_HC_STATIC_LINKING_ONLY
 #include "lz4hc.h"
 #define XXH_STATIC_LINKING_ONLY
 #include "xxhash.h"
@@ -238,6 +250,41 @@
 }
 
 
+#ifdef __unix__   /* is expected to be triggered on linux+gcc */
+
+/* Maps `size` anonymous read/write bytes, requesting address 0x1000; aborts if mmap() fails. */
+static void* FUZ_createLowAddr(size_t size)
+{
+    void* const lowBuff = mmap((void*)(0x1000), size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (lowBuff == MAP_FAILED) { perror("fuzzer: mapping low address buffer"); abort(); }   /* never hand MAP_FAILED to callers */
+    DISPLAYLEVEL(2, "generating low buffer at address %p \n", lowBuff);
+    return lowBuff;
+}
+
+/* Unmaps the `size`-byte region at `buffer`; reports the error and aborts if munmap() fails. */
+static void FUZ_freeLowAddr(void* buffer, size_t size)
+{
+    if (munmap(buffer, size) == 0) return;   /* unmapped cleanly */
+    perror("fuzzer: freeing low address buffer");
+    abort();
+}
+
+#else
+
+static void* FUZ_createLowAddr(size_t size)   /* fallback when __unix__ is not defined : plain malloc(), no address request */
+{
+    return malloc(size);   /* result is returned unchecked, may be NULL */
+}
+
+static void FUZ_freeLowAddr(void* buffer, size_t size)   /* fallback when __unix__ is not defined : releases a malloc()'ed buffer */
+{
+    (void)size;   /* unused here : free() does not take a size */
+    free(buffer);
+}
+
+#endif
+
+
 /*! FUZ_findDiff() :
 *   find the first different byte between buff1 and buff2.
 *   presumes buff1 != buff2.
@@ -250,7 +297,7 @@
     const BYTE* const b2 = (const BYTE*)buff2;
     size_t u = 0;
     while (b1[u]==b2[u]) u++;
-    DISPLAY("Wrong Byte at position %u \n", (unsigned)u);
+    DISPLAY("\nWrong Byte at position %u \n", (unsigned)u);
 }
 
 
@@ -264,6 +311,8 @@
     size_t const compressedBufferSize = LZ4_compressBound(FUZ_MAX_BLOCK_SIZE);
     char* const compressedBuffer = (char*)malloc(compressedBufferSize);
     char* const decodedBuffer = (char*)malloc(FUZ_MAX_DICT_SIZE + FUZ_MAX_BLOCK_SIZE);
+    size_t const labSize = 96 KB;
+    void* const lowAddrBuffer = FUZ_createLowAddr(labSize);
     void* const stateLZ4   = malloc(LZ4_sizeofState());
     void* const stateLZ4HC = malloc(LZ4_sizeofStateHC());
     LZ4_stream_t LZ4dict;
@@ -274,9 +323,21 @@
     int result = 0;
     unsigned cycleNb;
 
-#   define FUZ_CHECKTEST(cond, ...) if (cond) { printf("Test %u : ", testNb); printf(__VA_ARGS__); \
-                                                printf(" (seed %u, cycle %u) \n", seed, cycleNb); goto _output_error; }
-#   define FUZ_DISPLAYTEST          { testNb++; g_displayLevel>=4 ? printf("%2u\b\b", testNb), fflush(stdout) : 0; }
+#   define FUZ_CHECKTEST(cond, ...)                            \
+        if (cond) {                                            \
+            printf("Test %u : ", testNb); printf(__VA_ARGS__); \
+            printf(" (seed %u, cycle %u) \n", seed, cycleNb);  \
+            goto _output_error;                                \
+        }
+
+#   define FUZ_DISPLAYTEST(...) {                 \
+                testNb++;                         \
+                if (g_displayLevel>=4) {          \
+                    printf("\r%4u - %2u :", cycleNb, testNb);  \
+                    printf(" " __VA_ARGS__);      \
+                    printf("   ");                \
+                    fflush(stdout);               \
+            }   }
 
 
     /* init */
@@ -300,24 +361,28 @@
         U32 testNb = 0;
         U32 randState = FUZ_rand(&coreRandState) ^ PRIME3;
         int const blockSize  = (FUZ_rand(&randState) % (FUZ_MAX_BLOCK_SIZE-1)) + 1;
-        int const blockStart = FUZ_rand(&randState) % (COMPRESSIBLE_NOISE_LENGTH - blockSize);
+        int const blockStart = (FUZ_rand(&randState) % (COMPRESSIBLE_NOISE_LENGTH - blockSize - 1)) + 1;
         int const dictSizeRand = FUZ_rand(&randState) % FUZ_MAX_DICT_SIZE;
-        int const dictSize = MIN(dictSizeRand, blockStart);
+        int const dictSize = MIN(dictSizeRand, blockStart - 1);
         int const compressionLevel = FUZ_rand(&randState) % (LZ4HC_CLEVEL_MAX+1);
-        char* const block = ((char*)CNBuffer) + blockStart;
+        const char* block = ((char*)CNBuffer) + blockStart;
         const char* dict = block - dictSize;
         int compressedSize, HCcompressedSize;
         int blockContinueCompressedSize;
         U32 const crcOrig = XXH32(block, blockSize, 0);
-        U32 crcCheck;
         int ret;
 
         FUZ_displayUpdate(cycleNb);
 
         /* Compression tests */
+        if ( ((FUZ_rand(&randState) & 63) == 2)
+          && ((size_t)blockSize < labSize) ) {
+            memcpy(lowAddrBuffer, block, blockSize);
+            block = (const char*)lowAddrBuffer;
+        }
 
         /* Test compression destSize */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_destSize()");
         {   int srcSize = blockSize;
             int const targetSize = srcSize * ((FUZ_rand(&randState) & 127)+1) >> 7;
             char endCheck = FUZ_rand(&randState) & 255;
@@ -332,7 +397,7 @@
                 U32 const crcBase = XXH32(block, srcSize, 0);
                 char const canary = FUZ_rand(&randState) & 255;
                 FUZ_CHECKTEST((ret==0), "LZ4_compress_destSize() compression failed");
-                FUZ_DISPLAYTEST;
+                FUZ_DISPLAYTEST();
                 compressedSize = ret;
                 decodedBuffer[srcSize] = canary;
                 ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, srcSize);
@@ -343,88 +408,158 @@
                   FUZ_CHECKTEST(crcDec!=crcBase, "LZ4_decompress_safe() corrupted decoded data"); }
 
                 DISPLAYLEVEL(5, " OK \n");
-            }
-            else
+            } else {
                 DISPLAYLEVEL(5, " \n");
-        }
+        }   }
+
+        /* Test compression HC destSize */
+        FUZ_DISPLAYTEST("test LZ4_compress_HC_destSize()");
+        {   int srcSize = blockSize;
+            int const targetSize = srcSize * ((FUZ_rand(&randState) & 127)+1) >> 7;
+            char const endCheck = FUZ_rand(&randState) & 255;
+            void* ctx = LZ4_createHC(block);
+            FUZ_CHECKTEST(ctx==NULL, "LZ4_createHC() allocation failed");
+            compressedBuffer[targetSize] = endCheck;
+            ret = LZ4_compress_HC_destSize(ctx, block, compressedBuffer, &srcSize, targetSize, compressionLevel);
+            DISPLAYLEVEL(5, "LZ4_compress_HC_destSize(%i): destSize : %7i/%7i; content%7i/%7i ",
+                            compressionLevel, ret, targetSize, srcSize, blockSize);
+            LZ4_freeHC(ctx);
+            FUZ_CHECKTEST(ret > targetSize, "LZ4_compress_HC_destSize() result larger than dst buffer !");
+            FUZ_CHECKTEST(compressedBuffer[targetSize] != endCheck, "LZ4_compress_HC_destSize() overwrite dst buffer !");
+            FUZ_CHECKTEST(srcSize > blockSize, "LZ4_compress_HC_destSize() fed more than src buffer !");
+            if (targetSize>0) {
+                /* check correctness */
+                U32 const crcBase = XXH32(block, srcSize, 0);
+                char const canary = FUZ_rand(&randState) & 255;
+                FUZ_CHECKTEST((ret==0), "LZ4_compress_HC_destSize() compression failed");
+                FUZ_DISPLAYTEST();
+                compressedSize = ret;
+                decodedBuffer[srcSize] = canary;
+                ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, srcSize);
+                FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe() failed on data compressed by LZ4_compressHC_destSize");
+                FUZ_CHECKTEST(ret!=srcSize, "LZ4_decompress_safe() failed : did not fully decompressed data");
+                FUZ_CHECKTEST(decodedBuffer[srcSize] != canary, "LZ4_decompress_safe() overwrite dst buffer !");
+                {   U32 const crcDec = XXH32(decodedBuffer, srcSize, 0);
+                    FUZ_CHECKTEST(crcDec!=crcBase, "LZ4_decompress_safe() corrupted decoded data");
+                }
+                DISPLAYLEVEL(5, " OK \n");
+            } else {
+                DISPLAYLEVEL(5, " \n");
+        }   }
 
         /* Test compression HC */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_HC()");
         ret = LZ4_compress_HC(block, compressedBuffer, blockSize, (int)compressedBufferSize, compressionLevel);
-        FUZ_CHECKTEST(ret==0, "LZ4_compressHC() failed");
+        FUZ_CHECKTEST(ret==0, "LZ4_compress_HC() failed");
         HCcompressedSize = ret;
 
         /* Test compression HC using external state */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_HC_extStateHC()");
         ret = LZ4_compress_HC_extStateHC(stateLZ4HC, block, compressedBuffer, blockSize, (int)compressedBufferSize, compressionLevel);
-        FUZ_CHECKTEST(ret==0, "LZ4_compressHC_withStateHC() failed");
+        FUZ_CHECKTEST(ret==0, "LZ4_compress_HC_extStateHC() failed")
+
+        /* Test compression HC using fast reset external state */
+        FUZ_DISPLAYTEST("test LZ4_compress_HC_extStateHC_fastReset()");
+        ret = LZ4_compress_HC_extStateHC_fastReset(stateLZ4HC, block, compressedBuffer, blockSize, (int)compressedBufferSize, compressionLevel);
+        FUZ_CHECKTEST(ret==0, "LZ4_compress_HC_extStateHC_fastReset() failed");
 
         /* Test compression using external state */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_fast_extState()");
         ret = LZ4_compress_fast_extState(stateLZ4, block, compressedBuffer, blockSize, (int)compressedBufferSize, 8);
-        FUZ_CHECKTEST(ret==0, "LZ4_compress_withState() failed");
+        FUZ_CHECKTEST(ret==0, "LZ4_compress_fast_extState() failed");
+
+        /* Test compression using fast reset external state*/
+        FUZ_DISPLAYTEST();
+        ret = LZ4_compress_fast_extState_fastReset(stateLZ4, block, compressedBuffer, blockSize, (int)compressedBufferSize, 8);
+        FUZ_CHECKTEST(ret==0, "LZ4_compress_fast_extState_fastReset() failed");
 
         /* Test compression */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_default()");
         ret = LZ4_compress_default(block, compressedBuffer, blockSize, (int)compressedBufferSize);
-        FUZ_CHECKTEST(ret==0, "LZ4_compress() failed");
+        FUZ_CHECKTEST(ret==0, "LZ4_compress_default() failed");
         compressedSize = ret;
 
         /* Decompression tests */
 
-        /* Test decoding with output size being exactly what's necessary => must work */
-        FUZ_DISPLAYTEST;
+        /* Test decoding with output size exactly correct => must work */
+        FUZ_DISPLAYTEST("LZ4_decompress_fast() with exact output buffer");
         ret = LZ4_decompress_fast(compressedBuffer, decodedBuffer, blockSize);
         FUZ_CHECKTEST(ret<0, "LZ4_decompress_fast failed despite correct space");
         FUZ_CHECKTEST(ret!=compressedSize, "LZ4_decompress_fast failed : did not fully read compressed data");
-        crcCheck = XXH32(decodedBuffer, blockSize, 0);
-        FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_fast corrupted decoded data");
+        {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_fast corrupted decoded data");
+        }
 
         /* Test decoding with one byte missing => must fail */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("LZ4_decompress_fast() with output buffer 1-byte too short");
         decodedBuffer[blockSize-1] = 0;
         ret = LZ4_decompress_fast(compressedBuffer, decodedBuffer, blockSize-1);
         FUZ_CHECKTEST(ret>=0, "LZ4_decompress_fast should have failed, due to Output Size being too small");
         FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_fast overrun specified output buffer");
 
         /* Test decoding with one byte too much => must fail */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         ret = LZ4_decompress_fast(compressedBuffer, decodedBuffer, blockSize+1);
         FUZ_CHECKTEST(ret>=0, "LZ4_decompress_fast should have failed, due to Output Size being too large");
 
+        /* Test decoding with empty input */
+        FUZ_DISPLAYTEST("LZ4_decompress_safe() with empty input");
+        LZ4_decompress_safe(compressedBuffer, decodedBuffer, 0, blockSize);
+
+        /* Test decoding with a one byte input */
+        FUZ_DISPLAYTEST("LZ4_decompress_safe() with one byte input");
+        {   char const tmp = 0xFF;
+            LZ4_decompress_safe(&tmp, decodedBuffer, 1, blockSize);
+        }
+
+        /* Test decoding shortcut edge case */
+        FUZ_DISPLAYTEST("LZ4_decompress_safe() with shortcut edge case");
+        {   char tmp[17];
+            /* 14 bytes of literals, followed by a 14 byte match.
+             * Should not read beyond the end of the buffer.
+             * See https://github.com/lz4/lz4/issues/508. */
+            *tmp = 0xEE;
+            memset(tmp + 1, 0, 14);
+            tmp[15] = 14;
+            tmp[16] = 0;
+            ret = LZ4_decompress_safe(tmp, decodedBuffer, sizeof(tmp), blockSize);
+            FUZ_CHECKTEST(ret >= 0, "LZ4_decompress_safe() should fail");
+        }
+
+
         /* Test decoding with output size exactly what's necessary => must work */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         decodedBuffer[blockSize] = 0;
         ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, blockSize);
         FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe failed despite sufficient space");
         FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe did not regenerate original data");
         FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe overrun specified output buffer size");
-        crcCheck = XXH32(decodedBuffer, blockSize, 0);
-        FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe corrupted decoded data");
+        {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe corrupted decoded data");
+        }
 
         // Test decoding with more than enough output size => must work
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         decodedBuffer[blockSize] = 0;
         decodedBuffer[blockSize+1] = 0;
         ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, blockSize+1);
         FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe failed despite amply sufficient space");
         FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe did not regenerate original data");
-        //FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe wrote more than (unknown) target size");   // well, is that an issue ?
         FUZ_CHECKTEST(decodedBuffer[blockSize+1], "LZ4_decompress_safe overrun specified output buffer size");
-        crcCheck = XXH32(decodedBuffer, blockSize, 0);
-        FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe corrupted decoded data");
+        {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe corrupted decoded data");
+        }
 
         // Test decoding with output size being one byte too short => must fail
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         decodedBuffer[blockSize-1] = 0;
         ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, blockSize-1);
         FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to Output Size being one byte too short");
         FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_safe overrun specified output buffer size");
 
         // Test decoding with output size being 10 bytes too short => must fail
-        FUZ_DISPLAYTEST;
-        if (blockSize>10)
-        {
+        FUZ_DISPLAYTEST();
+        if (blockSize>10) {
             decodedBuffer[blockSize-10] = 0;
             ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, blockSize-10);
             FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to Output Size being 10 bytes too short");
@@ -432,69 +567,70 @@
         }
 
         // Test decoding with input size being one byte too short => must fail
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize-1, blockSize);
         FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to input size being one byte too short (blockSize=%i, ret=%i, compressedSize=%i)", blockSize, ret, compressedSize);
 
         // Test decoding with input size being one byte too large => must fail
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         decodedBuffer[blockSize] = 0;
         ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize+1, blockSize);
         FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to input size being too large");
         FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe overrun specified output buffer size");
 
-        // Test partial decoding with target output size being max/2 => must work
-        FUZ_DISPLAYTEST;
-        ret = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, compressedSize, blockSize/2, blockSize);
-        FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe_partial failed despite sufficient space");
-
-        // Test partial decoding with target output size being just below max => must work
-        FUZ_DISPLAYTEST;
-        ret = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, compressedSize, blockSize-3, blockSize);
-        FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe_partial failed despite sufficient space");
+        /* Test partial decoding => must work */
+        FUZ_DISPLAYTEST("test LZ4_decompress_safe_partial");
+        {   size_t const missingBytes = FUZ_rand(&randState) % blockSize;
+            int const targetSize = (int)(blockSize - missingBytes);
+            char const sentinel = decodedBuffer[targetSize] = block[targetSize] ^ 0x5A;
+            int const decResult = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, compressedSize, targetSize, blockSize);
+            FUZ_CHECKTEST(decResult<0, "LZ4_decompress_safe_partial failed despite valid input data (error:%i)", decResult);
+            FUZ_CHECKTEST(decResult != targetSize, "LZ4_decompress_safe_partial did not regenerated required amount of data (%i < %i <= %i)", decResult, targetSize, blockSize);
+            FUZ_CHECKTEST(decodedBuffer[targetSize] != sentinel, "LZ4_decompress_safe_partial overwrite beyond requested size (though %i <= %i <= %i)", decResult, targetSize, blockSize);
+        }
 
         /* Test Compression with limited output size */
 
         /* Test compression with output size being exactly what's necessary (should work) */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_default() with output buffer just the right size");
         ret = LZ4_compress_default(block, compressedBuffer, blockSize, compressedSize);
-        FUZ_CHECKTEST(ret==0, "LZ4_compress_limitedOutput() failed despite sufficient space");
+        FUZ_CHECKTEST(ret==0, "LZ4_compress_default() failed despite sufficient space");
 
         /* Test compression with output size being exactly what's necessary and external state (should work) */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_fast_extState() with output buffer just the right size");
         ret = LZ4_compress_fast_extState(stateLZ4, block, compressedBuffer, blockSize, compressedSize, 1);
-        FUZ_CHECKTEST(ret==0, "LZ4_compress_limitedOutput_withState() failed despite sufficient space");
+        FUZ_CHECKTEST(ret==0, "LZ4_compress_fast_extState() failed despite sufficient space");
 
         /* Test HC compression with output size being exactly what's necessary (should work) */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_HC() with output buffer just the right size");
         ret = LZ4_compress_HC(block, compressedBuffer, blockSize, HCcompressedSize, compressionLevel);
-        FUZ_CHECKTEST(ret==0, "LZ4_compressHC_limitedOutput() failed despite sufficient space");
+        FUZ_CHECKTEST(ret==0, "LZ4_compress_HC() failed despite sufficient space");
 
         /* Test HC compression with output size being exactly what's necessary (should work) */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_HC_extStateHC() with output buffer just the right size");
         ret = LZ4_compress_HC_extStateHC(stateLZ4HC, block, compressedBuffer, blockSize, HCcompressedSize, compressionLevel);
-        FUZ_CHECKTEST(ret==0, "LZ4_compressHC_limitedOutput_withStateHC() failed despite sufficient space");
+        FUZ_CHECKTEST(ret==0, "LZ4_compress_HC_extStateHC() failed despite sufficient space");
 
         /* Test compression with missing bytes into output buffer => must fail */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_default() with output buffer a bit too short");
         {   int missingBytes = (FUZ_rand(&randState) % 0x3F) + 1;
             if (missingBytes >= compressedSize) missingBytes = compressedSize-1;
             missingBytes += !missingBytes;   /* avoid special case missingBytes==0 */
             compressedBuffer[compressedSize-missingBytes] = 0;
             ret = LZ4_compress_default(block, compressedBuffer, blockSize, compressedSize-missingBytes);
-            FUZ_CHECKTEST(ret, "LZ4_compress_limitedOutput should have failed (output buffer too small by %i byte)", missingBytes);
-            FUZ_CHECKTEST(compressedBuffer[compressedSize-missingBytes], "LZ4_compress_limitedOutput overran output buffer ! (%i missingBytes)", missingBytes)
+            FUZ_CHECKTEST(ret, "LZ4_compress_default should have failed (output buffer too small by %i byte)", missingBytes);
+            FUZ_CHECKTEST(compressedBuffer[compressedSize-missingBytes], "LZ4_compress_default overran output buffer ! (%i missingBytes)", missingBytes)
         }
 
         /* Test HC compression with missing bytes into output buffer => must fail */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_HC() with output buffer a bit too short");
         {   int missingBytes = (FUZ_rand(&randState) % 0x3F) + 1;
             if (missingBytes >= HCcompressedSize) missingBytes = HCcompressedSize-1;
             missingBytes += !missingBytes;   /* avoid special case missingBytes==0 */
             compressedBuffer[HCcompressedSize-missingBytes] = 0;
             ret = LZ4_compress_HC(block, compressedBuffer, blockSize, HCcompressedSize-missingBytes, compressionLevel);
-            FUZ_CHECKTEST(ret, "LZ4_compressHC_limitedOutput should have failed (output buffer too small by %i byte)", missingBytes);
-            FUZ_CHECKTEST(compressedBuffer[HCcompressedSize-missingBytes], "LZ4_compressHC_limitedOutput overran output buffer ! (%i missingBytes)", missingBytes)
+            FUZ_CHECKTEST(ret, "LZ4_compress_HC should have failed (output buffer too small by %i byte)", missingBytes);
+            FUZ_CHECKTEST(compressedBuffer[HCcompressedSize-missingBytes], "LZ4_compress_HC overran output buffer ! (%i missingBytes)", missingBytes)
         }
 
 
@@ -503,84 +639,178 @@
         /*-******************/
 
         /* Compress using dictionary */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_fast_continue() with dictionary of size %i", dictSize);
         {   LZ4_stream_t LZ4_stream;
             LZ4_resetStream(&LZ4_stream);
             LZ4_compress_fast_continue (&LZ4_stream, dict, compressedBuffer, dictSize, (int)compressedBufferSize, 1);   /* Just to fill hash tables */
             blockContinueCompressedSize = LZ4_compress_fast_continue (&LZ4_stream, block, compressedBuffer, blockSize, (int)compressedBufferSize, 1);
-            FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_continue failed");
+            FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_fast_continue failed");
         }
 
         /* Decompress with dictionary as prefix */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_decompress_fast_usingDict() with dictionary as prefix");
         memcpy(decodedBuffer, dict, dictSize);
         ret = LZ4_decompress_fast_usingDict(compressedBuffer, decodedBuffer+dictSize, blockSize, decodedBuffer, dictSize);
-        FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_decompress_fast_withPrefix64k did not read all compressed block input");
-        crcCheck = XXH32(decodedBuffer+dictSize, blockSize, 0);
-        if (crcCheck!=crcOrig) {
-            int i=0;
-            while (block[i]==decodedBuffer[i]) i++;
-            printf("Wrong Byte at position %i/%i\n", i, blockSize);
-
+        FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_decompress_fast_usingDict did not read all compressed block input");
+        {   U32 const crcCheck = XXH32(decodedBuffer+dictSize, blockSize, 0);
+            if (crcCheck!=crcOrig) FUZ_findDiff(block, decodedBuffer);
+            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_fast_usingDict corrupted decoded data (dict %i)", dictSize);
         }
-        FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_fast_withPrefix64k corrupted decoded data (dict %i)", dictSize);
 
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_decompress_safe_usingDict()");
         ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer+dictSize, blockContinueCompressedSize, blockSize, decodedBuffer, dictSize);
         FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_usingDict did not regenerate original data");
-        crcCheck = XXH32(decodedBuffer+dictSize, blockSize, 0);
-        FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data");
+        {   U32 const crcCheck = XXH32(decodedBuffer+dictSize, blockSize, 0);
+            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data");
+        }
 
         /* Compress using External dictionary */
-        FUZ_DISPLAYTEST;
-        dict -= (FUZ_rand(&randState) & 0xF) + 1;   /* Separation, so it is an ExtDict */
+        FUZ_DISPLAYTEST("test LZ4_compress_fast_continue(), with non-contiguous dictionary");
+        dict -= (FUZ_rand(&randState) & 0xF) + 1;   /* create space, so now dictionary is an ExtDict */
         if (dict < (char*)CNBuffer) dict = (char*)CNBuffer;
         LZ4_loadDict(&LZ4dict, dict, dictSize);
         blockContinueCompressedSize = LZ4_compress_fast_continue(&LZ4dict, block, compressedBuffer, blockSize, (int)compressedBufferSize, 1);
-        FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_continue failed");
+        FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_fast_continue failed");
 
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_fast_continue() with dictionary but with an output buffer too short by one byte");
         LZ4_loadDict(&LZ4dict, dict, dictSize);
         ret = LZ4_compress_fast_continue(&LZ4dict, block, compressedBuffer, blockSize, blockContinueCompressedSize-1, 1);
-        FUZ_CHECKTEST(ret>0, "LZ4_compress_limitedOutput_continue using ExtDict should fail : one missing byte for output buffer : %i written, %i buffer", ret, blockContinueCompressedSize);
+        FUZ_CHECKTEST(ret>0, "LZ4_compress_fast_continue using ExtDict should fail : one missing byte for output buffer : %i written, %i buffer", ret, blockContinueCompressedSize);
 
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_fast_continue() with dictionary loaded with LZ4_loadDict()");
+        DISPLAYLEVEL(5, " compress %i bytes from buffer(%p) into dst(%p) using dict(%p) of size %i \n", blockSize, block, decodedBuffer, dict, dictSize);
         LZ4_loadDict(&LZ4dict, dict, dictSize);
         ret = LZ4_compress_fast_continue(&LZ4dict, block, compressedBuffer, blockSize, blockContinueCompressedSize, 1);
         FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_limitedOutput_compressed size is different (%i != %i)", ret, blockContinueCompressedSize);
-        FUZ_CHECKTEST(ret<=0, "LZ4_compress_limitedOutput_continue should work : enough size available within output buffer");
+        FUZ_CHECKTEST(ret<=0, "LZ4_compress_fast_continue should work : enough size available within output buffer");
 
         /* Decompress with dictionary as external */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_decompress_fast_usingDict() with dictionary as extDict");
+        DISPLAYLEVEL(5, " decoding %i bytes from buffer(%p) using dict(%p) of size %i \n", blockSize, decodedBuffer, dict, dictSize);
         decodedBuffer[blockSize] = 0;
         ret = LZ4_decompress_fast_usingDict(compressedBuffer, decodedBuffer, blockSize, dict, dictSize);
         FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_decompress_fast_usingDict did not read all compressed block input");
         FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_fast_usingDict overrun specified output buffer size");
-        crcCheck = XXH32(decodedBuffer, blockSize, 0);
-        if (crcCheck!=crcOrig) FUZ_findDiff(block, decodedBuffer);
-        FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_fast_usingDict corrupted decoded data (dict %i)", dictSize);
+        {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+            if (crcCheck!=crcOrig) FUZ_findDiff(block, decodedBuffer);
+            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_fast_usingDict corrupted decoded data (dict %i)", dictSize);
+        }
 
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         decodedBuffer[blockSize] = 0;
         ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize, dict, dictSize);
         FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_usingDict did not regenerate original data");
         FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size");
-        crcCheck = XXH32(decodedBuffer, blockSize, 0);
-        FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data");
+        {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data");
+        }
 
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         decodedBuffer[blockSize-1] = 0;
         ret = LZ4_decompress_fast_usingDict(compressedBuffer, decodedBuffer, blockSize-1, dict, dictSize);
         FUZ_CHECKTEST(ret>=0, "LZ4_decompress_fast_usingDict should have failed : wrong original size (-1 byte)");
         FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_fast_usingDict overrun specified output buffer size");
 
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         decodedBuffer[blockSize-1] = 0;
         ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize-1, dict, dictSize);
         FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe_usingDict should have failed : not enough output size (-1 byte)");
         FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_safe_usingDict overrun specified output buffer size");
 
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
+        {   U32 const missingBytes = (FUZ_rand(&randState) & 0xF) + 2;
+            if ((U32)blockSize > missingBytes) {
+                decodedBuffer[blockSize-missingBytes] = 0;
+                ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize-missingBytes, dict, dictSize);
+                FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe_usingDict should have failed : output buffer too small (-%u byte)", missingBytes);
+                FUZ_CHECKTEST(decodedBuffer[blockSize-missingBytes], "LZ4_decompress_safe_usingDict overrun specified output buffer size (-%u byte) (blockSize=%i)", missingBytes, blockSize);
+        }   }
+
+        /* Compress using external dictionary stream */
+        {
+            LZ4_stream_t LZ4_stream;
+            int expectedSize;
+            U32 expectedCrc;
+
+            FUZ_DISPLAYTEST("LZ4_compress_fast_continue() after LZ4_loadDict()");
+            LZ4_loadDict(&LZ4dict, dict, dictSize);
+            expectedSize = LZ4_compress_fast_continue(&LZ4dict, block, compressedBuffer, blockSize, (int)compressedBufferSize, 1);
+            FUZ_CHECKTEST(expectedSize<=0, "LZ4_compress_fast_continue reference compression for extDictCtx should have succeeded");
+            expectedCrc = XXH32(compressedBuffer, expectedSize, 0);
+
+            FUZ_DISPLAYTEST("LZ4_compress_fast_continue() after LZ4_attach_dictionary()");
+            LZ4_loadDict(&LZ4dict, dict, dictSize);
+            LZ4_resetStream(&LZ4_stream);
+            LZ4_attach_dictionary(&LZ4_stream, &LZ4dict);
+            blockContinueCompressedSize = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, (int)compressedBufferSize, 1);
+            FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_fast_continue using extDictCtx failed");
+
+            /* In the future, it might be desirable to let extDictCtx mode's
+             * output diverge from the output generated by regular extDict mode.
+             * Until that time, this comparison serves as a good regression
+             * test.
+             */
+            FUZ_CHECKTEST(blockContinueCompressedSize != expectedSize, "LZ4_compress_fast_continue using extDictCtx produced different-sized output (%d expected vs %d actual)", expectedSize, blockContinueCompressedSize);
+            FUZ_CHECKTEST(XXH32(compressedBuffer, blockContinueCompressedSize, 0) != expectedCrc, "LZ4_compress_fast_continue using extDictCtx produced different output");
+
+            FUZ_DISPLAYTEST("LZ4_compress_fast_continue() after LZ4_attach_dictionary(), but output buffer is 1 byte too short");
+            LZ4_resetStream(&LZ4_stream);
+            LZ4_attach_dictionary(&LZ4_stream, &LZ4dict);
+            ret = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, blockContinueCompressedSize-1, 1);
+            FUZ_CHECKTEST(ret>0, "LZ4_compress_fast_continue using extDictCtx should fail : one missing byte for output buffer : %i written, %i buffer", ret, blockContinueCompressedSize);
+
+            FUZ_DISPLAYTEST();
+            LZ4_resetStream(&LZ4_stream);
+            LZ4_attach_dictionary(&LZ4_stream, &LZ4dict);
+            ret = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, blockContinueCompressedSize, 1);
+            FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_limitedOutput_compressed size is different (%i != %i)", ret, blockContinueCompressedSize);
+            FUZ_CHECKTEST(ret<=0, "LZ4_compress_fast_continue using extDictCtx should work : enough size available within output buffer");
+            FUZ_CHECKTEST(ret != expectedSize, "LZ4_compress_fast_continue using extDictCtx produced different-sized output");
+            FUZ_CHECKTEST(XXH32(compressedBuffer, ret, 0) != expectedCrc, "LZ4_compress_fast_continue using extDictCtx produced different output");
+
+            FUZ_DISPLAYTEST();
+            LZ4_resetStream_fast(&LZ4_stream);
+            LZ4_attach_dictionary(&LZ4_stream, &LZ4dict);
+            ret = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, blockContinueCompressedSize, 1);
+            FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_limitedOutput_compressed size is different (%i != %i)", ret, blockContinueCompressedSize);
+            FUZ_CHECKTEST(ret<=0, "LZ4_compress_fast_continue using extDictCtx with re-used context should work : enough size available within output buffer");
+            FUZ_CHECKTEST(ret != expectedSize, "LZ4_compress_fast_continue using extDictCtx produced different-sized output");
+            FUZ_CHECKTEST(XXH32(compressedBuffer, ret, 0) != expectedCrc, "LZ4_compress_fast_continue using extDictCtx produced different output");
+        }
+
+        /* Decompress with dictionary as external */
+        FUZ_DISPLAYTEST();
+        decodedBuffer[blockSize] = 0;
+        ret = LZ4_decompress_fast_usingDict(compressedBuffer, decodedBuffer, blockSize, dict, dictSize);
+        FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_decompress_fast_usingDict did not read all compressed block input");
+        FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_fast_usingDict overrun specified output buffer size");
+        {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+            if (crcCheck!=crcOrig) FUZ_findDiff(block, decodedBuffer);
+            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_fast_usingDict corrupted decoded data (dict %i)", dictSize);
+        }
+
+        FUZ_DISPLAYTEST();
+        decodedBuffer[blockSize] = 0;
+        ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize, dict, dictSize);
+        FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_usingDict did not regenerate original data");
+        FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size");
+        {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data");
+        }
+
+        FUZ_DISPLAYTEST();
+        decodedBuffer[blockSize-1] = 0;
+        ret = LZ4_decompress_fast_usingDict(compressedBuffer, decodedBuffer, blockSize-1, dict, dictSize);
+        FUZ_CHECKTEST(ret>=0, "LZ4_decompress_fast_usingDict should have failed : wrong original size (-1 byte)");
+        FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_fast_usingDict overrun specified output buffer size");
+
+        FUZ_DISPLAYTEST();
+        decodedBuffer[blockSize-1] = 0;
+        ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize-1, dict, dictSize);
+        FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe_usingDict should have failed : not enough output size (-1 byte)");
+        FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_safe_usingDict overrun specified output buffer size");
+
+        FUZ_DISPLAYTEST("LZ4_decompress_safe_usingDict with a too small output buffer");
         {   U32 const missingBytes = (FUZ_rand(&randState) & 0xF) + 2;
             if ((U32)blockSize > missingBytes) {
                 decodedBuffer[blockSize-missingBytes] = 0;
@@ -590,34 +820,103 @@
         }   }
 
         /* Compress HC using External dictionary */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("LZ4_compress_HC_continue with an external dictionary");
         dict -= (FUZ_rand(&randState) & 7);    /* even bigger separation */
         if (dict < (char*)CNBuffer) dict = (char*)CNBuffer;
         LZ4_resetStreamHC (&LZ4dictHC, compressionLevel);
         LZ4_loadDictHC(&LZ4dictHC, dict, dictSize);
+        LZ4_setCompressionLevel(&LZ4dictHC, compressionLevel-1);
         blockContinueCompressedSize = LZ4_compress_HC_continue(&LZ4dictHC, block, compressedBuffer, blockSize, (int)compressedBufferSize);
-        FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compressHC_continue failed");
+        FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_HC_continue failed");
 
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         LZ4_loadDictHC(&LZ4dictHC, dict, dictSize);
         ret = LZ4_compress_HC_continue(&LZ4dictHC, block, compressedBuffer, blockSize, blockContinueCompressedSize-1);
-        FUZ_CHECKTEST(ret>0, "LZ4_compressHC_limitedOutput_continue using ExtDict should fail : one missing byte for output buffer (%i != %i)", ret, blockContinueCompressedSize);
+        FUZ_CHECKTEST(ret>0, "LZ4_compress_HC_continue using ExtDict should fail : one missing byte for output buffer (%i != %i)", ret, blockContinueCompressedSize);
 
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         LZ4_loadDictHC(&LZ4dictHC, dict, dictSize);
         ret = LZ4_compress_HC_continue(&LZ4dictHC, block, compressedBuffer, blockSize, blockContinueCompressedSize);
-        FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_limitedOutput_compressed size is different (%i != %i)", ret, blockContinueCompressedSize);
-        FUZ_CHECKTEST(ret<=0, "LZ4_compress_limitedOutput_continue should work : enough size available within output buffer");
+        FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_HC_continue size is different (%i != %i)", ret, blockContinueCompressedSize);
+        FUZ_CHECKTEST(ret<=0, "LZ4_compress_HC_continue should work : enough size available within output buffer");
 
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         decodedBuffer[blockSize] = 0;
         ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize, dict, dictSize);
         FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_usingDict did not regenerate original data");
-        FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size")
-            crcCheck = XXH32(decodedBuffer, blockSize, 0);
-        if (crcCheck!=crcOrig)
-            FUZ_findDiff(block, decodedBuffer);
-        FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data");
+        FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size");
+        {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+            if (crcCheck!=crcOrig) FUZ_findDiff(block, decodedBuffer);
+            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data");
+        }
+
+        /* Compress HC using external dictionary stream */
+        FUZ_DISPLAYTEST();
+        {
+            LZ4_streamHC_t LZ4_streamHC;
+
+            LZ4_resetStreamHC (&LZ4dictHC, compressionLevel);
+            LZ4_loadDictHC(&LZ4dictHC, dict, dictSize);
+            LZ4_resetStreamHC (&LZ4_streamHC, compressionLevel);
+            LZ4_attach_HC_dictionary(&LZ4_streamHC, &LZ4dictHC);
+            blockContinueCompressedSize = LZ4_compress_HC_continue(&LZ4_streamHC, block, compressedBuffer, blockSize, (int)compressedBufferSize);
+            FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_HC_continue with ExtDictCtx failed");
+
+            FUZ_DISPLAYTEST();
+            LZ4_resetStreamHC (&LZ4_streamHC, compressionLevel);
+            LZ4_attach_HC_dictionary(&LZ4_streamHC, &LZ4dictHC);
+            ret = LZ4_compress_HC_continue(&LZ4_streamHC, block, compressedBuffer, blockSize, blockContinueCompressedSize-1);
+            FUZ_CHECKTEST(ret>0, "LZ4_compress_HC_continue using ExtDictCtx should fail : one missing byte for output buffer (%i != %i)", ret, blockContinueCompressedSize);
+
+            FUZ_DISPLAYTEST();
+            LZ4_resetStreamHC (&LZ4_streamHC, compressionLevel);
+            LZ4_attach_HC_dictionary(&LZ4_streamHC, &LZ4dictHC);
+            ret = LZ4_compress_HC_continue(&LZ4_streamHC, block, compressedBuffer, blockSize, blockContinueCompressedSize);
+            FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_HC_continue using ExtDictCtx size is different (%i != %i)", ret, blockContinueCompressedSize);
+            FUZ_CHECKTEST(ret<=0, "LZ4_compress_HC_continue using ExtDictCtx should work : enough size available within output buffer");
+
+            FUZ_DISPLAYTEST();
+            LZ4_resetStreamHC_fast (&LZ4_streamHC, compressionLevel);
+            LZ4_attach_HC_dictionary(&LZ4_streamHC, &LZ4dictHC);
+            ret = LZ4_compress_HC_continue(&LZ4_streamHC, block, compressedBuffer, blockSize, blockContinueCompressedSize);
+            FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_HC_continue using ExtDictCtx and fast reset size is different (%i != %i)", ret, blockContinueCompressedSize);
+            FUZ_CHECKTEST(ret<=0, "LZ4_compress_HC_continue using ExtDictCtx and fast reset should work : enough size available within output buffer");
+
+            FUZ_DISPLAYTEST();
+            decodedBuffer[blockSize] = 0;
+            ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize, dict, dictSize);
+            FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_usingDict did not regenerate original data");
+            FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size");
+            {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+                if (crcCheck!=crcOrig) FUZ_findDiff(block, decodedBuffer);
+                FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data");
+            }
+        }
+
+        /* Compress HC continue destSize */
+        FUZ_DISPLAYTEST();
+        {   int const availableSpace = (FUZ_rand(&randState) % blockSize) + 5;
+            int consumedSize = blockSize;
+            FUZ_DISPLAYTEST();
+            LZ4_resetStreamHC (&LZ4dictHC, compressionLevel);
+            LZ4_loadDictHC(&LZ4dictHC, dict, dictSize);
+            blockContinueCompressedSize = LZ4_compress_HC_continue_destSize(&LZ4dictHC, block, compressedBuffer, &consumedSize, availableSpace);
+            DISPLAYLEVEL(5, " LZ4_compress_HC_continue_destSize : compressed %6i/%6i into %6i/%6i at cLevel=%i\n", consumedSize, blockSize, blockContinueCompressedSize, availableSpace, compressionLevel);
+            FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_HC_continue_destSize failed");
+            FUZ_CHECKTEST(blockContinueCompressedSize > availableSpace, "LZ4_compress_HC_continue_destSize write overflow");
+            FUZ_CHECKTEST(consumedSize > blockSize, "LZ4_compress_HC_continue_destSize read overflow");
+
+            FUZ_DISPLAYTEST();
+            decodedBuffer[consumedSize] = 0;
+            ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, consumedSize, dict, dictSize);
+            FUZ_CHECKTEST(ret!=consumedSize, "LZ4_decompress_safe_usingDict did not regenerate original data");
+            FUZ_CHECKTEST(decodedBuffer[consumedSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size")
+            {   U32 const crcSrc = XXH32(block, consumedSize, 0);
+                U32 const crcDst = XXH32(decodedBuffer, consumedSize, 0);
+                if (crcSrc!=crcDst) FUZ_findDiff(block, decodedBuffer);
+                FUZ_CHECKTEST(crcSrc!=crcDst, "LZ4_decompress_safe_usingDict corrupted decoded data");
+            }
+        }
 
         /* ***** End of tests *** */
         /* Fill stats */
@@ -641,6 +940,7 @@
         free(CNBuffer);
         free(compressedBuffer);
         free(decodedBuffer);
+        FUZ_freeLowAddr(lowAddrBuffer, labSize);
         free(stateLZ4);
         free(stateLZ4HC);
         return result;
@@ -663,6 +963,7 @@
     const unsigned cycleNb= 0;
     char testInput[testInputSize];
     char testCompressed[testCompressedSize];
+    size_t const testVerifySize = testInputSize;
     char testVerify[testInputSize];
     char ringBuffer[ringBufferSize];
     U32 randState = 1;
@@ -688,7 +989,7 @@
         crcOrig = XXH64(testInput, testCompressedSize, 0);
         LZ4_resetStream(&streamingState);
         result = LZ4_compress_fast_continue(&streamingState, testInput, testCompressed, testCompressedSize, testCompressedSize-1, 1);
-        FUZ_CHECKTEST(result==0, "LZ4_compress_limitedOutput_continue() compression failed");
+        FUZ_CHECKTEST(result==0, "LZ4_compress_fast_continue() compression failed!");
 
         result = LZ4_decompress_safe(testCompressed, testVerify, result, testCompressedSize);
         FUZ_CHECKTEST(result!=(int)testCompressedSize, "LZ4_decompress_safe() decompression failed");
@@ -697,8 +998,8 @@
 
         /* ring buffer test */
         {   XXH64_state_t xxhOrig;
-            XXH64_state_t xxhNew;
-            LZ4_streamDecode_t decodeState;
+            XXH64_state_t xxhNewSafe, xxhNewFast;
+            LZ4_streamDecode_t decodeStateSafe, decodeStateFast;
             const U32 maxMessageSizeLog = 10;
             const U32 maxMessageSizeMask = (1<<maxMessageSizeLog) - 1;
             U32 messageSize = (FUZ_rand(&randState) & maxMessageSizeMask) + 1;
@@ -706,26 +1007,36 @@
             U32 rNext = 0;
             U32 dNext = 0;
             const U32 dBufferSize = ringBufferSize + maxMessageSizeMask;
+            int compressedSize;
 
             XXH64_reset(&xxhOrig, 0);
-            XXH64_reset(&xxhNew, 0);
+            XXH64_reset(&xxhNewSafe, 0);
+            XXH64_reset(&xxhNewFast, 0);
             LZ4_resetStream(&streamingState);
-            LZ4_setStreamDecode(&decodeState, NULL, 0);
+            LZ4_setStreamDecode(&decodeStateSafe, NULL, 0);
+            LZ4_setStreamDecode(&decodeStateFast, NULL, 0);
 
             while (iNext + messageSize < testCompressedSize) {
                 XXH64_update(&xxhOrig, testInput + iNext, messageSize);
                 crcOrig = XXH64_digest(&xxhOrig);
 
                 memcpy (ringBuffer + rNext, testInput + iNext, messageSize);
-                result = LZ4_compress_fast_continue(&streamingState, ringBuffer + rNext, testCompressed, messageSize, testCompressedSize-ringBufferSize, 1);
-                FUZ_CHECKTEST(result==0, "LZ4_compress_limitedOutput_continue() compression failed");
+                compressedSize = LZ4_compress_fast_continue(&streamingState, ringBuffer + rNext, testCompressed, messageSize, testCompressedSize-ringBufferSize, 1);
+                FUZ_CHECKTEST(compressedSize==0, "LZ4_compress_fast_continue() compression failed");
 
-                result = LZ4_decompress_safe_continue(&decodeState, testCompressed, testVerify + dNext, result, messageSize);
-                FUZ_CHECKTEST(result!=(int)messageSize, "ringBuffer : LZ4_decompress_safe() test failed");
+                result = LZ4_decompress_safe_continue(&decodeStateSafe, testCompressed, testVerify + dNext, compressedSize, messageSize);
+                FUZ_CHECKTEST(result!=(int)messageSize, "ringBuffer : LZ4_decompress_safe_continue() test failed");
 
-                XXH64_update(&xxhNew, testVerify + dNext, messageSize);
-                { U64 const crcNew = XXH64_digest(&xxhNew);
-                  FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() decompression corruption"); }
+                XXH64_update(&xxhNewSafe, testVerify + dNext, messageSize);
+                { U64 const crcNew = XXH64_digest(&xxhNewSafe);
+                  FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_continue() decompression corruption"); }
+
+                result = LZ4_decompress_fast_continue(&decodeStateFast, testCompressed, testVerify + dNext, messageSize);
+                FUZ_CHECKTEST(result!=compressedSize, "ringBuffer : LZ4_decompress_fast_continue() test failed");
+
+                XXH64_update(&xxhNewFast, testVerify + dNext, messageSize);
+                { U64 const crcNew = XXH64_digest(&xxhNewFast);
+                  FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_fast_continue() decompression corruption"); }
 
                 /* prepare next message */
                 iNext += messageSize;
@@ -835,7 +1146,6 @@
                 }
 
                 dict = dst;
-                //dict = testInput + segStart;
                 dictSize = segSize;
 
                 dst += segSize + 1;
@@ -848,8 +1158,8 @@
 
         /* ring buffer test */
         {   XXH64_state_t xxhOrig;
-            XXH64_state_t xxhNew;
-            LZ4_streamDecode_t decodeState;
+            XXH64_state_t xxhNewSafe, xxhNewFast;
+            LZ4_streamDecode_t decodeStateSafe, decodeStateFast;
             const U32 maxMessageSizeLog = 10;
             const U32 maxMessageSizeMask = (1<<maxMessageSizeLog) - 1;
             U32 messageSize = (FUZ_rand(&randState) & maxMessageSizeMask) + 1;
@@ -857,26 +1167,36 @@
             U32 rNext = 0;
             U32 dNext = 0;
             const U32 dBufferSize = ringBufferSize + maxMessageSizeMask;
+            int compressedSize;
 
             XXH64_reset(&xxhOrig, 0);
-            XXH64_reset(&xxhNew, 0);
+            XXH64_reset(&xxhNewSafe, 0);
+            XXH64_reset(&xxhNewFast, 0);
             LZ4_resetStreamHC(&sHC, compressionLevel);
-            LZ4_setStreamDecode(&decodeState, NULL, 0);
+            LZ4_setStreamDecode(&decodeStateSafe, NULL, 0);
+            LZ4_setStreamDecode(&decodeStateFast, NULL, 0);
 
             while (iNext + messageSize < testCompressedSize) {
                 XXH64_update(&xxhOrig, testInput + iNext, messageSize);
                 crcOrig = XXH64_digest(&xxhOrig);
 
                 memcpy (ringBuffer + rNext, testInput + iNext, messageSize);
-                result = LZ4_compress_HC_continue(&sHC, ringBuffer + rNext, testCompressed, messageSize, testCompressedSize-ringBufferSize);
-                FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() compression failed");
+                compressedSize = LZ4_compress_HC_continue(&sHC, ringBuffer + rNext, testCompressed, messageSize, testCompressedSize-ringBufferSize);
+                FUZ_CHECKTEST(compressedSize==0, "LZ4_compress_HC_continue() compression failed");
 
-                result = LZ4_decompress_safe_continue(&decodeState, testCompressed, testVerify + dNext, result, messageSize);
-                FUZ_CHECKTEST(result!=(int)messageSize, "ringBuffer : LZ4_decompress_safe() test failed");
+                result = LZ4_decompress_safe_continue(&decodeStateSafe, testCompressed, testVerify + dNext, compressedSize, messageSize);
+                FUZ_CHECKTEST(result!=(int)messageSize, "ringBuffer : LZ4_decompress_safe_continue() test failed");
 
-                XXH64_update(&xxhNew, testVerify + dNext, messageSize);
-                { U64 const crcNew = XXH64_digest(&xxhNew);
-                  FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() decompression corruption"); }
+                XXH64_update(&xxhNewSafe, testVerify + dNext, messageSize);
+                { U64 const crcNew = XXH64_digest(&xxhNewSafe);
+                  FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_continue() decompression corruption"); }
+
+                result = LZ4_decompress_fast_continue(&decodeStateFast, testCompressed, testVerify + dNext, messageSize);
+                FUZ_CHECKTEST(result!=compressedSize, "ringBuffer : LZ4_decompress_fast_continue() test failed");
+
+                XXH64_update(&xxhNewFast, testVerify + dNext, messageSize);
+                { U64 const crcNew = XXH64_digest(&xxhNewFast);
+                  FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_fast_continue() decompression corruption"); }
 
                 /* prepare next message */
                 iNext += messageSize;
@@ -888,71 +1208,105 @@
             }
         }
 
-        /* small decoder-side ring buffer test */
+        /* Ring buffer test : Non synchronized decoder */
+        /* This test uses minimum amount of memory required to setup a decoding ring buffer
+         * while being unsynchronized with encoder
+         * (no assumption done on how the data is encoded, it just follows LZ4 format specification).
+         * This size is documented in lz4.h, and is LZ4_decoderRingBufferSize(maxBlockSize).
+         */
         {   XXH64_state_t xxhOrig;
-            XXH64_state_t xxhNew;
-            LZ4_streamDecode_t decodeState;
-            const U32 maxMessageSizeLog = 12;
-            const U32 maxMessageSizeMask = (1<<maxMessageSizeLog) - 1;
-            U32 messageSize;
+            XXH64_state_t xxhNewSafe, xxhNewFast;
+            LZ4_streamDecode_t decodeStateSafe, decodeStateFast;
+            const int maxMessageSizeLog = 12;
+            const int maxMessageSize = 1 << maxMessageSizeLog;
+            const int maxMessageSizeMask = maxMessageSize - 1;
+            int messageSize;
             U32 totalMessageSize = 0;
-            U32 iNext = 0;
-            U32 dNext = 0;
-            const U32 dBufferSize = 64 KB;
+            const int dBufferSize = LZ4_decoderRingBufferSize(maxMessageSize);
+            char* const ringBufferSafe = testVerify;
+            char* const ringBufferFast = testVerify + dBufferSize + 1;   /* used by LZ4_decompress_fast_continue */
+            int iNext = 0;
+            int dNext = 0;
+            int compressedSize;
 
+            assert((size_t)(dBufferSize + 1 + dBufferSize) < testVerifySize);   /* space used by ringBufferSafe and ringBufferFast */
             XXH64_reset(&xxhOrig, 0);
-            XXH64_reset(&xxhNew, 0);
+            XXH64_reset(&xxhNewSafe, 0);
+            XXH64_reset(&xxhNewFast, 0);
             LZ4_resetStreamHC(&sHC, compressionLevel);
-            LZ4_setStreamDecode(&decodeState, NULL, 0);
+            LZ4_setStreamDecode(&decodeStateSafe, NULL, 0);
+            LZ4_setStreamDecode(&decodeStateFast, NULL, 0);
 
-#define BSIZE1 65537
-#define BSIZE2 16435
+#define BSIZE1 (dBufferSize - (maxMessageSize-1))
 
             /* first block */
+            messageSize = BSIZE1;   /* note : we cheat a bit here, in theory no message should be > maxMessageSize. We just want to fill the decoding ring buffer once. */
+            XXH64_update(&xxhOrig, testInput + iNext, messageSize);
+            crcOrig = XXH64_digest(&xxhOrig);
 
-                messageSize = BSIZE1;
-                XXH64_update(&xxhOrig, testInput + iNext, messageSize);
-                crcOrig = XXH64_digest(&xxhOrig);
+            compressedSize = LZ4_compress_HC_continue(&sHC, testInput + iNext, testCompressed, messageSize, testCompressedSize-ringBufferSize);
+            FUZ_CHECKTEST(compressedSize==0, "LZ4_compress_HC_continue() compression failed");
 
-                result = LZ4_compress_HC_continue(&sHC, testInput + iNext, testCompressed, messageSize, testCompressedSize-ringBufferSize);
-                FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() compression failed");
+            result = LZ4_decompress_safe_continue(&decodeStateSafe, testCompressed, ringBufferSafe + dNext, compressedSize, messageSize);
+            FUZ_CHECKTEST(result!=messageSize, "64K D.ringBuffer : LZ4_decompress_safe_continue() test failed");
 
-                result = LZ4_decompress_safe_continue(&decodeState, testCompressed, testVerify + dNext, result, messageSize);
-                FUZ_CHECKTEST(result!=(int)messageSize, "64K D.ringBuffer : LZ4_decompress_safe() test failed");
+            XXH64_update(&xxhNewSafe, ringBufferSafe + dNext, messageSize);
+            { U64 const crcNew = XXH64_digest(&xxhNewSafe);
+              FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_continue() decompression corruption"); }
 
-                XXH64_update(&xxhNew, testVerify + dNext, messageSize);
-                { U64 const crcNew = XXH64_digest(&xxhNew);
-                  FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() decompression corruption"); }
+            result = LZ4_decompress_fast_continue(&decodeStateFast, testCompressed, ringBufferFast + dNext, messageSize);
+            FUZ_CHECKTEST(result!=compressedSize, "64K D.ringBuffer : LZ4_decompress_fast_continue() test failed");
 
-                /* prepare next message */
-                dNext += messageSize;
-                totalMessageSize += messageSize;
-                messageSize = BSIZE2;
-                iNext = 132000;
-                memcpy(testInput + iNext, testInput + 8, messageSize);
-                if (dNext > dBufferSize) dNext = 0;
+            XXH64_update(&xxhNewFast, ringBufferFast + dNext, messageSize);
+            { U64 const crcNew = XXH64_digest(&xxhNewFast);
+              FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_fast_continue() decompression corruption"); }
+
+            /* prepare second message */
+            dNext += messageSize;
+            totalMessageSize += messageSize;
+            messageSize = maxMessageSize;
+            iNext = BSIZE1+1;
+            assert(BSIZE1 >= 65535);
+            memcpy(testInput + iNext, testInput + (BSIZE1-65535), messageSize);  /* will generate a match at max distance == 65535 */
+            FUZ_CHECKTEST(dNext+messageSize <= dBufferSize, "Ring buffer test : second message should require restarting from beginning");
+            dNext = 0;
 
             while (totalMessageSize < 9 MB) {
                 XXH64_update(&xxhOrig, testInput + iNext, messageSize);
                 crcOrig = XXH64_digest(&xxhOrig);
 
-                result = LZ4_compress_HC_continue(&sHC, testInput + iNext, testCompressed, messageSize, testCompressedSize-ringBufferSize);
-                FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() compression failed");
+                compressedSize = LZ4_compress_HC_continue(&sHC, testInput + iNext, testCompressed, messageSize, testCompressedSize-ringBufferSize);
+                FUZ_CHECKTEST(compressedSize==0, "LZ4_compress_HC_continue() compression failed");
+                DISPLAYLEVEL(5, "compressed %i bytes to %i bytes \n", messageSize, compressedSize);
 
-                result = LZ4_decompress_safe_continue(&decodeState, testCompressed, testVerify + dNext, result, messageSize);
-                FUZ_CHECKTEST(result!=(int)messageSize, "64K D.ringBuffer : LZ4_decompress_safe() test failed");
-
-                XXH64_update(&xxhNew, testVerify + dNext, messageSize);
-                {   U64 const crcNew = XXH64_digest(&xxhNew);
-                    if (crcOrig != crcNew) FUZ_findDiff(testInput + iNext, testVerify + dNext);
-                    FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() decompression corruption during small decoder-side ring buffer test");
+                /* test LZ4_decompress_safe_continue */
+                assert(dNext < dBufferSize);
+                assert(dBufferSize - dNext >= maxMessageSize);
+                result = LZ4_decompress_safe_continue(&decodeStateSafe,
+                                                      testCompressed, ringBufferSafe + dNext,
+                                                      compressedSize, dBufferSize - dNext);   /* works without knowing messageSize, under assumption that messageSize <= maxMessageSize */
+                FUZ_CHECKTEST(result!=messageSize, "D.ringBuffer : LZ4_decompress_safe_continue() test failed");
+                XXH64_update(&xxhNewSafe, ringBufferSafe + dNext, messageSize);
+                {   U64 const crcNew = XXH64_digest(&xxhNewSafe);
+                    if (crcOrig != crcNew) FUZ_findDiff(testInput + iNext, ringBufferSafe + dNext);
+                    FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_continue() decompression corruption during D.ringBuffer test");
                 }
+
+                /* test LZ4_decompress_fast_continue in its own buffer ringBufferFast */
+                result = LZ4_decompress_fast_continue(&decodeStateFast, testCompressed, ringBufferFast + dNext, messageSize);
+                FUZ_CHECKTEST(result!=compressedSize, "D.ringBuffer : LZ4_decompress_fast_continue() test failed");
+                XXH64_update(&xxhNewFast, ringBufferFast + dNext, messageSize);
+                {   U64 const crcNew = XXH64_digest(&xxhNewFast);
+                    if (crcOrig != crcNew) FUZ_findDiff(testInput + iNext, ringBufferFast + dNext);
+                    FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_fast_continue() decompression corruption during D.ringBuffer test");
+                }
+
                 /* prepare next message */
                 dNext += messageSize;
                 totalMessageSize += messageSize;
                 messageSize = (FUZ_rand(&randState) & maxMessageSizeMask) + 1;
                 iNext = (FUZ_rand(&randState) & 65535);
-                if (dNext > dBufferSize) dNext = 0;
+                if (dNext + maxMessageSize > dBufferSize) dNext = 0;
             }
         }
     }
@@ -1012,13 +1366,13 @@
                     return FUZ_usage(programName);
 
                 case 'v':   /* verbose mode */
-                    argument++;
                     g_displayLevel++;
+                    argument++;
                     break;
 
                 case 'p':   /* pause at the end */
-                    argument++;
                     use_pause=1;
+                    argument++;
                     break;
 
                 case 'i':
diff --git a/tests/roundTripTest.c b/tests/roundTripTest.c
new file mode 100644
index 0000000..2d34451
--- /dev/null
+++ b/tests/roundTripTest.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/*
+ * This program takes a file in input,
+ * performs an LZ4 round-trip test (compress + decompress)
+ * compares the result with original
+ * and generates an abort() on corruption detection,
+ * in order for afl to register the event as a crash.
+*/
+
+
+/*===========================================
+*   Tuning Constant
+*==========================================*/
+#ifndef MIN_CLEVEL
+#  define MIN_CLEVEL (int)(-5)
+#endif
+
+
+
+/*===========================================
+*   Dependencies
+*==========================================*/
+#include <stddef.h>     /* size_t */
+#include <stdlib.h>     /* malloc, free, exit */
+#include <stdio.h>      /* fprintf */
+#include <string.h>     /* strcmp */
+#include <assert.h>
+#include <sys/types.h>  /* stat */
+#include <sys/stat.h>   /* stat */
+#include "xxhash.h"
+
+#include "lz4.h"
+#include "lz4hc.h"
+
+
+/*===========================================
+*   Macros
+*==========================================*/
+#define MIN(a,b)  ( (a) < (b) ? (a) : (b) )
+
+#define MSG(...)    fprintf(stderr, __VA_ARGS__)
+/* CONTROL_MSG() : when condition `c` is true, prints the formatted message on stderr, then abort() */
+#define CONTROL_MSG(c, ...) {   \
+    if ((c)) {                  \
+        MSG(__VA_ARGS__);       \
+        MSG(" \n");             \
+        abort();                \
+    }                           \
+}
+
+/* checkBuffers() : @return position of the first byte which differs between buff1 and buff2, or buffSize when both are identical */
+static size_t checkBuffers(const void* buff1, const void* buff2, size_t buffSize)
+{
+    const char* const ip1 = (const char*)buff1;
+    const char* const ip2 = (const char*)buff2;
+    size_t pos;
+
+    for (pos=0; pos<buffSize; pos++)
+        if (ip1[pos]!=ip2[pos])
+            break;
+
+    return pos;   /* == buffSize when the loop completed without finding a difference */
+}
+
+
+/* select_clevel() : derives a pseudo-random compression level, within [MIN_CLEVEL, LZ4HC_CLEVEL_MAX],
+ * from an XXH32 hash (seed 0) of the first bytes (16 at most) of the reference buffer */
+static int select_clevel(const void* refBuff, size_t refBuffSize)
+{
+    const int minCLevel = MIN_CLEVEL;
+    const int maxClevel = LZ4HC_CLEVEL_MAX;
+    const int cLevelSpan = maxClevel - minCLevel;
+    size_t const hashLength = MIN(16, refBuffSize);
+    unsigned const h32 = XXH32(refBuff, hashLength, 0);
+    int const randL = h32 % (cLevelSpan+1);   /* in [0, cLevelSpan] */
+
+    return minCLevel + randL;
+}
+
+/* compressFn : prototype that both LZ4_compress_fast() and LZ4_compress_HC() are assigned to in roundTripTest() */
+typedef int (*compressFn)(const char* src, char* dst, int srcSize, int dstSize, int cLevel);
+
+
+/** roundTripTest() :
+ *  Compresses `srcBuff` into `compressedBuff`,
+ *  then decompresses `compressedBuff` into `resultBuff`.
+ *  If clevel==0, compression level is derived from srcBuff's content head bytes.
+ *  Levels >= LZ4HC_CLEVEL_MIN use LZ4_compress_HC(); all others use LZ4_compress_fast(), a negative level becoming an acceleration.
+ *  This function abort() if it detects any round-trip error. If it returns, round trip is considered successfully validated.
+ *  Note : `compressedBuffCapacity` should be `>= LZ4_compressBound(srcSize)`
+ *         for compression to be guaranteed to work */
+static void roundTripTest(void* resultBuff, size_t resultBuffCapacity,
+                          void* compressedBuff, size_t compressedBuffCapacity,
+                    const void* srcBuff, size_t srcSize,
+                          int clevel)
+{
+    int const proposed_clevel = clevel ? clevel : select_clevel(srcBuff, srcSize);
+    int const selected_clevel = proposed_clevel < 0 ? -proposed_clevel : proposed_clevel;   /* if level < 0, it becomes an acceleration value */
+    compressFn compress = proposed_clevel >= LZ4HC_CLEVEL_MIN ? LZ4_compress_HC : LZ4_compress_fast;   /* decide on the signed level : a negated level is an acceleration and must stay on the fast path */
+    int const cSize = compress((const char*)srcBuff, (char*)compressedBuff, (int)srcSize, (int)compressedBuffCapacity, selected_clevel);
+    CONTROL_MSG(cSize == 0, "Compression error !");
+
+    {   int const dSize = LZ4_decompress_safe((const char*)compressedBuff, (char*)resultBuff, cSize, (int)resultBuffCapacity);
+        CONTROL_MSG(dSize < 0, "Decompression detected an error !");
+        CONTROL_MSG(dSize != (int)srcSize, "Decompression corruption error : wrong decompressed size !");
+    }
+
+    /* check potential content corruption error */
+    assert(resultBuffCapacity >= srcSize);
+    {   size_t const errorPos = checkBuffers(srcBuff, resultBuff, srcSize);
+        CONTROL_MSG(errorPos != srcSize,
+                    "Silent decoding corruption, at pos %u !!!",
+                    (unsigned)errorPos);
+    }
+
+}
+/* roundTripCheck() : allocates a compressed buffer and a result buffer, both of size LZ4_compressBound(srcSize), runs roundTripTest(), then frees them */
+static void roundTripCheck(const void* srcBuff, size_t srcSize, int clevel)
+{
+    size_t const cBuffSize = LZ4_compressBound((int)srcSize);
+    void* const cBuff = malloc(cBuffSize);
+    void* const rBuff = malloc(cBuffSize);
+
+    if (!cBuff || !rBuff) {
+        fprintf(stderr, "not enough memory ! \n");
+        exit(1);
+    }
+
+    roundTripTest(rBuff, cBuffSize,
+                  cBuff, cBuffSize,
+                  srcBuff, srcSize,
+                  clevel);
+
+    free(rBuff);
+    free(cBuff);
+}
+
+/* getFileSize() : @return size of `infilename` as reported by stat(), or 0 when stat() fails or the regular-file check on st_mode fails */
+static size_t getFileSize(const char* infilename)
+{
+    int r;
+#if defined(_MSC_VER)
+    struct _stat64 statbuf;
+    r = _stat64(infilename, &statbuf);
+    if (r || !(statbuf.st_mode & S_IFREG)) return 0;   /* No good... */
+#else
+    struct stat statbuf;
+    r = stat(infilename, &statbuf);
+    if (r || !S_ISREG(statbuf.st_mode)) return 0;   /* No good... */
+#endif
+    return (size_t)statbuf.st_size;
+}
+
+/* isDirectory() : @return 1 when stat() succeeds and st_mode flags `infilename` as a directory, 0 otherwise (including when stat() fails) */
+static int isDirectory(const char* infilename)
+{
+    int r;
+#if defined(_MSC_VER)
+    struct _stat64 statbuf;
+    r = _stat64(infilename, &statbuf);
+    if (!r && (statbuf.st_mode & _S_IFDIR)) return 1;
+#else
+    struct stat statbuf;
+    r = stat(infilename, &statbuf);
+    if (!r && S_ISDIR(statbuf.st_mode)) return 1;
+#endif
+    return 0;
+}
+
+
+/** loadFile() : reads exactly `fileSize` bytes of `fileName` into `buffer`; exit() with code 2 (directory), 3 (cannot open) or 5 (incomplete read).
+ *  requirement : `buffer` size >= `fileSize` */
+static void loadFile(void* buffer, const char* fileName, size_t fileSize)
+{
+    FILE* const f = fopen(fileName, "rb");
+    if (isDirectory(fileName)) {   /* checked before testing `f` : a directory is reported as such, whatever fopen() returned */
+        MSG("Ignoring %s directory \n", fileName);
+        exit(2);
+    }
+    if (f==NULL) {
+        MSG("Impossible to open %s \n", fileName);
+        exit(3);
+    }
+    {   size_t const readSize = fread(buffer, 1, fileSize, f);
+        if (readSize != fileSize) {
+            MSG("Error reading %s \n", fileName);
+            exit(5);
+    }   }
+    fclose(f);
+}
+
+/* fileCheck() : loads the whole content of `fileName` into a heap buffer, then runs roundTripCheck() on it; exit(4) when the buffer cannot be allocated */
+static void fileCheck(const char* fileName, int clevel)
+{
+    size_t const fileSize = getFileSize(fileName);
+    void* const buffer = malloc(fileSize + !fileSize /* avoid 0 */);
+    if (!buffer) {
+        MSG("not enough memory \n");
+        exit(4);
+    }
+    loadFile(buffer, fileName, fileSize);
+    roundTripCheck(buffer, fileSize, clevel);
+    free (buffer);
+}
+
+/* bad_usage() : prints command line help on stderr. @return 1, which main() returns directly as its exit code */
+int bad_usage(const char* exeName)
+{
+    MSG(" \n");
+    MSG("bad usage: \n");
+    MSG(" \n");
+    MSG("%s [Options] fileName \n", exeName);
+    MSG(" \n");
+    MSG("Options: \n");
+    MSG("-#     : use #=[0-9] compression level (default:0 == random) \n");
+    return 1;
+}
+
+/* main() : expects `exe [-#] fileName`; round-trips the file and returns 0 if fileCheck() returns, or bad_usage()'s value on invalid arguments */
+int main(int argCount, const char** argv)
+{
+    const char* const exeName = argv[0];
+    int argNb = 1;
+    int clevel = 0;
+
+    assert(argCount >= 1);
+    if (argCount < 2) return bad_usage(exeName);
+
+    if (argv[1][0] == '-') {
+        clevel = argv[1][1] - '0';   /* only the character following '-' is read; it is not validated as a digit */
+        argNb = 2;
+    }
+
+    if (argNb >= argCount) return bad_usage(exeName);
+
+    fileCheck(argv[argNb], clevel);
+    MSG("no pb detected \n");
+    return 0;
+}
diff --git a/tests/test_install.sh b/tests/test_install.sh
new file mode 100755
index 0000000..f9de402
--- /dev/null
+++ b/tests/test_install.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env sh
+set -e
+# For each install variable, check that its UPPERCASE and lowercase spellings install (then uninstall) identical trees.
+make="make -C $lz4_root"
+for cmd in install uninstall; do
+  for upper in DUMMY PREFIX EXEC_PREFIX LIBDIR INCLUDEDIR PKGCONFIGDIR BINDIR MANDIR MAN1DIR ; do
+    lower=$(echo $upper | tr '[:upper:]' '[:lower:]')
+    tmp_lower="$(pwd)/tmp-lower-$lower/"
+    tmp_upper="$(pwd)/tmp-upper-$lower/"
+    echo $make $cmd DESTDIR="$tmp_upper" $upper="test"
+    $make $cmd DESTDIR="$tmp_upper" $upper="test" >/dev/null
+    echo $make $cmd DESTDIR="$tmp_lower" $lower="test"
+    $make $cmd DESTDIR="$tmp_lower" $lower="test" >/dev/null
+    command diff -r "$tmp_lower" "$tmp_upper" && echo "SAME!" || false
+    if [ "x$cmd" = "xuninstall" ]; then
+      test -z "$(find "$tmp_lower" -type f)" && echo "EMPTY!" || false
+      rm -rf "$tmp_upper" "$tmp_lower"
+    fi
+  done
+done
diff --git a/visual/.gitignore b/visual/.gitignore
index dea92fc..276f8f5 100644
--- a/visual/.gitignore
+++ b/visual/.gitignore
@@ -6,5 +6,5 @@
 *.sdf
 *.suo
 *.user
-
+ver*/
 VS2010/bin/
diff --git a/visual/VS2010/datagen/datagen.vcxproj b/visual/VS2010/datagen/datagen.vcxproj
index aaf81ad..e24f961 100644
--- a/visual/VS2010/datagen/datagen.vcxproj
+++ b/visual/VS2010/datagen/datagen.vcxproj
@@ -39,14 +39,14 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
@@ -91,6 +91,7 @@
       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <TreatWarningAsError>true</TreatWarningAsError>
       <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -107,6 +108,7 @@
       <TreatWarningAsError>true</TreatWarningAsError>
       <EnablePREfast>true</EnablePREfast>
       <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -124,6 +126,7 @@
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <TreatWarningAsError>false</TreatWarningAsError>
       <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -144,6 +147,7 @@
       <TreatWarningAsError>false</TreatWarningAsError>
       <EnablePREfast>true</EnablePREfast>
       <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
diff --git a/visual/VS2010/frametest/frametest.vcxproj b/visual/VS2010/frametest/frametest.vcxproj
index 76d12c9..3196768 100644
--- a/visual/VS2010/frametest/frametest.vcxproj
+++ b/visual/VS2010/frametest/frametest.vcxproj
@@ -39,14 +39,14 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
@@ -91,6 +91,7 @@
       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <TreatWarningAsError>true</TreatWarningAsError>
       <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -107,6 +108,7 @@
       <TreatWarningAsError>true</TreatWarningAsError>
       <EnablePREfast>true</EnablePREfast>
       <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -124,6 +126,7 @@
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <TreatWarningAsError>false</TreatWarningAsError>
       <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -144,6 +147,7 @@
       <TreatWarningAsError>false</TreatWarningAsError>
       <EnablePREfast>true</EnablePREfast>
       <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
diff --git a/visual/VS2010/fullbench-dll/fullbench-dll.vcxproj b/visual/VS2010/fullbench-dll/fullbench-dll.vcxproj
index c10552a..8f503f5 100644
--- a/visual/VS2010/fullbench-dll/fullbench-dll.vcxproj
+++ b/visual/VS2010/fullbench-dll/fullbench-dll.vcxproj
@@ -39,14 +39,14 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
@@ -91,6 +91,7 @@
       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;LZ4_DLL_IMPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <TreatWarningAsError>true</TreatWarningAsError>
       <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -109,6 +110,7 @@
       <TreatWarningAsError>true</TreatWarningAsError>
       <EnablePREfast>true</EnablePREfast>
       <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -128,6 +130,7 @@
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;LZ4_DLL_IMPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <TreatWarningAsError>false</TreatWarningAsError>
       <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -150,6 +153,7 @@
       <TreatWarningAsError>false</TreatWarningAsError>
       <EnablePREfast>true</EnablePREfast>
       <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
diff --git a/visual/VS2010/fullbench/fullbench.vcxproj b/visual/VS2010/fullbench/fullbench.vcxproj
index e2d95c9..aa67431 100644
--- a/visual/VS2010/fullbench/fullbench.vcxproj
+++ b/visual/VS2010/fullbench/fullbench.vcxproj
@@ -39,14 +39,14 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
@@ -91,6 +91,7 @@
       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <TreatWarningAsError>true</TreatWarningAsError>
       <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -107,6 +108,7 @@
       <TreatWarningAsError>true</TreatWarningAsError>
       <EnablePREfast>true</EnablePREfast>
       <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -124,6 +126,7 @@
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <TreatWarningAsError>false</TreatWarningAsError>
       <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -144,6 +147,7 @@
       <TreatWarningAsError>false</TreatWarningAsError>
       <EnablePREfast>true</EnablePREfast>
       <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
diff --git a/visual/VS2010/fuzzer/fuzzer.vcxproj b/visual/VS2010/fuzzer/fuzzer.vcxproj
index 85d6c9b..21cbf56 100644
--- a/visual/VS2010/fuzzer/fuzzer.vcxproj
+++ b/visual/VS2010/fuzzer/fuzzer.vcxproj
@@ -39,14 +39,14 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
@@ -91,6 +91,7 @@
       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <TreatWarningAsError>true</TreatWarningAsError>
       <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -107,6 +108,7 @@
       <TreatWarningAsError>true</TreatWarningAsError>
       <EnablePREfast>true</EnablePREfast>
       <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -124,6 +126,7 @@
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <TreatWarningAsError>false</TreatWarningAsError>
       <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -144,6 +147,7 @@
       <TreatWarningAsError>false</TreatWarningAsError>
       <EnablePREfast>true</EnablePREfast>
       <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
diff --git a/visual/VS2010/liblz4-dll/liblz4-dll.vcxproj b/visual/VS2010/liblz4-dll/liblz4-dll.vcxproj
index 389f13c..56ec3b9 100644
--- a/visual/VS2010/liblz4-dll/liblz4-dll.vcxproj
+++ b/visual/VS2010/liblz4-dll/liblz4-dll.vcxproj
@@ -40,14 +40,14 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>DynamicLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>DynamicLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
@@ -96,6 +96,7 @@
       <PreprocessorDefinitions>WIN32;_DEBUG;LZ4_DLL_EXPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <TreatWarningAsError>true</TreatWarningAsError>
       <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <GenerateDebugInformation>true</GenerateDebugInformation>
@@ -111,6 +112,7 @@
       <TreatWarningAsError>true</TreatWarningAsError>
       <EnablePREfast>true</EnablePREfast>
       <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <GenerateDebugInformation>true</GenerateDebugInformation>
@@ -127,6 +129,7 @@
       <PreprocessorDefinitions>WIN32;NDEBUG;LZ4_DLL_EXPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <TreatWarningAsError>false</TreatWarningAsError>
       <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
     </ClCompile>
     <Link>
       <GenerateDebugInformation>true</GenerateDebugInformation>
@@ -146,6 +149,7 @@
       <TreatWarningAsError>false</TreatWarningAsError>
       <EnablePREfast>true</EnablePREfast>
       <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
     </ClCompile>
     <Link>
       <GenerateDebugInformation>true</GenerateDebugInformation>
diff --git a/visual/VS2010/liblz4/liblz4.vcxproj b/visual/VS2010/liblz4/liblz4.vcxproj
index a0b8000..61ea159 100644
--- a/visual/VS2010/liblz4/liblz4.vcxproj
+++ b/visual/VS2010/liblz4/liblz4.vcxproj
@@ -39,14 +39,14 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
@@ -95,6 +95,7 @@
       <PreprocessorDefinitions>WIN32;_DEBUG;LZ4_DLL_EXPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <TreatWarningAsError>true</TreatWarningAsError>
       <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <GenerateDebugInformation>true</GenerateDebugInformation>
@@ -110,6 +111,7 @@
       <TreatWarningAsError>true</TreatWarningAsError>
       <EnablePREfast>true</EnablePREfast>
       <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <GenerateDebugInformation>true</GenerateDebugInformation>
@@ -126,6 +128,7 @@
       <PreprocessorDefinitions>WIN32;NDEBUG;LZ4_DLL_EXPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <TreatWarningAsError>false</TreatWarningAsError>
       <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
     </ClCompile>
     <Link>
       <GenerateDebugInformation>true</GenerateDebugInformation>
@@ -145,6 +148,7 @@
       <TreatWarningAsError>false</TreatWarningAsError>
       <EnablePREfast>true</EnablePREfast>
       <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
     </ClCompile>
     <Link>
       <GenerateDebugInformation>true</GenerateDebugInformation>
diff --git a/visual/VS2010/lz4/lz4.vcxproj b/visual/VS2010/lz4/lz4.vcxproj
index 693e121..de7a714 100644
--- a/visual/VS2010/lz4/lz4.vcxproj
+++ b/visual/VS2010/lz4/lz4.vcxproj
@@ -39,14 +39,14 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
@@ -91,6 +91,7 @@
       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <TreatWarningAsError>true</TreatWarningAsError>
       <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -108,6 +109,7 @@
       <TreatWarningAsError>true</TreatWarningAsError>
       <EnablePREfast>true</EnablePREfast>
       <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -126,6 +128,7 @@
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <TreatWarningAsError>false</TreatWarningAsError>
       <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -147,6 +150,7 @@
       <TreatWarningAsError>false</TreatWarningAsError>
       <EnablePREfast>true</EnablePREfast>
       <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
diff --git a/visual/VS2017/datagen/datagen.vcxproj b/visual/VS2017/datagen/datagen.vcxproj
new file mode 100644
index 0000000..30e159e
--- /dev/null
+++ b/visual/VS2017/datagen/datagen.vcxproj
@@ -0,0 +1,173 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{D745AE2F-596A-403A-9B91-81A8C6779243}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>datagen</RootNamespace>
+    <OutDir>$(SolutionDir)bin\$(Platform)_$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)bin\obj\$(RootNamespace)_$(Platform)_$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\programs;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\programs;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\programs;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\programs;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\programs\datagen.c" />
+    <ClCompile Include="..\..\..\tests\datagencli.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\programs\datagen.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/visual/VS2017/frametest/frametest.vcxproj b/visual/VS2017/frametest/frametest.vcxproj
new file mode 100644
index 0000000..a3a403d
--- /dev/null
+++ b/visual/VS2017/frametest/frametest.vcxproj
@@ -0,0 +1,180 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{39AD6ECC-8BAD-4368-95E4-A1AA2F077BB7}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>frametest</RootNamespace>
+    <OutDir>$(SolutionDir)bin\$(Platform)_$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)bin\obj\$(RootNamespace)_$(Platform)_$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\lib\lz4.c" />
+    <ClCompile Include="..\..\..\lib\lz4frame.c" />
+    <ClCompile Include="..\..\..\lib\lz4hc.c" />
+    <ClCompile Include="..\..\..\lib\xxhash.c" />
+    <ClCompile Include="..\..\..\tests\frametest.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\lib\lz4.h" />
+    <ClInclude Include="..\..\..\lib\lz4frame.h" />
+    <ClInclude Include="..\..\..\lib\lz4frame_static.h" />
+    <ClInclude Include="..\..\..\lib\lz4hc.h" />
+    <ClInclude Include="..\..\..\lib\xxhash.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/visual/VS2017/fullbench-dll/fullbench-dll.vcxproj b/visual/VS2017/fullbench-dll/fullbench-dll.vcxproj
new file mode 100644
index 0000000..d54a8d7
--- /dev/null
+++ b/visual/VS2017/fullbench-dll/fullbench-dll.vcxproj
@@ -0,0 +1,184 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals"> <!-- Project identity and output locations shared by all four configurations. -->
+    <ProjectGuid>{13992FD2-077E-4954-B065-A428198201A9}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>fullbench-dll</RootNamespace>
+    <OutDir>$(SolutionDir)bin\$(Platform)_$(Configuration)\</OutDir> <!-- Same directory that the Link settings below search for liblz4.lib. -->
+    <IntDir>$(SolutionDir)bin\obj\$(RootNamespace)_$(Platform)_$(Configuration)\</IntDir> <!-- Keyed by $(RootNamespace), so this project gets its own object directory. -->
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Compiler and linker defaults for the 32-bit debug build. -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;LZ4_DLL_IMPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions> <!-- LZ4_DLL_IMPORT=1 presumably switches lz4.h to DLL-import declarations; confirm in lib/lz4.h. -->
+      <TreatWarningAsError>true</TreatWarningAsError> <!-- Debug builds fail on any Level4 warning; the Release groups set this to false. -->
+      <EnablePREfast>false</EnablePREfast> <!-- Code analysis is off here; only the x64 configurations enable it. -->
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary> <!-- Applies only when UseStaticCRT is 'true'; that property is not set in this file (presumably supplied by the caller). -->
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalLibraryDirectories>$(SolutionDir)bin\$(Platform)_$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories> <!-- Same path as OutDir in the Globals group. -->
+      <AdditionalDependencies>liblz4.lib;%(AdditionalDependencies)</AdditionalDependencies> <!-- lz4 code comes from this library; no lz4 .c files are compiled by this project. -->
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <!-- Compiler and linker defaults for the 64-bit debug build; differs from Debug|Win32 by enabling code analysis. -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;LZ4_DLL_IMPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions> <!-- LZ4_DLL_IMPORT=1 presumably switches lz4.h to DLL-import declarations; confirm in lib/lz4.h. -->
+      <TreatWarningAsError>true</TreatWarningAsError> <!-- Debug builds fail on any Level4 warning; the Release groups set this to false. -->
+      <EnablePREfast>true</EnablePREfast> <!-- Code analysis runs during compilation in this configuration. -->
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions> <!-- Sets the stack-frame size, in bytes, above which code analysis reports warning C6262. -->
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary> <!-- Applies only when UseStaticCRT is 'true'; that property is not set in this file (presumably supplied by the caller). -->
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalLibraryDirectories>$(SolutionDir)bin\$(Platform)_$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories> <!-- Same path as OutDir in the Globals group. -->
+      <AdditionalDependencies>liblz4.lib;%(AdditionalDependencies)</AdditionalDependencies> <!-- lz4 code comes from this library; no lz4 .c files are compiled by this project. -->
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <!-- Compiler and linker defaults for the 32-bit optimized build. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;LZ4_DLL_IMPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions> <!-- LZ4_DLL_IMPORT=1 presumably switches lz4.h to DLL-import declarations; confirm in lib/lz4.h. -->
+      <TreatWarningAsError>false</TreatWarningAsError> <!-- Unlike the Debug groups, warnings do not fail a Release build. -->
+      <EnablePREfast>false</EnablePREfast> <!-- Code analysis is off here; only the x64 configurations enable it. -->
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary> <!-- Applies only when UseStaticCRT is 'true'; that property is not set in this file (presumably supplied by the caller). -->
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation> <!-- Debug information is still generated for Release. -->
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalLibraryDirectories>$(SolutionDir)bin\$(Platform)_$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories> <!-- Same path as OutDir in the Globals group. -->
+      <AdditionalDependencies>liblz4.lib;%(AdditionalDependencies)</AdditionalDependencies> <!-- lz4 code comes from this library; no lz4 .c files are compiled by this project. -->
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> <!-- Compiler and linker defaults for the 64-bit optimized build; differs from Release|Win32 by enabling code analysis. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;LZ4_DLL_IMPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions> <!-- LZ4_DLL_IMPORT=1 presumably switches lz4.h to DLL-import declarations; confirm in lib/lz4.h. -->
+      <TreatWarningAsError>false</TreatWarningAsError> <!-- Unlike the Debug groups, warnings do not fail a Release build. -->
+      <EnablePREfast>true</EnablePREfast> <!-- Code analysis runs during compilation in this configuration. -->
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions> <!-- Sets the stack-frame size, in bytes, above which code analysis reports warning C6262. -->
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary> <!-- Applies only when UseStaticCRT is 'true'; that property is not set in this file (presumably supplied by the caller). -->
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation> <!-- Debug information is still generated for Release. -->
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalLibraryDirectories>$(SolutionDir)bin\$(Platform)_$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories> <!-- Same path as OutDir in the Globals group. -->
+      <AdditionalDependencies>liblz4.lib;%(AdditionalDependencies)</AdditionalDependencies> <!-- lz4 code comes from this library; no lz4 .c files are compiled by this project. -->
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup> <!-- Sources compiled into this executable. lz4.c, lz4hc.c and lz4frame.c are not listed: the Link settings pull in liblz4.lib instead. -->
+    <ClCompile Include="..\..\..\lib\xxhash.c" /> <!-- xxhash is compiled in directly. NOTE(review): presumably because the DLL does not export it; confirm. -->
+    <ClCompile Include="..\..\..\tests\fullbench.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\lib\lz4.h" />
+    <ClInclude Include="..\..\..\lib\lz4frame.h" />
+    <ClInclude Include="..\..\..\lib\lz4hc.h" />
+    <ClInclude Include="..\..\..\lib\xxhash.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/visual/VS2017/fullbench/fullbench.vcxproj b/visual/VS2017/fullbench/fullbench.vcxproj
new file mode 100644
index 0000000..54c9743
--- /dev/null
+++ b/visual/VS2017/fullbench/fullbench.vcxproj
@@ -0,0 +1,180 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{6A4DF4EF-C77F-43C6-8901-DDCD20879E4E}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>fullbench</RootNamespace>
+    <OutDir>$(SolutionDir)bin\$(Platform)_$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)bin\obj\$(RootNamespace)_$(Platform)_$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup> <!-- Sources compiled into this executable: the lz4 library files are built in directly, together with the benchmark driver. -->
+    <ClCompile Include="..\..\..\lib\lz4.c" />
+    <ClCompile Include="..\..\..\lib\lz4frame.c" />
+    <ClCompile Include="..\..\..\lib\lz4hc.c" />
+    <ClCompile Include="..\..\..\lib\xxhash.c" />
+    <ClCompile Include="..\..\..\tests\fullbench.c" /> <!-- Same driver source that the fullbench-dll project compiles. -->
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\lib\lz4.h" />
+    <ClInclude Include="..\..\..\lib\lz4frame.h" />
+    <ClInclude Include="..\..\..\lib\lz4frame_static.h" />
+    <ClInclude Include="..\..\..\lib\lz4hc.h" />
+    <ClInclude Include="..\..\..\lib\xxhash.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/visual/VS2017/fuzzer/fuzzer.vcxproj b/visual/VS2017/fuzzer/fuzzer.vcxproj
new file mode 100644
index 0000000..aa6fe42
--- /dev/null
+++ b/visual/VS2017/fuzzer/fuzzer.vcxproj
@@ -0,0 +1,177 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{18B9F1A7-9C66-4352-898B-30804DADE0FD}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>fuzzer</RootNamespace>
+    <OutDir>$(SolutionDir)bin\$(Platform)_$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)bin\obj\$(RootNamespace)_$(Platform)_$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup> <!-- Sources compiled into this executable: the lz4 library files are built in directly. lz4frame.c is not part of this project. -->
+    <ClCompile Include="..\..\..\lib\lz4.c" />
+    <ClCompile Include="..\..\..\lib\lz4hc.c" />
+    <ClCompile Include="..\..\..\lib\xxhash.c" />
+    <ClCompile Include="..\..\..\tests\fuzzer.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\lib\lz4.h" />
+    <ClInclude Include="..\..\..\lib\lz4hc.h" />
+    <ClInclude Include="..\..\..\lib\xxhash.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/visual/VS2017/liblz4-dll/liblz4-dll.rc b/visual/VS2017/liblz4-dll/liblz4-dll.rc
new file mode 100644
index 0000000..b1871fe
--- /dev/null
+++ b/visual/VS2017/liblz4-dll/liblz4-dll.rc
@@ -0,0 +1,51 @@
+// Microsoft Visual C++ generated resource script.
+// Defines the VERSIONINFO resource; version numbers and strings are taken from lz4.h.
+
+#include "lz4.h" /* LZ4_VERSION_MAJOR, LZ4_VERSION_MINOR, LZ4_VERSION_RELEASE, LZ4_VERSION_STRING */
+#define APSTUDIO_READONLY_SYMBOLS
+#include "verrsrc.h"
+#undef APSTUDIO_READONLY_SYMBOLS
+
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
+LANGUAGE 9, 1  // 9 = LANG_ENGLISH, 1 = SUBLANG_ENGLISH_US
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version: numeric fields use the LZ4_VERSION_* macros; the fourth field is fixed at 0.
+//
+
+VS_VERSION_INFO     VERSIONINFO
+  FILEVERSION       LZ4_VERSION_MAJOR,LZ4_VERSION_MINOR,LZ4_VERSION_RELEASE,0
+  PRODUCTVERSION    LZ4_VERSION_MAJOR,LZ4_VERSION_MINOR,LZ4_VERSION_RELEASE,0
+ FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
+#ifdef _DEBUG
+ FILEFLAGS VS_FF_DEBUG  // NOTE(review): requires _DEBUG to be defined for the resource compiler; confirm in the project's ResourceCompile settings.
+#else
+ FILEFLAGS 0x0L
+#endif
+ FILEOS VOS_NT_WINDOWS32
+ FILETYPE VFT_DLL
+ FILESUBTYPE VFT2_UNKNOWN
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "040904B0"  // language 0x0409 + code page 0x04B0 (1200), matching "Translation" below
+        BEGIN
+            VALUE "CompanyName", "Yann Collet"
+            VALUE "FileDescription", "Extremely fast compression"
+            VALUE "FileVersion", LZ4_VERSION_STRING
+            VALUE "InternalName", "lz4.dll"
+            VALUE "LegalCopyright", "Copyright (C) 2013-2016, Yann Collet"
+            VALUE "OriginalFilename", "lz4.dll"  // NOTE(review): fullbench-dll links liblz4.lib; confirm the built DLL is really named lz4.dll.
+            VALUE "ProductName", "LZ4"
+            VALUE "ProductVersion", LZ4_VERSION_STRING
+        END
+    END
+    BLOCK "VarFileInfo"
+    BEGIN
+        VALUE "Translation", 0x0409, 1200
+    END
+END
+
+#endif
diff --git a/visual/VS2017/liblz4-dll/liblz4-dll.vcxproj b/visual/VS2017/liblz4-dll/liblz4-dll.vcxproj
new file mode 100644
index 0000000..8e7ee3b
--- /dev/null
+++ b/visual/VS2017/liblz4-dll/liblz4-dll.vcxproj
@@ -0,0 +1,183 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <!-- MSBuild project that builds the LZ4 sources as a dynamic library named liblz4 for Debug/Release on Win32/x64. -->
+  <ItemGroup Label="ProjectConfigurations"> <!-- The four configuration|platform pairs offered by this project. -->
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals"> <!-- Project identity and output directories. -->
+    <ProjectGuid>{9800039D-4AAA-43A4-BB78-FEF6F4836927}</ProjectGuid> <!-- Same GUID as the liblz4-dll entry in lz4.sln. -->
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>liblz4-dll</RootNamespace>
+    <OutDir>$(SolutionDir)bin\$(Platform)_$(Configuration)\</OutDir> <!-- Binaries land in bin\Platform_Configuration under the solution directory. -->
+    <IntDir>$(SolutionDir)bin\obj\$(RootNamespace)_$(Platform)_$(Configuration)\</IntDir> <!-- Intermediate files get a directory keyed by RootNamespace, platform and configuration. -->
+    <ProjectName>liblz4-dll</ProjectName>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> <!-- The four Configuration groups all build a DynamicLibrary with toolset v141; they differ in UseDebugLibraries and the optional WholeProgramOptimization. -->
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization> <!-- Opt-in: only set when EnableWholeProgramOptimization is true. -->
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization> <!-- Opt-in: only set when EnableWholeProgramOptimization is true. -->
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Each PropertySheets group imports the per-user property sheet only if that file exists. -->
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Per-configuration incremental linking, output name and header search path. -->
+    <LinkIncremental>true</LinkIncremental>
+    <TargetName>liblz4</TargetName> <!-- Output base name is liblz4 rather than the project name liblz4-dll. -->
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath> <!-- Adds ..\..\lib relative to the solution directory to the header search path. -->
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <TargetName>liblz4</TargetName>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis> <!-- Code analysis is switched on for the x64 configurations only. -->
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <TargetName>liblz4</TargetName>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <TargetName>liblz4</TargetName>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis> <!-- Code analysis is switched on for the x64 configurations only. -->
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Compiler and linker defaults for Debug|Win32. -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;LZ4_DLL_EXPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions> <!-- LZ4_DLL_EXPORT=1 presumably makes the LZ4 headers export the API from the DLL; confirm in lz4.h. -->
+      <TreatWarningAsError>true</TreatWarningAsError> <!-- Warnings fail Debug builds; the Release groups set this to false. -->
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary> <!-- Only set when UseStaticCRT is true; otherwise no runtime library is chosen here. -->
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <!-- Same as Debug|Win32 plus PREfast and an explicit /analyze option. -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;LZ4_DLL_EXPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions> <!-- NOTE(review): presumably raises the stack-usage warning threshold of the analyzer to 295252 bytes; confirm against the /analyze documentation. -->
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <!-- Optimized build: MaxSpeed, intrinsics, function-level linking, COMDAT folding and reference optimization. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;LZ4_DLL_EXPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError> <!-- Warnings do not fail Release builds. -->
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation> <!-- Debug information is generated for Release builds as well. -->
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> <!-- Same as Release|Win32 plus PREfast and an explicit /analyze option. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;LZ4_DLL_EXPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup> <!-- Headers from the lib directory three levels above this project. -->
+    <ClInclude Include="..\..\..\lib\lz4.h" />
+    <ClInclude Include="..\..\..\lib\lz4frame.h" />
+    <ClInclude Include="..\..\..\lib\lz4frame_static.h" />
+    <ClInclude Include="..\..\..\lib\lz4hc.h" />
+    <ClInclude Include="..\..\..\lib\xxhash.h" />
+  </ItemGroup>
+  <ItemGroup> <!-- C sources compiled into the library. -->
+    <ClCompile Include="..\..\..\lib\lz4.c" />
+    <ClCompile Include="..\..\..\lib\lz4frame.c" />
+    <ClCompile Include="..\..\..\lib\lz4hc.c" />
+    <ClCompile Include="..\..\..\lib\xxhash.c" />
+  </ItemGroup>
+  <ItemGroup> <!-- Version-information resource script located next to this project file. -->
+    <ResourceCompile Include="liblz4-dll.rc" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/visual/VS2017/liblz4/liblz4.vcxproj b/visual/VS2017/liblz4/liblz4.vcxproj
new file mode 100644
index 0000000..948f7db
--- /dev/null
+++ b/visual/VS2017/liblz4/liblz4.vcxproj
@@ -0,0 +1,179 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <!-- MSBuild project that builds the LZ4 sources as a static library named liblz4_static for Debug/Release on Win32/x64. -->
+  <ItemGroup Label="ProjectConfigurations"> <!-- The four configuration|platform pairs offered by this project. -->
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals"> <!-- Project identity and output directories. -->
+    <ProjectGuid>{9092C5CC-3E71-41B3-BF68-4A7BDD8A5476}</ProjectGuid> <!-- Same GUID as the liblz4 entry in lz4.sln. -->
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>liblz4</RootNamespace>
+    <OutDir>$(SolutionDir)bin\$(Platform)_$(Configuration)\</OutDir> <!-- Binaries land in bin\Platform_Configuration under the solution directory. -->
+    <IntDir>$(SolutionDir)bin\obj\$(RootNamespace)_$(Platform)_$(Configuration)\</IntDir> <!-- Intermediate files get a directory keyed by RootNamespace, platform and configuration. -->
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> <!-- The four Configuration groups all build a StaticLibrary with toolset v141; they differ in UseDebugLibraries and the optional WholeProgramOptimization. -->
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization> <!-- Opt-in: only set when EnableWholeProgramOptimization is true. -->
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization> <!-- Opt-in: only set when EnableWholeProgramOptimization is true. -->
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Each PropertySheets group imports the per-user property sheet only if that file exists. -->
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Per-configuration incremental linking, output name and header search path. -->
+    <LinkIncremental>true</LinkIncremental>
+    <TargetName>liblz4_static</TargetName> <!-- Output base name is liblz4_static rather than the project name liblz4. -->
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath> <!-- Adds ..\..\lib relative to the solution directory to the header search path. -->
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <TargetName>liblz4_static</TargetName>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis> <!-- Code analysis is switched on for the x64 configurations only. -->
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <TargetName>liblz4_static</TargetName>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <TargetName>liblz4_static</TargetName>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis> <!-- Code analysis is switched on for the x64 configurations only. -->
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Compiler and linker defaults for Debug|Win32. -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;LZ4_DLL_EXPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions> <!-- NOTE(review): LZ4_DLL_EXPORT=1 is defined in all four configurations although ConfigurationType is StaticLibrary; confirm this is intended. -->
+      <TreatWarningAsError>true</TreatWarningAsError> <!-- Warnings fail Debug builds; the Release groups set this to false. -->
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary> <!-- Only set when UseStaticCRT is true; otherwise no runtime library is chosen here. -->
+    </ClCompile>
+    <Link> <!-- NOTE(review): Link settings appear in every configuration of this StaticLibrary project; verify they have any effect. -->
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <!-- Same as Debug|Win32 plus PREfast and an explicit /analyze option. -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;LZ4_DLL_EXPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions> <!-- NOTE(review): presumably raises the stack-usage warning threshold of the analyzer to 295252 bytes; confirm against the /analyze documentation. -->
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <!-- Optimized build: MaxSpeed, intrinsics and function-level linking. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;LZ4_DLL_EXPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError> <!-- Warnings do not fail Release builds. -->
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> <!-- Same as Release|Win32 plus PREfast and an explicit /analyze option. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;LZ4_DLL_EXPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup> <!-- Headers from the lib directory three levels above this project. -->
+    <ClInclude Include="..\..\..\lib\lz4.h" />
+    <ClInclude Include="..\..\..\lib\lz4frame.h" />
+    <ClInclude Include="..\..\..\lib\lz4frame_static.h" />
+    <ClInclude Include="..\..\..\lib\lz4hc.h" />
+    <ClInclude Include="..\..\..\lib\xxhash.h" />
+  </ItemGroup>
+  <ItemGroup> <!-- C sources compiled into the library. -->
+    <ClCompile Include="..\..\..\lib\lz4.c" />
+    <ClCompile Include="..\..\..\lib\lz4frame.c" />
+    <ClCompile Include="..\..\..\lib\lz4hc.c" />
+    <ClCompile Include="..\..\..\lib\xxhash.c" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/visual/VS2017/lz4.sln b/visual/VS2017/lz4.sln
new file mode 100644
index 0000000..78f223b
--- /dev/null
+++ b/visual/VS2017/lz4.sln
@@ -0,0 +1,98 @@
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 15
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lz4", "lz4\lz4.vcxproj", "{E30329AC-0057-4FE0-8FDA-7F650D398C4C}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "liblz4-dll", "liblz4-dll\liblz4-dll.vcxproj", "{9800039D-4AAA-43A4-BB78-FEF6F4836927}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "liblz4", "liblz4\liblz4.vcxproj", "{9092C5CC-3E71-41B3-BF68-4A7BDD8A5476}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fuzzer", "fuzzer\fuzzer.vcxproj", "{18B9F1A7-9C66-4352-898B-30804DADE0FD}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fullbench", "fullbench\fullbench.vcxproj", "{6A4DF4EF-C77F-43C6-8901-DDCD20879E4E}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "frametest", "frametest\frametest.vcxproj", "{39AD6ECC-8BAD-4368-95E4-A1AA2F077BB7}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "datagen", "datagen\datagen.vcxproj", "{D745AE2F-596A-403A-9B91-81A8C6779243}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fullbench-dll", "fullbench-dll\fullbench-dll.vcxproj", "{13992FD2-077E-4954-B065-A428198201A9}"
+	ProjectSection(ProjectDependencies) = postProject
+		{9800039D-4AAA-43A4-BB78-FEF6F4836927} = {9800039D-4AAA-43A4-BB78-FEF6F4836927}
+	EndProjectSection
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Win32 = Debug|Win32
+		Debug|x64 = Debug|x64
+		Release|Win32 = Release|Win32
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Debug|Win32.ActiveCfg = Debug|Win32
+		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Debug|Win32.Build.0 = Debug|Win32
+		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Debug|x64.ActiveCfg = Debug|x64
+		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Debug|x64.Build.0 = Debug|x64
+		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Release|Win32.ActiveCfg = Release|Win32
+		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Release|Win32.Build.0 = Release|Win32
+		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Release|x64.ActiveCfg = Release|x64
+		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Release|x64.Build.0 = Release|x64
+		{9800039D-4AAA-43A4-BB78-FEF6F4836927}.Debug|Win32.ActiveCfg = Debug|Win32
+		{9800039D-4AAA-43A4-BB78-FEF6F4836927}.Debug|Win32.Build.0 = Debug|Win32
+		{9800039D-4AAA-43A4-BB78-FEF6F4836927}.Debug|x64.ActiveCfg = Debug|x64
+		{9800039D-4AAA-43A4-BB78-FEF6F4836927}.Debug|x64.Build.0 = Debug|x64
+		{9800039D-4AAA-43A4-BB78-FEF6F4836927}.Release|Win32.ActiveCfg = Release|Win32
+		{9800039D-4AAA-43A4-BB78-FEF6F4836927}.Release|Win32.Build.0 = Release|Win32
+		{9800039D-4AAA-43A4-BB78-FEF6F4836927}.Release|x64.ActiveCfg = Release|x64
+		{9800039D-4AAA-43A4-BB78-FEF6F4836927}.Release|x64.Build.0 = Release|x64
+		{9092C5CC-3E71-41B3-BF68-4A7BDD8A5476}.Debug|Win32.ActiveCfg = Debug|Win32
+		{9092C5CC-3E71-41B3-BF68-4A7BDD8A5476}.Debug|Win32.Build.0 = Debug|Win32
+		{9092C5CC-3E71-41B3-BF68-4A7BDD8A5476}.Debug|x64.ActiveCfg = Debug|x64
+		{9092C5CC-3E71-41B3-BF68-4A7BDD8A5476}.Debug|x64.Build.0 = Debug|x64
+		{9092C5CC-3E71-41B3-BF68-4A7BDD8A5476}.Release|Win32.ActiveCfg = Release|Win32
+		{9092C5CC-3E71-41B3-BF68-4A7BDD8A5476}.Release|Win32.Build.0 = Release|Win32
+		{9092C5CC-3E71-41B3-BF68-4A7BDD8A5476}.Release|x64.ActiveCfg = Release|x64
+		{9092C5CC-3E71-41B3-BF68-4A7BDD8A5476}.Release|x64.Build.0 = Release|x64
+		{18B9F1A7-9C66-4352-898B-30804DADE0FD}.Debug|Win32.ActiveCfg = Debug|Win32
+		{18B9F1A7-9C66-4352-898B-30804DADE0FD}.Debug|Win32.Build.0 = Debug|Win32
+		{18B9F1A7-9C66-4352-898B-30804DADE0FD}.Debug|x64.ActiveCfg = Debug|x64
+		{18B9F1A7-9C66-4352-898B-30804DADE0FD}.Debug|x64.Build.0 = Debug|x64
+		{18B9F1A7-9C66-4352-898B-30804DADE0FD}.Release|Win32.ActiveCfg = Release|Win32
+		{18B9F1A7-9C66-4352-898B-30804DADE0FD}.Release|Win32.Build.0 = Release|Win32
+		{18B9F1A7-9C66-4352-898B-30804DADE0FD}.Release|x64.ActiveCfg = Release|x64
+		{18B9F1A7-9C66-4352-898B-30804DADE0FD}.Release|x64.Build.0 = Release|x64
+		{6A4DF4EF-C77F-43C6-8901-DDCD20879E4E}.Debug|Win32.ActiveCfg = Debug|Win32
+		{6A4DF4EF-C77F-43C6-8901-DDCD20879E4E}.Debug|Win32.Build.0 = Debug|Win32
+		{6A4DF4EF-C77F-43C6-8901-DDCD20879E4E}.Debug|x64.ActiveCfg = Debug|x64
+		{6A4DF4EF-C77F-43C6-8901-DDCD20879E4E}.Debug|x64.Build.0 = Debug|x64
+		{6A4DF4EF-C77F-43C6-8901-DDCD20879E4E}.Release|Win32.ActiveCfg = Release|Win32
+		{6A4DF4EF-C77F-43C6-8901-DDCD20879E4E}.Release|Win32.Build.0 = Release|Win32
+		{6A4DF4EF-C77F-43C6-8901-DDCD20879E4E}.Release|x64.ActiveCfg = Release|x64
+		{6A4DF4EF-C77F-43C6-8901-DDCD20879E4E}.Release|x64.Build.0 = Release|x64
+		{39AD6ECC-8BAD-4368-95E4-A1AA2F077BB7}.Debug|Win32.ActiveCfg = Debug|Win32
+		{39AD6ECC-8BAD-4368-95E4-A1AA2F077BB7}.Debug|Win32.Build.0 = Debug|Win32
+		{39AD6ECC-8BAD-4368-95E4-A1AA2F077BB7}.Debug|x64.ActiveCfg = Debug|x64
+		{39AD6ECC-8BAD-4368-95E4-A1AA2F077BB7}.Debug|x64.Build.0 = Debug|x64
+		{39AD6ECC-8BAD-4368-95E4-A1AA2F077BB7}.Release|Win32.ActiveCfg = Release|Win32
+		{39AD6ECC-8BAD-4368-95E4-A1AA2F077BB7}.Release|Win32.Build.0 = Release|Win32
+		{39AD6ECC-8BAD-4368-95E4-A1AA2F077BB7}.Release|x64.ActiveCfg = Release|x64
+		{39AD6ECC-8BAD-4368-95E4-A1AA2F077BB7}.Release|x64.Build.0 = Release|x64
+		{D745AE2F-596A-403A-9B91-81A8C6779243}.Debug|Win32.ActiveCfg = Debug|Win32
+		{D745AE2F-596A-403A-9B91-81A8C6779243}.Debug|Win32.Build.0 = Debug|Win32
+		{D745AE2F-596A-403A-9B91-81A8C6779243}.Debug|x64.ActiveCfg = Debug|x64
+		{D745AE2F-596A-403A-9B91-81A8C6779243}.Debug|x64.Build.0 = Debug|x64
+		{D745AE2F-596A-403A-9B91-81A8C6779243}.Release|Win32.ActiveCfg = Release|Win32
+		{D745AE2F-596A-403A-9B91-81A8C6779243}.Release|Win32.Build.0 = Release|Win32
+		{D745AE2F-596A-403A-9B91-81A8C6779243}.Release|x64.ActiveCfg = Release|x64
+		{D745AE2F-596A-403A-9B91-81A8C6779243}.Release|x64.Build.0 = Release|x64
+		{13992FD2-077E-4954-B065-A428198201A9}.Debug|Win32.ActiveCfg = Debug|Win32
+		{13992FD2-077E-4954-B065-A428198201A9}.Debug|Win32.Build.0 = Debug|Win32
+		{13992FD2-077E-4954-B065-A428198201A9}.Debug|x64.ActiveCfg = Debug|x64
+		{13992FD2-077E-4954-B065-A428198201A9}.Debug|x64.Build.0 = Debug|x64
+		{13992FD2-077E-4954-B065-A428198201A9}.Release|Win32.ActiveCfg = Release|Win32
+		{13992FD2-077E-4954-B065-A428198201A9}.Release|Win32.Build.0 = Release|Win32
+		{13992FD2-077E-4954-B065-A428198201A9}.Release|x64.ActiveCfg = Release|x64
+		{13992FD2-077E-4954-B065-A428198201A9}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
