Upgrade lz4 to v1.9.3 am: 2ce84dba01 am: a57b7caba8 am: 5304b449d3

Original change: https://android-review.googlesource.com/c/platform/external/lz4/+/1824046

Change-Id: I145f40e37b0ad627dacb4bd591d8b847d83c6f56
diff --git a/.cirrus.yml b/.cirrus.yml
new file mode 100644
index 0000000..0c0e7a7
--- /dev/null
+++ b/.cirrus.yml
@@ -0,0 +1,5 @@
+freebsd_instance:
+  image_family: freebsd-12-1
+
+task:
+  script: pkg install -y gmake && gmake test
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 0000000..86b7696
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,32 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**Expected behavior**
+Please describe what you expected to happen.
+
+**To Reproduce**
+Steps to reproduce the behavior:
+1. Go to '...'
+2. Click on '....'
+3. Scroll down to '....'
+4. See error '...'
+If applicable, add screenshots to help explain your problem.
+
+**System (please complete the following information):**
+ - OS: [e.g. Mac]
+ - Version [e.g. 22]
+ - Compiler [e.g. gcc]
+ - Build System [e.g. Makefile]
+ - Other hardware specs [e.g. Core 2 duo...]
+
+**Additional context**
+Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 0000000..bbcbbe7
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,20 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Is your feature request related to a problem? Please describe.**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+**Describe the solution you'd like**
+A clear and concise description of what you want to happen.
+
+**Describe alternatives you've considered**
+A clear and concise description of any alternative solutions or features you've considered.
+
+**Additional context**
+Add any other context or screenshots about the feature request here.
diff --git a/.gitignore b/.gitignore
index 2a59a7d..d7ba96e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,6 +26,9 @@
 bin/
 *.zip
 
+# analyzers
+infer-out
+
 # Mac
 .DS_Store
 *.dSYM
diff --git a/.travis.yml b/.travis.yml
index bd29630..f201d52 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -10,9 +10,7 @@
       script:
         - make   # test library build
         - make clean
-        - make -C tests test-lz4 MOREFLAGS='-Werror -Wconversion -Wno-sign-conversion' | tee # test scenario where `stdout` is not the console
-        - make clean
-        - CFLAGS=-m32 make -C tests test-lz4-contentSize
+        - make test MOREFLAGS='-Werror -Wconversion -Wno-sign-conversion' | tee # test scenario where `stdout` is not the console
 
     # Container-based 12.04 LTS Server Edition 64 bit (doesn't support 32-bit includes)
     - name: (Precise) benchmark test
@@ -33,9 +31,15 @@
       script:
         - CC=clang MOREFLAGS=-fsanitize=address make -C tests test-frametest test-fuzzer
 
-    - name: Custom LZ4_DISTANCE_MAX
+    - name: Custom LZ4_DISTANCE_MAX ; lz4-wlib (CLI linked to dynamic library); LZ4_USER_MEMORY_FUNCTIONS
       script:
         - MOREFLAGS=-DLZ4_DISTANCE_MAX=8000 make check
+        - make clean
+        - make -C programs lz4-wlib
+        - make clean
+        - make -C tests fullbench-wmalloc  # test LZ4_USER_MEMORY_FUNCTIONS
+        - make clean
+        - CC="c++ -Wno-deprecated" make -C tests fullbench-wmalloc  # stricter function signature check
 
     - name: (Precise) g++ and clang CMake test
       dist: precise
@@ -47,18 +51,15 @@
         - make clean travis-install
         - make clean clangtest
 
-
-    # 14.04 LTS Server Edition 64 bit
-    - name: (Trusty) i386 gcc test
-      dist: trusty
+    - name: x32 compatibility test
       addons:
         apt:
           packages:
-            - libc6-dev-i386
             - gcc-multilib
       script:
         - make -C tests test MOREFLAGS=-mx32
 
+    # 14.04 LTS Server Edition 64 bit
     # presume clang >= v3.9.0
     - name: (Trusty) USan test
       dist: trusty
@@ -122,6 +123,21 @@
         - make platformTest CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static
         - make platformTest CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static
 
+    - name: aarch64 real-hw tests
+      arch: arm64
+      script:
+        - make test
+
+    - name: PPC64LE real-hw tests
+      arch: ppc64le
+      script:
+        - make test
+
+    - name: IBM s390x real-hw tests
+      arch: s390x
+      script:
+        - make test
+
     - name: (Xenial) gcc-5 compilation
       dist: xenial
       install:
diff --git a/METADATA b/METADATA
index 5101e76..bb02dd7 100644
--- a/METADATA
+++ b/METADATA
@@ -5,11 +5,11 @@
     type: GIT
     value: "https://github.com/lz4/lz4.git"
   }
-  version: "v1.9.2"
+  version: "v1.9.3"
   license_type: RESTRICTED
   last_upgrade_date {
-    year: 2020
-    month: 4
-    day: 13
+    year: 2021
+    month: 9
+    day: 10
   }
 }
diff --git a/Makefile b/Makefile
index f25f951..744005f 100644
--- a/Makefile
+++ b/Makefile
@@ -98,7 +98,7 @@
 	$(MAKE) -j1 install DESTDIR=~/install_test_dir
 
 cmake:
-	@cd contrib/cmake_unofficial; cmake $(CMAKE_PARAMS) CMakeLists.txt; $(MAKE)
+	@cd build/cmake; cmake $(CMAKE_PARAMS) CMakeLists.txt; $(MAKE)
 
 endif
 
@@ -124,8 +124,8 @@
 
 .PHONY: test
 test:
-	$(MAKE) -C $(TESTDIR) $@
-	$(MAKE) -C $(EXDIR) $@
+	CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)" $(MAKE) -C $(TESTDIR) $@
+	CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)" $(MAKE) -C $(EXDIR) $@
 
 clangtest: CFLAGS ?= -O3
 clangtest: CFLAGS += -Werror -Wconversion -Wno-sign-conversion
@@ -142,15 +142,20 @@
 	@CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" $(MAKE) -C $(PRGDIR)  native CC=clang
 	@CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" $(MAKE) -C $(TESTDIR) native CC=clang
 
+usan: CC      = clang
+usan: CFLAGS  = -O3 -g -fsanitize=undefined -fno-sanitize-recover=undefined -fsanitize-recover=pointer-overflow
+usan: LDFLAGS = $(CFLAGS)
 usan: clean
-	CC=clang CFLAGS="-O3 -g -fsanitize=undefined" $(MAKE) test FUZZER_TIME="-T30s" NB_LOOPS=-i1
+	CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)" $(MAKE) test FUZZER_TIME="-T30s" NB_LOOPS=-i1
 
 usan32: clean
 	CFLAGS="-m32 -O3 -g -fsanitize=undefined" $(MAKE) test FUZZER_TIME="-T30s" NB_LOOPS=-i1
 
+SCANBUILD ?= scan-build
+SCANBUILD_FLAGS += --status-bugs -v --force-analyze-debug-code
 .PHONY: staticAnalyze
 staticAnalyze: clean
-	CFLAGS=-g scan-build --status-bugs -v $(MAKE) all
+	CPPFLAGS=-DLZ4_DEBUG=1 CFLAGS=-g $(SCANBUILD) $(SCANBUILD_FLAGS) $(MAKE) all V=1 DEBUGLEVEL=1
 
 .PHONY: cppcheck
 cppcheck:
diff --git a/NEWS b/NEWS
index 860f15b..401931e 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,39 @@
+v1.9.3
+perf: highly improved speed in kernel space, by @terrelln
+perf: faster speed with Visual Studio, thanks to @wolfpld and @remittor
+perf: improved dictionary compression speed, by @felixhandte
+perf: fixed LZ4_compress_HC_destSize() ratio, detected by @hsiangkao
+perf: reduced stack usage in high compression mode, by @Yanpas
+api : LZ4_decompress_safe_partial() supports unknown compressed size, requested by @jfkthame
+api : improved LZ4F_compressBound() with automatic flushing, by Christopher Harvie
+api : can (de)compress to/from NULL without UBs
+api : fix alignment test on 32-bit systems (state initialization)
+api : fix LZ4_saveDictHC() in corner case scenario, detected by @IgorKorkin
+cli : `-l` legacy format is now compatible with `-m` multiple files, by Filipe Calasans
+cli : benchmark mode supports dictionary, by @rkoradi
+cli : fix --fast with large argument, detected by @picoHz
+build: link to user-defined memory functions with LZ4_USER_MEMORY_FUNCTIONS, suggested by Yuriy Levchenko
+build: contrib/cmake_unofficial/ moved to build/cmake/
+build: visual/* moved to build/
+build: updated meson script, by @neheb
+build: tinycc support, by Anton Kochkov
+install: Haiku support, by Jerome Duval
+doc : updated LZ4 frame format, clarify EndMark
+
+v1.9.2
+fix : out-of-bound read in exceptional circumstances when using decompress_partial(), by @terrelln
+fix : slim opportunity for out-of-bound write with compress_fast() with a large enough input and when providing an output smaller than recommended (< LZ4_compressBound(inputSize)), by @terrelln
+fix : rare data corruption bug with LZ4_compress_destSize(), by @terrelln
+fix : data corruption bug when Streaming with an Attached Dict in HC Mode, by @felixhandte
+perf: enable LZ4_FAST_DEC_LOOP on aarch64/GCC by default, by @prekageo
+perf: improved lz4frame streaming API speed, by @dreambottle
+perf: speed up lz4hc on slow patterns when using external dictionary, by @terrelln
+api: better in-place decompression and compression support
+cli : --list supports multi-frames files, by @gstedman
+cli: --version outputs to stdout
+cli : add option --best as an alias of -12 , by @Low-power
+misc: Integration into oss-fuzz by @cmeister2, expanded list of scenarios by @terrelln
+
 v1.9.1
 fix : decompression functions were reading a few bytes beyond input size (introduced in v1.9.0, reported by @ppodolsky and @danlark1)
 api : fix : lz4frame initializers compatibility with c++, reported by @degski
diff --git a/README.md b/README.md
index 607fc4e..bdb028c 100644
--- a/README.md
+++ b/README.md
@@ -26,25 +26,13 @@
 
 |Branch      |Status   |
 |------------|---------|
-|master      | [![Build Status][travisMasterBadge]][travisLink] [![Build status][AppveyorMasterBadge]][AppveyorLink] [![coverity][coverBadge]][coverlink] |
 |dev         | [![Build Status][travisDevBadge]][travisLink]    [![Build status][AppveyorDevBadge]][AppveyorLink]                                         |
 
-[travisMasterBadge]: https://travis-ci.org/lz4/lz4.svg?branch=master "Continuous Integration test suite"
 [travisDevBadge]: https://travis-ci.org/lz4/lz4.svg?branch=dev "Continuous Integration test suite"
 [travisLink]: https://travis-ci.org/lz4/lz4
-[AppveyorMasterBadge]: https://ci.appveyor.com/api/projects/status/github/lz4/lz4?branch=master&svg=true "Windows test suite"
 [AppveyorDevBadge]: https://ci.appveyor.com/api/projects/status/github/lz4/lz4?branch=dev&svg=true "Windows test suite"
 [AppveyorLink]: https://ci.appveyor.com/project/YannCollet/lz4-1lndh
-[coverBadge]: https://scan.coverity.com/projects/4735/badge.svg "Static code analysis of Master branch"
-[coverlink]: https://scan.coverity.com/projects/4735
 
-> **Branch Policy:**
-> - The "master" branch is considered stable, at all times.
-> - The "dev" branch is the one where all contributions must be merged
-    before being promoted to master.
->   + If you plan to propose a patch, please commit into the "dev" branch,
-      or its own feature branch.
-      Direct commit to "master" are not permitted.
 
 Benchmarks
 -------------------------
@@ -95,6 +83,17 @@
 [redirection]: https://www.gnu.org/prep/standards/html_node/Directory-Variables.html
 [command redefinition]: https://www.gnu.org/prep/standards/html_node/Utilities-in-Makefiles.html
 
+Building LZ4 - Using vcpkg
+
+You can download and install LZ4 using the [vcpkg](https://github.com/Microsoft/vcpkg) dependency manager:
+
+    git clone https://github.com/Microsoft/vcpkg.git
+    cd vcpkg
+    ./bootstrap-vcpkg.sh
+    ./vcpkg integrate install
+    ./vcpkg install lz4
+
+The LZ4 port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository.
 
 Documentation
 -------------------------
diff --git a/appveyor.yml b/appveyor.yml
index 056719a..b4c27ef 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,22 +1,22 @@
 version: 1.0.{build}
 environment:
   matrix:
-  - COMPILER: "visual"
-    CONFIGURATION: "Debug"
-    PLATFORM: "x64"
-  - COMPILER: "visual"
-    CONFIGURATION: "Debug"
-    PLATFORM: "Win32"
-  - COMPILER: "visual"
-    CONFIGURATION: "Release"
-    PLATFORM: "x64"
-  - COMPILER: "visual"
-    CONFIGURATION: "Release"
-    PLATFORM: "Win32"
   - COMPILER: "gcc"
     PLATFORM: "mingw64"
   - COMPILER: "gcc"
     PLATFORM: "mingw32"
+  - COMPILER: "visual"
+    CONFIGURATION: "Debug"
+    PLATFORM: "x64"
+  - COMPILER: "visual"
+    CONFIGURATION: "Debug"
+    PLATFORM: "Win32"
+  - COMPILER: "visual"
+    CONFIGURATION: "Release"
+    PLATFORM: "x64"
+  - COMPILER: "visual"
+    CONFIGURATION: "Release"
+    PLATFORM: "Win32"
   - COMPILER: "gcc"
     PLATFORM: "clang"
 
@@ -47,10 +47,14 @@
       make -v &&
       echo ----- &&
       if not [%PLATFORM%]==[clang] (
-        make -C programs lz4 && make -C tests fullbench && make -C lib lib
+        make -C programs lz4 &&
+        make -C tests fullbench &&
+        make -C tests fuzzer &&
+        make -C lib lib V=1
       ) ELSE (
         make -C programs lz4 CC=clang MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion" &&
         make -C tests fullbench CC=clang MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion" &&
+        make -C tests fuzzer CC=clang MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion" &&
         make -C lib lib CC=clang MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion"
       )
     )
@@ -63,39 +67,39 @@
       COPY lib\lz4hc.h bin\include\ &&
       COPY lib\lz4frame.h bin\include\ &&
       COPY lib\liblz4.a bin\static\liblz4_static.lib &&
-      COPY lib\dll\liblz4.* bin\dll\ &&
+      COPY lib\dll\* bin\dll\ &&
       COPY lib\dll\example\Makefile bin\example\ &&
       COPY lib\dll\example\fullbench-dll.* bin\example\ &&
       COPY lib\dll\example\README.md bin\ &&
       COPY programs\lz4.exe bin\lz4.exe
     )
   - if [%COMPILER%]==[gcc] if [%PLATFORM%]==[mingw64] (
-      7z.exe a bin\lz4_x64.zip NEWS .\bin\lz4.exe .\bin\README.md .\bin\example .\bin\dll .\bin\static .\bin\include &&
+      7z.exe a -bb1 bin\lz4_x64.zip NEWS .\bin\lz4.exe .\bin\README.md .\bin\example .\bin\dll .\bin\static .\bin\include &&
       appveyor PushArtifact bin\lz4_x64.zip
     )
   - if [%COMPILER%]==[gcc] if [%PLATFORM%]==[mingw32] (
-      7z.exe a bin\lz4_x86.zip NEWS .\bin\lz4.exe .\bin\README.md .\bin\example .\bin\dll .\bin\static .\bin\include &&
+      7z.exe a -bb1 bin\lz4_x86.zip NEWS .\bin\lz4.exe .\bin\README.md .\bin\example .\bin\dll .\bin\static .\bin\include &&
       appveyor PushArtifact bin\lz4_x86.zip
     )
-  - if [%COMPILER%]==[gcc] (COPY tests\fullbench.exe programs\)
+  - if [%COMPILER%]==[gcc] (COPY tests\*.exe programs\)
   - if [%COMPILER%]==[visual] (
       ECHO *** &&
       ECHO *** Building Visual Studio 2010 %PLATFORM%\%CONFIGURATION% &&
       ECHO *** &&
-      msbuild "visual\VS2010\lz4.sln" %ADDITIONALPARAM% /m /verbosity:minimal /property:PlatformToolset=v100 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /p:EnableWholeProgramOptimization=true /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
+      msbuild "build\VS2010\lz4.sln" %ADDITIONALPARAM% /m /verbosity:minimal /property:PlatformToolset=v100 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /p:EnableWholeProgramOptimization=true /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
       ECHO *** &&
       ECHO *** Building Visual Studio 2012 %PLATFORM%\%CONFIGURATION% &&
       ECHO *** &&
-      msbuild "visual\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v110 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
+      msbuild "build\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v110 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
       ECHO *** &&
       ECHO *** Building Visual Studio 2013 %PLATFORM%\%CONFIGURATION% &&
       ECHO *** &&
-      msbuild "visual\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v120 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
+      msbuild "build\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v120 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
       ECHO *** &&
       ECHO *** Building Visual Studio 2015 %PLATFORM%\%CONFIGURATION% &&
       ECHO *** &&
-      msbuild "visual\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v140 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
-      COPY visual\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe programs\
+      msbuild "build\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v140 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
+      COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe programs\
     )
 
 test_script:
@@ -110,7 +114,9 @@
       lz4 -i1b10 lz4.exe &&
       lz4 -i1b15 lz4.exe &&
       echo ------- lz4 tested ------- &&
-      fullbench.exe -i1 fullbench.exe
+      fullbench.exe -i1 fullbench.exe &&
+      echo trying to launch fuzzer.exe &&
+      fuzzer.exe -v -T30s
     )
 
 artifacts:
diff --git a/build/.gitignore b/build/.gitignore
new file mode 100644
index 0000000..69e1111
--- /dev/null
+++ b/build/.gitignore
@@ -0,0 +1,16 @@
+# Visual C++
+.vs/
+*Copy
+*.db
+*.opensdf
+*.sdf
+*.suo
+*.user
+ver*/
+VS2010/bin/
+VS2017/bin/
+ipch
+
+# Fixup for lz4 project directories
+!VS2010/lz4
+!VS2017/lz4
diff --git a/visual/README.md b/build/README.md
similarity index 80%
rename from visual/README.md
rename to build/README.md
index 216971f..d416aeb 100644
--- a/visual/README.md
+++ b/build/README.md
@@ -4,7 +4,9 @@
 #### Included projects
 
 The following projects are included with the lz4 distribution:
+- `cmake` - CMake project
 - `VS2010` - Visual Studio 2010 project (which also works well with Visual Studio 2012, 2013, 2015)
+- `VS2017` - Visual Studio 2017 project
 
 
 #### How to compile lz4 with Visual Studio
@@ -12,25 +14,25 @@
 1. Install Visual Studio e.g. VS 2015 Community Edition (it's free).
 2. Download the latest version of lz4 from https://github.com/lz4/lz4/releases
 3. Decompress ZIP archive.
-4. Go to decompressed directory then to `visual` then `VS2010` and open `lz4.sln`
+4. Go to decompressed directory then to `build` then `VS2010` and open `lz4.sln`
 5. Visual Studio will ask about converting VS2010 project to VS2015 and you should agree.
 6. Change `Debug` to `Release` and if you have 64-bit Windows change also `Win32` to `x64`.
 7. Press F7 on keyboard or select `BUILD` from the menu bar and choose `Build Solution`.
-8. If compilation will be fine a compiled executable will be in `visual\VS2010\bin\x64_Release\lz4.exe`
+8. If compilation succeeds, the compiled executable will be in `build\VS2010\bin\x64_Release\lz4.exe`
 
 
 #### Projects available within lz4.sln
 
 The Visual Studio solution file `lz4.sln` contains many projects that will be compiled to the
-`visual\VS2010\bin\$(Platform)_$(Configuration)` directory. For example `lz4` set to `x64` and
-`Release` will be compiled to `visual\VS2010\bin\x64_Release\lz4.exe`. The solution file contains the
+`build\VS2010\bin\$(Platform)_$(Configuration)` directory. For example `lz4` set to `x64` and
+`Release` will be compiled to `build\VS2010\bin\x64_Release\lz4.exe`. The solution file contains the
 following projects:
 
 - `lz4` : Command Line Utility, supporting gzip-like arguments
 - `datagen` : Synthetic and parametrable data generator, for tests
 - `frametest` : Test tool that checks lz4frame integrity on target platform
 - `fullbench`  : Precisely measure speed for each lz4 inner functions
-- `fuzzer` : Test tool, to check lz4 integrity on target platform 
+- `fuzzer` : Test tool, to check lz4 integrity on target platform
 - `liblz4` : A static LZ4 library compiled to `liblz4_static.lib`
 - `liblz4-dll` : A dynamic LZ4 library (DLL) compiled to `liblz4.dll` with the import library `liblz4.lib`
 - `fullbench-dll` : The fullbench program compiled with the import library; the executable requires LZ4 DLL
@@ -39,8 +41,8 @@
 #### Using LZ4 DLL with Microsoft Visual C++ project
 
 The header files `lib\lz4.h`, `lib\lz4hc.h`, `lib\lz4frame.h` and the import library
-`visual\VS2010\bin\$(Platform)_$(Configuration)\liblz4.lib` are required to compile a
-project using Visual C++.
+`build\VS2010\bin\$(Platform)_$(Configuration)\liblz4.lib` are required to
+compile a project using Visual C++.
 
 1. The path to header files should be added to `Additional Include Directories` that can
    be found in Project Properties of Visual Studio IDE in the `C/C++` Property Pages on the `General` page.
@@ -50,4 +52,4 @@
    then the directory has to be added to `Linker\General\Additional Library Directories`.
 
 The compiled executable will require LZ4 DLL which is available at
-`visual\VS2010\bin\$(Platform)_$(Configuration)\liblz4.dll`.
+`build\VS2010\bin\$(Platform)_$(Configuration)\liblz4.dll`.
diff --git a/visual/VS2010/datagen/datagen.vcxproj b/build/VS2010/datagen/datagen.vcxproj
similarity index 100%
rename from visual/VS2010/datagen/datagen.vcxproj
rename to build/VS2010/datagen/datagen.vcxproj
diff --git a/visual/VS2010/frametest/frametest.vcxproj b/build/VS2010/frametest/frametest.vcxproj
similarity index 100%
rename from visual/VS2010/frametest/frametest.vcxproj
rename to build/VS2010/frametest/frametest.vcxproj
diff --git a/visual/VS2010/fullbench-dll/fullbench-dll.vcxproj b/build/VS2010/fullbench-dll/fullbench-dll.vcxproj
similarity index 100%
rename from visual/VS2010/fullbench-dll/fullbench-dll.vcxproj
rename to build/VS2010/fullbench-dll/fullbench-dll.vcxproj
diff --git a/visual/VS2010/fullbench/fullbench.vcxproj b/build/VS2010/fullbench/fullbench.vcxproj
similarity index 100%
rename from visual/VS2010/fullbench/fullbench.vcxproj
rename to build/VS2010/fullbench/fullbench.vcxproj
diff --git a/visual/VS2010/fuzzer/fuzzer.vcxproj b/build/VS2010/fuzzer/fuzzer.vcxproj
similarity index 100%
rename from visual/VS2010/fuzzer/fuzzer.vcxproj
rename to build/VS2010/fuzzer/fuzzer.vcxproj
diff --git a/visual/VS2010/liblz4-dll/liblz4-dll.rc b/build/VS2010/liblz4-dll/liblz4-dll.rc
similarity index 100%
rename from visual/VS2010/liblz4-dll/liblz4-dll.rc
rename to build/VS2010/liblz4-dll/liblz4-dll.rc
diff --git a/visual/VS2010/liblz4-dll/liblz4-dll.vcxproj b/build/VS2010/liblz4-dll/liblz4-dll.vcxproj
similarity index 100%
rename from visual/VS2010/liblz4-dll/liblz4-dll.vcxproj
rename to build/VS2010/liblz4-dll/liblz4-dll.vcxproj
diff --git a/visual/VS2010/liblz4/liblz4.vcxproj b/build/VS2010/liblz4/liblz4.vcxproj
similarity index 100%
rename from visual/VS2010/liblz4/liblz4.vcxproj
rename to build/VS2010/liblz4/liblz4.vcxproj
diff --git a/visual/VS2010/lz4.sln b/build/VS2010/lz4.sln
similarity index 100%
rename from visual/VS2010/lz4.sln
rename to build/VS2010/lz4.sln
diff --git a/visual/VS2010/lz4/lz4.rc b/build/VS2010/lz4/lz4.rc
similarity index 100%
rename from visual/VS2010/lz4/lz4.rc
rename to build/VS2010/lz4/lz4.rc
diff --git a/visual/VS2010/lz4/lz4.vcxproj b/build/VS2010/lz4/lz4.vcxproj
similarity index 100%
rename from visual/VS2010/lz4/lz4.vcxproj
rename to build/VS2010/lz4/lz4.vcxproj
diff --git a/visual/VS2017/datagen/datagen.vcxproj b/build/VS2017/datagen/datagen.vcxproj
similarity index 100%
rename from visual/VS2017/datagen/datagen.vcxproj
rename to build/VS2017/datagen/datagen.vcxproj
diff --git a/visual/VS2017/frametest/frametest.vcxproj b/build/VS2017/frametest/frametest.vcxproj
similarity index 100%
rename from visual/VS2017/frametest/frametest.vcxproj
rename to build/VS2017/frametest/frametest.vcxproj
diff --git a/visual/VS2017/fullbench-dll/fullbench-dll.vcxproj b/build/VS2017/fullbench-dll/fullbench-dll.vcxproj
similarity index 100%
rename from visual/VS2017/fullbench-dll/fullbench-dll.vcxproj
rename to build/VS2017/fullbench-dll/fullbench-dll.vcxproj
diff --git a/visual/VS2017/fullbench/fullbench.vcxproj b/build/VS2017/fullbench/fullbench.vcxproj
similarity index 100%
rename from visual/VS2017/fullbench/fullbench.vcxproj
rename to build/VS2017/fullbench/fullbench.vcxproj
diff --git a/visual/VS2017/fuzzer/fuzzer.vcxproj b/build/VS2017/fuzzer/fuzzer.vcxproj
similarity index 100%
rename from visual/VS2017/fuzzer/fuzzer.vcxproj
rename to build/VS2017/fuzzer/fuzzer.vcxproj
diff --git a/visual/VS2017/liblz4-dll/liblz4-dll.rc b/build/VS2017/liblz4-dll/liblz4-dll.rc
similarity index 100%
rename from visual/VS2017/liblz4-dll/liblz4-dll.rc
rename to build/VS2017/liblz4-dll/liblz4-dll.rc
diff --git a/visual/VS2017/liblz4-dll/liblz4-dll.vcxproj b/build/VS2017/liblz4-dll/liblz4-dll.vcxproj
similarity index 100%
rename from visual/VS2017/liblz4-dll/liblz4-dll.vcxproj
rename to build/VS2017/liblz4-dll/liblz4-dll.vcxproj
diff --git a/visual/VS2017/liblz4/liblz4.vcxproj b/build/VS2017/liblz4/liblz4.vcxproj
similarity index 100%
rename from visual/VS2017/liblz4/liblz4.vcxproj
rename to build/VS2017/liblz4/liblz4.vcxproj
diff --git a/visual/VS2017/lz4.sln b/build/VS2017/lz4.sln
similarity index 88%
rename from visual/VS2017/lz4.sln
rename to build/VS2017/lz4.sln
index 72e98fc..6a2779f 100644
--- a/visual/VS2017/lz4.sln
+++ b/build/VS2017/lz4.sln
@@ -19,6 +19,8 @@
 		{9800039D-4AAA-43A4-BB78-FEF6F4836927} = {9800039D-4AAA-43A4-BB78-FEF6F4836927}
 	EndProjectSection
 EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lz4", "lz4\lz4.vcxproj", "{60A3115E-B988-41EE-8815-F4D4F253D866}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Win32 = Debug|Win32
@@ -83,6 +85,14 @@
 		{13992FD2-077E-4954-B065-A428198201A9}.Release|Win32.Build.0 = Release|Win32
 		{13992FD2-077E-4954-B065-A428198201A9}.Release|x64.ActiveCfg = Release|x64
 		{13992FD2-077E-4954-B065-A428198201A9}.Release|x64.Build.0 = Release|x64
+		{60A3115E-B988-41EE-8815-F4D4F253D866}.Debug|Win32.ActiveCfg = Debug|Win32
+		{60A3115E-B988-41EE-8815-F4D4F253D866}.Debug|Win32.Build.0 = Debug|Win32
+		{60A3115E-B988-41EE-8815-F4D4F253D866}.Debug|x64.ActiveCfg = Debug|x64
+		{60A3115E-B988-41EE-8815-F4D4F253D866}.Debug|x64.Build.0 = Debug|x64
+		{60A3115E-B988-41EE-8815-F4D4F253D866}.Release|Win32.ActiveCfg = Release|Win32
+		{60A3115E-B988-41EE-8815-F4D4F253D866}.Release|Win32.Build.0 = Release|Win32
+		{60A3115E-B988-41EE-8815-F4D4F253D866}.Release|x64.ActiveCfg = Release|x64
+		{60A3115E-B988-41EE-8815-F4D4F253D866}.Release|x64.Build.0 = Release|x64
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
diff --git a/visual/VS2010/lz4/lz4.rc b/build/VS2017/lz4/lz4.rc
similarity index 100%
copy from visual/VS2010/lz4/lz4.rc
copy to build/VS2017/lz4/lz4.rc
diff --git a/build/VS2017/lz4/lz4.vcxproj b/build/VS2017/lz4/lz4.vcxproj
new file mode 100644
index 0000000..b4fed24
--- /dev/null
+++ b/build/VS2017/lz4/lz4.vcxproj
@@ -0,0 +1,164 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <VCProjectVersion>15.0</VCProjectVersion>
+    <ProjectGuid>{60A3115E-B988-41EE-8815-F4D4F253D866}</ProjectGuid>
+    <RootNamespace>lz4</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>false</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>$(SolutionDir)bin\$(Platform)_$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)bin\obj\$(RootNamespace)_$(Platform)_$(Configuration)\</IntDir>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>$(SolutionDir)bin\$(Platform)_$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)bin\obj\$(RootNamespace)_$(Platform)_$(Configuration)\</IntDir>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <SDLCheck>true</SDLCheck>
+      <ConformanceMode>true</ConformanceMode>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>false</OptimizeReferences>
+      <EnableCOMDATFolding>false</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <SDLCheck>true</SDLCheck>
+      <ConformanceMode>true</ConformanceMode>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <SDLCheck>true</SDLCheck>
+      <ConformanceMode>true</ConformanceMode>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <SDLCheck>true</SDLCheck>
+      <ConformanceMode>true</ConformanceMode>
+    </ClCompile>
+    <Link>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\lib\lz4.c" />
+    <ClCompile Include="..\..\..\lib\lz4frame.c" />
+    <ClCompile Include="..\..\..\lib\lz4hc.c" />
+    <ClCompile Include="..\..\..\lib\xxhash.c" />
+    <ClCompile Include="..\..\..\programs\bench.c" />
+    <ClCompile Include="..\..\..\programs\datagen.c" />
+    <ClCompile Include="..\..\..\programs\lz4cli.c" />
+    <ClCompile Include="..\..\..\programs\lz4io.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\lib\lz4.h" />
+    <ClInclude Include="..\..\..\lib\lz4frame.h" />
+    <ClInclude Include="..\..\..\lib\lz4frame_static.h" />
+    <ClInclude Include="..\..\..\lib\lz4hc.h" />
+    <ClInclude Include="..\..\..\lib\xxhash.h" />
+    <ClInclude Include="..\..\..\programs\bench.h" />
+    <ClInclude Include="..\..\..\programs\datagen.h" />
+    <ClInclude Include="..\..\..\programs\lz4io.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="lz4.rc" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/contrib/cmake_unofficial/.gitignore b/build/cmake/.gitignore
similarity index 100%
rename from contrib/cmake_unofficial/.gitignore
rename to build/cmake/.gitignore
diff --git a/contrib/cmake_unofficial/CMakeLists.txt b/build/cmake/CMakeLists.txt
similarity index 94%
rename from contrib/cmake_unofficial/CMakeLists.txt
rename to build/cmake/CMakeLists.txt
index 42d92ea..57501ee 100644
--- a/contrib/cmake_unofficial/CMakeLists.txt
+++ b/build/cmake/CMakeLists.txt
@@ -12,6 +12,7 @@
 
 set(LZ4_TOP_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../..")
 
+option(LZ4_BUILD_CLI "Build lz4 program" ON)
 option(LZ4_BUILD_LEGACY_LZ4C "Build lz4c progam with legacy argument support" ON)
 
 # Parse version information
@@ -106,6 +107,10 @@
     OUTPUT_NAME lz4
     SOVERSION "${LZ4_VERSION_MAJOR}"
     VERSION "${LZ4_VERSION_STRING}")
+  if(MSVC)
+    target_compile_definitions(lz4_shared PRIVATE
+      LZ4_DLL_EXPORT=1)
+  endif()
   list(APPEND LZ4_LIBRARIES_BUILT lz4_shared)
 endif()
 if(BUILD_STATIC_LIBS)
@@ -124,10 +129,12 @@
 endif()
 
 # lz4
-set(LZ4_PROGRAMS_BUILT lz4cli)
-add_executable(lz4cli ${LZ4_CLI_SOURCES})
-set_target_properties(lz4cli PROPERTIES OUTPUT_NAME lz4)
-target_link_libraries(lz4cli ${LZ4_LINK_LIBRARY})
+if (LZ4_BUILD_CLI)
+  set(LZ4_PROGRAMS_BUILT lz4cli)
+  add_executable(lz4cli ${LZ4_CLI_SOURCES})
+  set_target_properties(lz4cli PROPERTIES OUTPUT_NAME lz4)
+  target_link_libraries(lz4cli ${LZ4_LINK_LIBRARY})
+endif()
 
 # lz4c
 if (LZ4_BUILD_LEGACY_LZ4C)
@@ -189,7 +196,7 @@
     DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
 
   # install lz4cat and unlz4 symlinks on *nix
-  if(UNIX)
+  if(UNIX AND LZ4_BUILD_CLI)
     install(CODE "
       foreach(f lz4cat unlz4)
         set(dest \"\$ENV{DESTDIR}${CMAKE_INSTALL_FULL_BINDIR}/\${f}\")
@@ -205,7 +212,7 @@
       install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${f}.1"
         DESTINATION "${CMAKE_INSTALL_MANDIR}/man1")
     endforeach()
-  endif(UNIX)
+  endif(UNIX AND LZ4_BUILD_CLI)
 endif(NOT LZ4_BUNDLED_MODE)
 
 # pkg-config
diff --git a/contrib/debian/copyright b/contrib/debian/copyright
index 18a7f48..0914768 100644
--- a/contrib/debian/copyright
+++ b/contrib/debian/copyright
@@ -4,6 +4,6 @@
 Source: https://github.com/lz4/lz4
 
 Files: *
-Copyright: (C) 2011+ Yann Collet
+Copyright: (C) 2011-2020 Yann Collet
 License: GPL-2+
- The full text of license: https://github.com/Cyan4973/lz4/blob/master/lib/LICENSE
+ The full text of license: https://github.com/lz4/lz4/blob/dev/lib/LICENSE
diff --git a/contrib/debian/rules b/contrib/debian/rules
index 748e68d..c897bc5 100755
--- a/contrib/debian/rules
+++ b/contrib/debian/rules
@@ -4,5 +4,4 @@
 include /usr/share/cdbs/1/class/cmake.mk
 
 
-DEB_CMAKE_EXTRA_FLAGS := -DCMAKE_BUILD_TYPE=RelWithDebInfo ../cmake_unofficial
-
+DEB_CMAKE_EXTRA_FLAGS := -DCMAKE_BUILD_TYPE=RelWithDebInfo ../../build/cmake
diff --git a/contrib/gen_manual/gen_manual.cpp b/contrib/gen_manual/gen_manual.cpp
index bedef94..d5fe702 100644
--- a/contrib/gen_manual/gen_manual.cpp
+++ b/contrib/gen_manual/gen_manual.cpp
@@ -96,10 +96,9 @@
     epos = line.find("*/");
     if (spos!=string::npos && epos!=string::npos) {
         sout << line.substr(0, spos);
-        sout << "</b>" << line.substr(spos) << "<b>" << endl;
+        sout << "</b>" << line.substr(spos) << "<b>" << '\n';
     } else {
-      //  fprintf(stderr, "lines=%s\n", line.c_str());
-        sout << line << endl;
+        sout << line << '\n';
     }
 }
 
diff --git a/contrib/meson/meson.build b/contrib/meson/meson.build
index 65a4c26..d1e97d9 100644
--- a/contrib/meson/meson.build
+++ b/contrib/meson/meson.build
@@ -7,6 +7,10 @@
 # in the COPYING file in the root directory of this source tree).
 # #############################################################################
 
+# This is a dummy meson file.
+# The intention is that it can be easily moved to the root of the project
+# (together with meson_options.txt) and packaged for wrapdb.
+
 project('lz4', ['c'],
   license: ['BSD', 'GPLv2'],
   default_options : ['c_std=c99',
@@ -14,112 +18,4 @@
   version: 'DUMMY',
   meson_version: '>=0.47.0')
 
-cc = meson.get_compiler('c')
-pkgconfig = import('pkgconfig')
-python3 = import('python').find_installation()
-c_std = get_option('c_std')
-default_library = get_option('default_library')
-
-host_machine_os = host_machine.system()
-os_windows = 'windows'
-os_linux = 'linux'
-os_darwin = 'darwin'
-os_freebsd = 'freebsd'
-os_sun = 'sunos'
-
-cc_id = cc.get_id()
-compiler_gcc = 'gcc'
-compiler_clang = 'clang'
-compiler_msvc = 'msvc'
-
-lz4_version = meson.project_version()
-
-lz4_h_file = join_paths(meson.current_source_dir(), '../../lib/lz4.h')
-GetLz4LibraryVersion_py = files('GetLz4LibraryVersion.py')
-r = run_command(python3, GetLz4LibraryVersion_py, lz4_h_file)
-if r.returncode() == 0
-  lz4_version = r.stdout().strip()
-  message('Project version is now: @0@'.format(lz4_version))
-else
-  error('Cannot find project version in @0@'.format(lz4_h_file))
-endif
-
-lz4_libversion = lz4_version
-
-# =============================================================================
-# Installation directories
-# =============================================================================
-
-lz4_prefix = get_option('prefix')
-lz4_bindir = get_option('bindir')
-lz4_datadir = get_option('datadir')
-lz4_mandir = get_option('mandir')
-lz4_docdir = join_paths(lz4_datadir, 'doc', meson.project_name())
-
-# =============================================================================
-# Project options
-# =============================================================================
-
-buildtype = get_option('buildtype')
-
-# Built-in options
-use_debug = get_option('debug')
-
-# Custom options
-debug_level = get_option('debug_level')
-use_backtrace = get_option('backtrace')
-
-bin_programs = get_option('bin_programs')
-bin_contrib = get_option('bin_contrib')
-bin_tests = get_option('bin_tests')
-bin_examples = get_option('bin_examples')
-#feature_multi_thread = get_option('multi_thread')
-
-# =============================================================================
-# Dependencies
-# =============================================================================
-
-#libm_dep = cc.find_library('m', required: bin_tests)
-#thread_dep = dependency('threads', required: feature_multi_thread)
-#use_multi_thread = thread_dep.found()
-
-# =============================================================================
-# Compiler flags
-# =============================================================================
-
-add_project_arguments(['-DXXH_NAMESPACE=LZ4_'], language: 'c')
-
-if [compiler_gcc, compiler_clang].contains(cc_id)
-  common_warning_flags = []
-  # Should use Meson's own --werror build option
-  #common_warning_flags += ['-Werror']
-  if c_std == 'c89' or c_std == 'gnu89'
-    common_warning_flags += ['-pedantic', '-Wno-long-long', '-Wno-variadic-macros']
-  elif c_std == 'c99' or c_std == 'gnu99'
-    common_warning_flags += ['-pedantic']
-  endif
-  cc_compile_flags = cc.get_supported_arguments(common_warning_flags)
-  add_project_arguments(cc_compile_flags, language: 'c')
-endif
-
-# =============================================================================
-# Subdirs
-# =============================================================================
-
-subdir('lib')
-
-if bin_programs
-  subdir('programs')
-endif
-
-if bin_tests
-  subdir('tests')
-endif
-
-if bin_contrib
-  subdir('contrib')
-endif
-
-if bin_examples
-  subdir('examples')
-endif
+subdir('meson')
diff --git a/contrib/meson/GetLz4LibraryVersion.py b/contrib/meson/meson/GetLz4LibraryVersion.py
similarity index 100%
rename from contrib/meson/GetLz4LibraryVersion.py
rename to contrib/meson/meson/GetLz4LibraryVersion.py
diff --git a/contrib/meson/InstallSymlink.py b/contrib/meson/meson/InstallSymlink.py
similarity index 100%
rename from contrib/meson/InstallSymlink.py
rename to contrib/meson/meson/InstallSymlink.py
diff --git a/contrib/meson/contrib/gen_manual/meson.build b/contrib/meson/meson/contrib/gen_manual/meson.build
similarity index 97%
rename from contrib/meson/contrib/gen_manual/meson.build
rename to contrib/meson/meson/contrib/gen_manual/meson.build
index 38180e9..a872bd6 100644
--- a/contrib/meson/contrib/gen_manual/meson.build
+++ b/contrib/meson/meson/contrib/gen_manual/meson.build
@@ -7,7 +7,7 @@
 # in the COPYING file in the root directory of this source tree).
 # #############################################################################
 
-lz4_root_dir = '../../../..'
+lz4_root_dir = '../../../../..'
 
 add_languages('cpp')
 cxx = meson.get_compiler('cpp')
diff --git a/contrib/meson/contrib/meson.build b/contrib/meson/meson/contrib/meson.build
similarity index 100%
rename from contrib/meson/contrib/meson.build
rename to contrib/meson/meson/contrib/meson.build
diff --git a/contrib/meson/examples/meson.build b/contrib/meson/meson/examples/meson.build
similarity index 98%
rename from contrib/meson/examples/meson.build
rename to contrib/meson/meson/examples/meson.build
index 3c13214..493049d 100644
--- a/contrib/meson/examples/meson.build
+++ b/contrib/meson/meson/examples/meson.build
@@ -7,7 +7,7 @@
 # in the COPYING file in the root directory of this source tree).
 # #############################################################################
 
-lz4_root_dir = '../../..'
+lz4_root_dir = '../../../..'
 
 #examples_c_args = ['-Wextra', '-Wundef', '-Wshadow', '-Wcast-align', '-Wstrict-prototypes']
 
diff --git a/contrib/meson/lib/meson.build b/contrib/meson/meson/lib/meson.build
similarity index 98%
rename from contrib/meson/lib/meson.build
rename to contrib/meson/meson/lib/meson.build
index e782334..131edcb 100644
--- a/contrib/meson/lib/meson.build
+++ b/contrib/meson/meson/lib/meson.build
@@ -7,7 +7,7 @@
 # in the COPYING file in the root directory of this source tree).
 # #############################################################################
 
-lz4_root_dir = '../../..'
+lz4_root_dir = '../../../..'
 
 liblz4_includes = [include_directories(join_paths(lz4_root_dir, 'lib'))]
 liblz4_sources = [join_paths(lz4_root_dir, 'lib/lz4.c'),
diff --git a/contrib/meson/meson/meson.build b/contrib/meson/meson/meson.build
new file mode 100644
index 0000000..b278b7c
--- /dev/null
+++ b/contrib/meson/meson/meson.build
@@ -0,0 +1,117 @@
+# #############################################################################
+# Copyright (c) 2018-present    lzutao <taolzu(at)gmail.com>
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# #############################################################################
+
+cc = meson.get_compiler('c')
+pkgconfig = import('pkgconfig')
+c_std = get_option('c_std')
+default_library = get_option('default_library')
+
+host_machine_os = host_machine.system()
+os_windows = 'windows'
+os_linux = 'linux'
+os_darwin = 'darwin'
+os_freebsd = 'freebsd'
+os_sun = 'sunos'
+
+cc_id = cc.get_id()
+compiler_gcc = 'gcc'
+compiler_clang = 'clang'
+compiler_msvc = 'msvc'
+
+lz4_version = meson.project_version()
+
+lz4_h_file = join_paths(meson.current_source_dir(), '../../../lib/lz4.h')
+GetLz4LibraryVersion_py = find_program('GetLz4LibraryVersion.py', native : true)
+r = run_command(GetLz4LibraryVersion_py, lz4_h_file)
+if r.returncode() == 0
+  lz4_version = r.stdout().strip()
+  message('Project version is now: @0@'.format(lz4_version))
+else
+  error('Cannot find project version in @0@'.format(lz4_h_file))
+endif
+
+lz4_libversion = lz4_version
+
+# =============================================================================
+# Installation directories
+# =============================================================================
+
+lz4_prefix = get_option('prefix')
+lz4_bindir = get_option('bindir')
+lz4_datadir = get_option('datadir')
+lz4_mandir = get_option('mandir')
+lz4_docdir = join_paths(lz4_datadir, 'doc', meson.project_name())
+
+# =============================================================================
+# Project options
+# =============================================================================
+
+buildtype = get_option('buildtype')
+
+# Built-in options
+use_debug = get_option('debug')
+
+# Custom options
+debug_level = get_option('debug_level')
+use_backtrace = get_option('backtrace')
+
+bin_programs = get_option('bin_programs')
+bin_contrib = get_option('bin_contrib')
+bin_tests = get_option('bin_tests')
+bin_examples = get_option('bin_examples')
+#feature_multi_thread = get_option('multi_thread')
+
+# =============================================================================
+# Dependencies
+# =============================================================================
+
+#libm_dep = cc.find_library('m', required: bin_tests)
+#thread_dep = dependency('threads', required: feature_multi_thread)
+#use_multi_thread = thread_dep.found()
+
+# =============================================================================
+# Compiler flags
+# =============================================================================
+
+add_project_arguments(['-DXXH_NAMESPACE=LZ4_'], language: 'c')
+
+if [compiler_gcc, compiler_clang].contains(cc_id)
+  common_warning_flags = []
+  # Should use Meson's own --werror build option
+  #common_warning_flags += ['-Werror']
+  if c_std == 'c89' or c_std == 'gnu89'
+    common_warning_flags += ['-pedantic', '-Wno-long-long', '-Wno-variadic-macros']
+  elif c_std == 'c99' or c_std == 'gnu99'
+    common_warning_flags += ['-pedantic']
+  endif
+  cc_compile_flags = cc.get_supported_arguments(common_warning_flags)
+  add_project_arguments(cc_compile_flags, language: 'c')
+endif
+
+# =============================================================================
+# Subdirs
+# =============================================================================
+
+subdir('lib')
+
+if bin_programs
+  subdir('programs')
+endif
+
+if bin_tests
+  subdir('tests')
+endif
+
+if bin_contrib
+  subdir('contrib')
+endif
+
+if bin_examples
+  subdir('examples')
+endif
diff --git a/contrib/meson/programs/meson.build b/contrib/meson/meson/programs/meson.build
similarity index 98%
rename from contrib/meson/programs/meson.build
rename to contrib/meson/meson/programs/meson.build
index df64eb0..705dbf5 100644
--- a/contrib/meson/programs/meson.build
+++ b/contrib/meson/meson/programs/meson.build
@@ -7,7 +7,7 @@
 # in the COPYING file in the root directory of this source tree).
 # #############################################################################
 
-lz4_root_dir = '../../..'
+lz4_root_dir = '../../../..'
 
 lz4_includes = include_directories(join_paths(lz4_root_dir, 'programs'))
 lz4_sources = [join_paths(lz4_root_dir, 'programs/bench.c'),
diff --git a/contrib/meson/tests/meson.build b/contrib/meson/meson/tests/meson.build
similarity index 98%
rename from contrib/meson/tests/meson.build
rename to contrib/meson/meson/tests/meson.build
index 392bcf2..7800475 100644
--- a/contrib/meson/tests/meson.build
+++ b/contrib/meson/meson/tests/meson.build
@@ -7,7 +7,7 @@
 # in the COPYING file in the root directory of this source tree).
 # #############################################################################
 
-lz4_root_dir = '../../..'
+lz4_root_dir = '../../../..'
 programs_dir_inc = include_directories(join_paths(lz4_root_dir, 'programs'))
 lib_dir_inc = include_directories(join_paths(lz4_root_dir, 'lib'))
 
diff --git a/doc/lz4_Frame_format.md b/doc/lz4_Frame_format.md
index a0514e0..7e08841 100644
--- a/doc/lz4_Frame_format.md
+++ b/doc/lz4_Frame_format.md
@@ -16,7 +16,7 @@
 
 ### Version
 
-1.6.1 (30/01/2018)
+1.6.2 (12/08/2020)
 
 
 Introduction
@@ -75,7 +75,7 @@
 3 to 15 Bytes, to be detailed in its own paragraph,
 as it is the most important part of the spec.
 
-The combined __Magic Number__ and __Frame Descriptor__ fields are sometimes
+The combined _Magic_Number_ and _Frame_Descriptor_ fields are sometimes
 called ___LZ4 Frame Header___. Its size varies between 7 and 19 bytes.
 
 __Data Blocks__
@@ -85,14 +85,13 @@
 
 __EndMark__
 
-The flow of blocks ends when the last data block has a size of “0”.
-The size is expressed as a 32-bits value.
+The flow of blocks ends when the last data block is followed by
+the 32-bit value `0x00000000`.
 
 __Content Checksum__
 
-Content Checksum verify that the full content has been decoded correctly.
-The content checksum is the result
-of [xxh32() hash function](https://github.com/Cyan4973/xxHash)
+_Content_Checksum_ verifies that the full content has been decoded correctly.
+The content checksum is the result of [xxHash-32 algorithm]
 digesting the original (decoded) data as input, and a seed of zero.
 Content checksum is only present when its associated flag
 is set in the frame descriptor.
@@ -101,7 +100,7 @@
 and also that the encoding/decoding process itself generated no distortion.
 Its usage is recommended.
 
-The combined __EndMark__ and __Content Checksum__ fields might sometimes be
+The combined _EndMark_ and _Content_Checksum_ fields might sometimes be
 referred to as ___LZ4 Frame Footer___. Its size varies between 4 and 8 bytes.
 
 __Frame Concatenation__
@@ -261,38 +260,48 @@
 
 This field uses 4-bytes, format is little-endian.
 
-The highest bit is “1” if data in the block is uncompressed.
+If the highest bit is set (`1`), the block is uncompressed.
 
-The highest bit is “0” if data in the block is compressed by LZ4.
+If the highest bit is not set (`0`), the block is LZ4-compressed,
+using the [LZ4 block format specification](https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md).
 
-All other bits give the size, in bytes, of the following data block.
+All other bits give the size, in bytes, of the data section.
 The size does not include the block checksum if present.
 
-Block Size shall never be larger than Block Maximum Size.
-Such a thing could potentially happen for non-compressible sources.
-In such a case, such data block shall be passed using uncompressed format.
+_Block_Size_ shall never be larger than _Block_Maximum_Size_.
+Such an outcome could potentially happen for non-compressible sources.
+In such a case, the data block must be passed using uncompressed format.
+
+A value of `0x00000000` is invalid, and signifies an _EndMark_ instead.
+Note that this is different from a value of `0x80000000` (highest bit set),
+which is an uncompressed block of size 0 (empty),
+which is valid, and therefore doesn't end a frame.
+Note that, if _Block_checksum_ is enabled,
+even an empty block must be followed by a 32-bit block checksum.
 
 __Data__
 
 Where the actual data to decode stands.
 It might be compressed or not, depending on previous field indications.
 
-When compressed, the data must respect the [LZ4 block format specification](https://github.com/lz4/lz4/blob/master/doc/lz4_Block_format.md).
+When compressed, the data must respect the [LZ4 block format specification](https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md).
 
-Note that the block is not necessarily full.
-Uncompressed size of data can be any size, up to "Block Maximum Size”,
+Note that a block is not necessarily full.
+Uncompressed size of data can be any size __up to__ _Block_Maximum_Size_,
 so it may contain less data than the maximum block size.
 
 __Block checksum__
 
 Only present if the associated flag is set.
 This is a 4-bytes checksum value, in little endian format,
-calculated by using the xxHash-32 algorithm on the raw (undecoded) data block,
+calculated by using the [xxHash-32 algorithm] on the __raw__ (undecoded) data block,
 and a seed of zero.
 The intention is to detect data corruption (storage or transmission errors)
 before decoding.
 
-Block checksum is cumulative with Content checksum.
+_Block_checksum_ can be cumulative with _Content_checksum_.
+
+[xxHash-32 algorithm]: https://github.com/Cyan4973/xxHash/blob/release/doc/xxhash_spec.md
 
 
 Skippable Frames
@@ -389,6 +398,8 @@
 Version changes
 ---------------
 
+1.6.2 : clarifies specification of _EndMark_
+
 1.6.1 : introduced terms "LZ4 Frame Header" and "LZ4 Frame Footer"
 
 1.6.0 : restored Dictionary ID field in Frame header
diff --git a/doc/lz4_manual.html b/doc/lz4_manual.html
index a477584..47fe18d 100644
--- a/doc/lz4_manual.html
+++ b/doc/lz4_manual.html
@@ -1,10 +1,10 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>1.9.2 Manual</title>
+<title>1.9.3 Manual</title>
 </head>
 <body>
-<h1>1.9.2 Manual</h1>
+<h1>1.9.3 Manual</h1>
 <hr>
 <a name="Contents"></a><h2>Contents</h2>
 <ol>
@@ -16,7 +16,7 @@
 <li><a href="#Chapter6">Streaming Compression Functions</a></li>
 <li><a href="#Chapter7">Streaming Decompression Functions</a></li>
 <li><a href="#Chapter8">Experimental section</a></li>
-<li><a href="#Chapter9">PRIVATE DEFINITIONS</a></li>
+<li><a href="#Chapter9">Private Definitions</a></li>
 <li><a href="#Chapter10">Obsolete Functions</a></li>
 </ol>
 <hr>
@@ -117,7 +117,8 @@
     The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
     It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
     An acceleration value of "1" is the same as regular LZ4_compress_default()
-    Values <= 0 will be replaced by ACCELERATION_DEFAULT (currently == 1, see lz4.c).
+    Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c).
+    Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c).
 </p></pre><BR>
 
 <pre><b>int LZ4_sizeofState(void);
@@ -140,31 +141,53 @@
                New value is necessarily <= input value.
  @return : Nb bytes written into 'dst' (necessarily <= targetDestSize)
            or 0 if compression fails.
+
+ Note : from v1.8.2 to v1.9.1, this function had a bug (fixed in v1.9.2+):
+        the produced compressed content could, in specific circumstances,
+        require to be decompressed into a destination buffer larger
+        by at least 1 byte than the content to decompress.
+        If an application uses `LZ4_compress_destSize()`,
+        it's highly recommended to update liblz4 to v1.9.2 or better.
+        If this can't be done or ensured,
+        the receiving decompression function should provide
+        a dstCapacity which is > decompressedSize, by at least 1 byte.
+        See https://github.com/lz4/lz4/issues/859 for details
+ 
 </p></pre><BR>
 
 <pre><b>int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
 </b><p>  Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
   into destination buffer 'dst' of size 'dstCapacity'.
   Up to 'targetOutputSize' bytes will be decoded.
-  The function stops decoding on reaching this objective,
-  which can boost performance when only the beginning of a block is required.
+  The function stops decoding on reaching this objective.
+  This can be useful to boost performance
+  whenever only the beginning of a block is required.
 
- @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity)
+ @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
            If source stream is detected malformed, function returns a negative result.
 
-  Note : @return can be < targetOutputSize, if compressed block contains less data.
+  Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
 
-  Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity,
-           and expects targetOutputSize <= dstCapacity.
-           It effectively stops decoding on reaching targetOutputSize,
+  Note 2 : targetOutputSize must be <= dstCapacity
+
+  Note 3 : this function effectively stops decoding on reaching targetOutputSize,
            so dstCapacity is kind of redundant.
-           This is because in a previous version of this function,
-           decoding operation would not "break" a sequence in the middle.
-           As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize,
+           This is because in older versions of this function,
+           decoding operation would still write complete sequences.
+           Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
            it could write more bytes, though only up to dstCapacity.
            Some "margin" used to be required for this operation to work properly.
-           This is no longer necessary.
-           The function nonetheless keeps its signature, in an effort to not break API.
+           Thankfully, this is no longer necessary.
+           The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
+
+  Note 4 : If srcSize is the exact size of the block,
+           then targetOutputSize can be any value,
+           including larger than the block's decompressed size.
+           The function will, at most, generate block's decompressed size.
+
+  Note 5 : If srcSize is _larger_ than block's compressed size,
+           then targetOutputSize **MUST** be <= block's decompressed size.
+           Otherwise, *silent corruption will occur*.
  
 </p></pre><BR>
 
@@ -423,39 +446,33 @@
 </b></pre><BR>
 <pre><b>#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize)   ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN)  </b>/**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */<b>
 </b></pre><BR>
-<a name="Chapter9"></a><h2>PRIVATE DEFINITIONS</h2><pre>
+<a name="Chapter9"></a><h2>Private Definitions</h2><pre>
  Do not use these definitions directly.
  They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
- Accessing members will expose code to API and/or ABI break in future versions of the library.
+ Accessing members will expose user code to API and/or ABI break in future versions of the library.
 <BR></pre>
 
 <pre><b>typedef struct {
-    const uint8_t* externalDict;
+    const LZ4_byte* externalDict;
     size_t extDictSize;
-    const uint8_t* prefixEnd;
+    const LZ4_byte* prefixEnd;
     size_t prefixSize;
 } LZ4_streamDecode_t_internal;
 </b></pre><BR>
-<pre><b>typedef struct {
-    const unsigned char* externalDict;
-    const unsigned char* prefixEnd;
-    size_t extDictSize;
-    size_t prefixSize;
-} LZ4_streamDecode_t_internal;
-</b></pre><BR>
-<pre><b>#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4 + ((sizeof(void*)==16) ? 4 : 0) </b>/*AS-400*/ )<b>
-#define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
+<pre><b>#define LZ4_STREAMSIZE       16416  </b>/* static size, for inter-version compatibility */<b>
+#define LZ4_STREAMSIZE_VOIDP (LZ4_STREAMSIZE / sizeof(void*))
 union LZ4_stream_u {
-    unsigned long long table[LZ4_STREAMSIZE_U64];
+    void* table[LZ4_STREAMSIZE_VOIDP];
     LZ4_stream_t_internal internal_donotuse;
-} ;  </b>/* previously typedef'd to LZ4_stream_t */<b>
-</b><p>  information structure to track an LZ4 stream.
+}; </b>/* previously typedef'd to LZ4_stream_t */<b>
+</b><p>  Do not use below internal definitions directly !
+  Declare or allocate an LZ4_stream_t instead.
   LZ4_stream_t can also be created using LZ4_createStream(), which is recommended.
   The structure definition can be convenient for static allocation
   (on stack, or as part of larger structure).
   Init this structure with LZ4_initStream() before first use.
   note : only use this definition in association with static linking !
-    this definition is not API/ABI safe, and may change in a future version.
+  this definition is not API/ABI safe, and may change in future versions.
  
 </p></pre><BR>
 
@@ -494,18 +511,17 @@
 <pre><b>#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
 #  define LZ4_DEPRECATED(message)   </b>/* disable deprecation warnings */<b>
 #else
-#  define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
 #  if defined (__cplusplus) && (__cplusplus >= 201402) </b>/* C++14 or greater */<b>
 #    define LZ4_DEPRECATED(message) [[deprecated(message)]]
-#  elif (LZ4_GCC_VERSION >= 405) || defined(__clang__)
-#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
-#  elif (LZ4_GCC_VERSION >= 301)
-#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
 #  elif defined(_MSC_VER)
 #    define LZ4_DEPRECATED(message) __declspec(deprecated(message))
+#  elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45))
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
+#  elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
 #  else
-#    pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
-#    define LZ4_DEPRECATED(message)
+#    pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler")
+#    define LZ4_DEPRECATED(message)   </b>/* disabled */<b>
 #  endif
 #endif </b>/* LZ4_DISABLE_DEPRECATE_WARNINGS */<b>
 </b><p>
@@ -520,18 +536,39 @@
  
 </p></pre><BR>
 
-<pre><b></b><p>  These functions used to be faster than LZ4_decompress_safe(),
-  but it has changed, and they are now slower than LZ4_decompress_safe().
+<pre><b>LZ4_DEPRECATED("use LZ4_compress_default() instead")       LZ4LIB_API int LZ4_compress               (const char* src, char* dest, int srcSize);
+LZ4_DEPRECATED("use LZ4_compress_default() instead")       LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue  (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+</b><p></p></pre><BR>
+
+<pre><b>LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize);
+LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
+</b><p></p></pre><BR>
+
+<pre><b>LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
+LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
+</b><p></p></pre><BR>
+
+<pre><b>LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead")
+int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead")
+int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead")
+int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);
+</b><p>  These functions used to be faster than LZ4_decompress_safe(),
+  but this is no longer the case. They are now slower.
   This is because LZ4_decompress_fast() doesn't know the input size,
-  and therefore must progress more cautiously in the input buffer to not read beyond the end of block.
+  and therefore must progress more cautiously into the input buffer to not read beyond the end of block.
   On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability.
   As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated.
 
   The last remaining LZ4_decompress_fast() specificity is that
   it can decompress a block without knowing its compressed size.
-  Such functionality could be achieved in a more secure manner,
-  by also providing the maximum size of input buffer,
-  but it would require new prototypes, and adaptation of the implementation to this new use case.
+  Such functionality can be achieved in a more secure manner
+  by employing LZ4_decompress_safe_partial().
 
   Parameters:
   originalSize : is the uncompressed size to regenerate.
diff --git a/doc/lz4frame_manual.html b/doc/lz4frame_manual.html
index 72f27c8..2758306 100644
--- a/doc/lz4frame_manual.html
+++ b/doc/lz4frame_manual.html
@@ -1,10 +1,10 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>1.9.2 Manual</title>
+<title>1.9.3 Manual</title>
 </head>
 <body>
-<h1>1.9.2 Manual</h1>
+<h1>1.9.3 Manual</h1>
 <hr>
 <a name="Contents"></a><h2>Contents</h2>
 <ol>
@@ -167,7 +167,7 @@
  @return is always the same for a srcSize and prefsPtr.
   prefsPtr is optional : when NULL is provided, preferences will be set to cover worst case scenario.
   tech details :
- @return includes the possibility that internal buffer might already be filled by up to (blockSize-1) bytes.
+ @return if automatic flushing is not enabled, includes the possibility that internal buffer might already be filled by up to (blockSize-1) bytes.
   It also includes frame footer (ending + checksum), since it might be generated by LZ4F_compressEnd().
  @return doesn't include frame header, as it was already generated by LZ4F_compressBegin().
  
@@ -299,8 +299,10 @@
                                    void* dstBuffer, size_t* dstSizePtr,
                                    const void* srcBuffer, size_t* srcSizePtr,
                                    const LZ4F_decompressOptions_t* dOptPtr);
-</b><p>  Call this function repetitively to regenerate compressed data from `srcBuffer`.
-  The function will read up to *srcSizePtr bytes from srcBuffer,
+</b><p>  Call this function repetitively to regenerate data compressed in `srcBuffer`.
+
+  The function requires a valid dctx state.
+  It will read up to *srcSizePtr bytes from srcBuffer,
   and decompress data into dstBuffer, of capacity *dstSizePtr.
 
   The nb of bytes consumed from srcBuffer will be written into *srcSizePtr (necessarily <= original value).
diff --git a/examples/Makefile b/examples/Makefile
index 6a34b33..3ec3e21 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -19,7 +19,7 @@
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 #
 # You can contact the author at :
-#  - LZ4 source repository : https://github.com/Cyan4973/lz4
+#  - LZ4 source repository : https://github.com/lz4/lz4
 #  - LZ4 forum froup : https://groups.google.com/forum/#!forum/lz4c
 # ##########################################################################
 # This makefile compile and test
diff --git a/examples/frameCompress.c b/examples/frameCompress.c
index a189329..aac4a3b 100644
--- a/examples/frameCompress.c
+++ b/examples/frameCompress.c
@@ -32,12 +32,12 @@
 {
     size_t const writtenSize = fwrite(buf, eltSize, nbElt, f);
     size_t const expectedSize = eltSize * nbElt;
-    assert(expectedSize / nbElt == eltSize);   /* check overflow */
+    if (nbElt>0) assert(expectedSize / nbElt == eltSize);  /* check overflow */
     if (writtenSize < expectedSize) {
         if (ferror(f))  /* note : ferror() must follow fwrite */
             fprintf(stderr, "Write failed \n");
         else
-            fprintf(stderr, "Short write \n");
+            fprintf(stderr, "Write too short \n");
         exit(1);
     }
 }
diff --git a/examples/streaming_api_basics.md b/examples/streaming_api_basics.md
index 90065e4..1ccc6e3 100644
--- a/examples/streaming_api_basics.md
+++ b/examples/streaming_api_basics.md
@@ -10,7 +10,7 @@
    such as LZ4 command line utility, node-lz4, etc.
  - "Block" API : This is recommended for simple purpose.
    It compress single raw memory block to LZ4 memory block and vice versa.
- - "Streaming" API : This is designed for complex thing.
+ - "Streaming" API : This is designed for complex things.
    For example, compress huge stream data in restricted memory environment.
 
 Basically, you should use "Auto Framing" API.
@@ -19,13 +19,13 @@
 
 ## What is difference between Block and Streaming API ?
 
-Block API (de)compresses single contiguous memory block.
-In other words, LZ4 library find redundancy from single contiguous memory block.
-Streaming API does same thing but (de)compress multiple adjacent contiguous memory block.
+Block API (de)compresses a single contiguous memory block.
+In other words, LZ4 library finds redundancy from a single contiguous memory block.
+Streaming API does same thing but (de)compresses multiple adjacent contiguous memory blocks.
 So LZ4 library could find more redundancy than Block API.
 
 The following figure shows difference between API and block sizes.
-In these figures, original data is splitted to 4KiBytes contiguous chunks.
+In these figures, the original data is split into 4KiBytes contiguous chunks.
 
 ```
 Original Data
@@ -81,7 +81,7 @@
 
 ## Restriction of Streaming API
 
-For the efficiency, Streaming API doesn't keep mirror copy of dependent (de)compressed memory.
+For efficiency, Streaming API doesn't keep a mirror copy of dependent (de)compressed memory.
 This means users should keep these dependent (de)compressed memory explicitly.
 Usually, "Dependent memory" is previous adjacent contiguous memory up to 64KiBytes.
 LZ4 will not access further memories.
diff --git a/lib/Makefile b/lib/Makefile
index 8f21d3d..c12949b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -28,7 +28,7 @@
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #
 # You can contact the author at :
-#  - LZ4 source repository : https://github.com/Cyan4973/lz4
+#  - LZ4 source repository : https://github.com/lz4/lz4
 #  - LZ4 forum froup : https://groups.google.com/forum/#!forum/lz4c
 # ################################################################
 
diff --git a/lib/README.md b/lib/README.md
index cba2c34..e2af868 100644
--- a/lib/README.md
+++ b/lib/README.md
@@ -35,21 +35,22 @@
 
 Definitions which are not guaranteed to remain stable in future versions,
 are protected behind macros, such as `LZ4_STATIC_LINKING_ONLY`.
-As the name implies, these definitions can only be invoked
+As the name strongly implies, these definitions should only be invoked
 in the context of static linking ***only***.
 Otherwise, dependent application may fail on API or ABI break in the future.
-The associated symbols are also not present in dynamic library by default.
+The associated symbols are also not exposed by the dynamic library by default.
 Should they be nonetheless needed, it's possible to force their publication
-by using build macro `LZ4_PUBLISH_STATIC_FUNCTIONS`.
+by using build macros `LZ4_PUBLISH_STATIC_FUNCTIONS`
+and `LZ4F_PUBLISH_STATIC_FUNCTIONS`.
 
 
 #### Build macros
 
-The following build macro can be selected at compilation time :
+The following build macros can be selected to adjust source code behavior at compilation time :
 
-- `LZ4_FAST_DEC_LOOP` : this triggers the optimized decompression loop.
-  This loops works great on x86/x64 cpus, and is automatically enabled on this platform.
-  It's possible to enable or disable it manually, by passing `LZ4_FAST_DEC_LOOP=1` or `0` to the preprocessor.
+- `LZ4_FAST_DEC_LOOP` : this triggers a speed optimized decompression loop, more powerful on modern cpus.
+  This loop works great on `x86`, `x64` and `aarch64` cpus, and is automatically enabled for them.
+  It's also possible to enable or disable it manually, by passing `LZ4_FAST_DEC_LOOP=1` or `0` to the preprocessor.
   For example, with `gcc` : `-DLZ4_FAST_DEC_LOOP=1`,
   and with `make` : `CPPFLAGS+=-DLZ4_FAST_DEC_LOOP=1 make lz4`.
 
@@ -65,8 +66,24 @@
   Should this be a problem, it's generally possible to make the compiler ignore these warnings,
   for example with `-Wno-deprecated-declarations` on `gcc`,
   or `_CRT_SECURE_NO_WARNINGS` for Visual Studio.
-  Another method is to define `LZ4_DISABLE_DEPRECATE_WARNINGS`
-  before including the LZ4 header files.
+  This build macro offers another project-specific method
+  by defining `LZ4_DISABLE_DEPRECATE_WARNINGS` before including the LZ4 header files.
+
+- `LZ4_USER_MEMORY_FUNCTIONS` : replace calls to `<stdlib.h>`'s `malloc`, `calloc` and `free`
+  by user-defined functions, which must be called `LZ4_malloc()`, `LZ4_calloc()` and `LZ4_free()`.
+  User functions must be available at link time.
+
+- `LZ4_FORCE_SW_BITCOUNT` : by default, the compression algorithm tries to determine lengths
+  by using bitcount instructions, generally implemented as fast single instructions in many cpus.
+  In case the target cpu doesn't support it, or the compiler intrinsic doesn't work or features bad performance,
+  it's possible to use an optimized software path instead.
+  This is achieved by setting this build macro.
+  In most cases, it's not expected to be necessary,
+  but it can be legitimately considered for less common platforms.
+
+- `LZ4_ALIGN_TEST` : alignment test ensures that the memory area
+  passed as argument to become a compression state is suitably aligned.
+  This test can be disabled if it proves flaky, by setting this value to 0.
 
 
 #### Amalgamation
@@ -102,7 +119,7 @@
 
 #### Miscellaneous
 
-Other files present in the directory are not source code. There are :
+Other files present in the directory are not source code. They are :
 
  - `LICENSE` : contains the BSD license text
  - `Makefile` : `make` script to compile and install lz4 library (static and dynamic)
diff --git a/lib/lz4.c b/lib/lz4.c
index 9808d70..9f5e9bf 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -45,10 +45,16 @@
 #endif
 
 /*
- * ACCELERATION_DEFAULT :
+ * LZ4_ACCELERATION_DEFAULT :
  * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
  */
-#define ACCELERATION_DEFAULT 1
+#define LZ4_ACCELERATION_DEFAULT 1
+/*
+ * LZ4_ACCELERATION_MAX :
+ * Any "acceleration" value higher than this threshold
+ * gets treated as LZ4_ACCELERATION_MAX instead (fix #876)
+ */
+#define LZ4_ACCELERATION_MAX 65537
 
 
 /*-************************************
@@ -82,6 +88,7 @@
  * Define this parameter if your target system or compiler does not support hardware bit count
  */
 #if defined(_MSC_VER) && defined(_WIN32_WCE)   /* Visual Studio for WinCE doesn't support Hardware bit count */
+#  undef  LZ4_FORCE_SW_BITCOUNT  /* avoid double def */
 #  define LZ4_FORCE_SW_BITCOUNT
 #endif
 
@@ -114,10 +121,9 @@
 /*-************************************
 *  Compiler Options
 **************************************/
-#ifdef _MSC_VER    /* Visual Studio */
-#  include <intrin.h>
-#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
-#  pragma warning(disable : 4293)        /* disable: C4293: too large shift (32-bits) */
+#if defined(_MSC_VER) && (_MSC_VER >= 1400)  /* Visual Studio 2005+ */
+#  include <intrin.h>               /* only present in VS2005+ */
+#  pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
 #endif  /* _MSC_VER */
 
 #ifndef LZ4_FORCE_INLINE
@@ -136,7 +142,7 @@
 #  endif  /* _MSC_VER */
 #endif /* LZ4_FORCE_INLINE */
 
-/* LZ4_FORCE_O2_GCC_PPC64LE and LZ4_FORCE_O2_INLINE_GCC_PPC64LE
+/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
  * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
  * together with a simple 8-byte copy loop as a fall-back path.
  * However, this optimization hurts the decompression speed by >30%,
@@ -151,11 +157,11 @@
  * of LZ4_wildCopy8 does not affect the compression speed.
  */
 #if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
-#  define LZ4_FORCE_O2_GCC_PPC64LE __attribute__((optimize("O2")))
-#  define LZ4_FORCE_O2_INLINE_GCC_PPC64LE __attribute__((optimize("O2"))) LZ4_FORCE_INLINE
+#  define LZ4_FORCE_O2  __attribute__((optimize("O2")))
+#  undef LZ4_FORCE_INLINE
+#  define LZ4_FORCE_INLINE  static __inline __attribute__((optimize("O2"),always_inline))
 #else
-#  define LZ4_FORCE_O2_GCC_PPC64LE
-#  define LZ4_FORCE_O2_INLINE_GCC_PPC64LE static
+#  define LZ4_FORCE_O2
 #endif
 
 #if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
@@ -171,14 +177,33 @@
 #define unlikely(expr)   expect((expr) != 0, 0)
 #endif
 
+/* Should the alignment test prove unreliable, for some reason,
+ * it can be disabled by setting LZ4_ALIGN_TEST to 0 */
+#ifndef LZ4_ALIGN_TEST  /* can be externally provided */
+# define LZ4_ALIGN_TEST 1
+#endif
+
 
 /*-************************************
 *  Memory routines
 **************************************/
-#include <stdlib.h>   /* malloc, calloc, free */
-#define ALLOC(s)          malloc(s)
-#define ALLOC_AND_ZERO(s) calloc(1,s)
-#define FREEMEM(p)        free(p)
+#ifdef LZ4_USER_MEMORY_FUNCTIONS
+/* memory management functions can be customized by user project.
+ * Below functions must exist somewhere in the Project
+ * and be available at link time */
+void* LZ4_malloc(size_t s);
+void* LZ4_calloc(size_t n, size_t s);
+void  LZ4_free(void* p);
+# define ALLOC(s)          LZ4_malloc(s)
+# define ALLOC_AND_ZERO(s) LZ4_calloc(1,s)
+# define FREEMEM(p)        LZ4_free(p)
+#else
+# include <stdlib.h>   /* malloc, calloc, free */
+# define ALLOC(s)          malloc(s)
+# define ALLOC_AND_ZERO(s) calloc(1,s)
+# define FREEMEM(p)        free(p)
+#endif
+
 #include <string.h>   /* memset, memcpy */
 #define MEM_INIT(p,v,s)   memset((p),(v),(s))
 
@@ -225,21 +250,27 @@
 
 #if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
 #  include <stdio.h>
-static int g_debuglog_enable = 1;
-#  define DEBUGLOG(l, ...) {                                  \
-                if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) {  \
-                    fprintf(stderr, __FILE__ ": ");           \
-                    fprintf(stderr, __VA_ARGS__);             \
-                    fprintf(stderr, " \n");                   \
-            }   }
+   static int g_debuglog_enable = 1;
+#  define DEBUGLOG(l, ...) {                          \
+        if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) {  \
+            fprintf(stderr, __FILE__ ": ");           \
+            fprintf(stderr, __VA_ARGS__);             \
+            fprintf(stderr, " \n");                   \
+    }   }
 #else
-#  define DEBUGLOG(l, ...)      {}    /* disabled */
+#  define DEBUGLOG(l, ...) {}    /* disabled */
 #endif
 
+static int LZ4_isAligned(const void* ptr, size_t alignment)
+{
+    return ((size_t)ptr & (alignment -1)) == 0;
+}
+
 
 /*-************************************
 *  Types
 **************************************/
+#include <limits.h>
 #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
 # include <stdint.h>
   typedef  uint8_t BYTE;
@@ -249,6 +280,9 @@
   typedef uint64_t U64;
   typedef uintptr_t uptrval;
 #else
+# if UINT_MAX != 4294967295UL
+#   error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
+# endif
   typedef unsigned char       BYTE;
   typedef unsigned short      U16;
   typedef unsigned int        U32;
@@ -273,6 +307,21 @@
 /*-************************************
 *  Reading and writing into memory
 **************************************/
+
+/**
+ * LZ4 relies on memcpy with a constant size being inlined. In freestanding
+ * environments, the compiler can't assume the implementation of memcpy() is
+ * standard compliant, so it can't apply its specialized memcpy() inlining
+ * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
+ * memcpy() as if it were standard compliant, so it can inline it in freestanding
+ * environments. This is needed when decompressing the Linux Kernel, for example.
+ */
+#if defined(__GNUC__) && (__GNUC__ >= 4)
+#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
+#else
+#define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
+#endif
+
 static unsigned LZ4_isLittleEndian(void)
 {
     const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
@@ -307,27 +356,27 @@
 
 static U16 LZ4_read16(const void* memPtr)
 {
-    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+    U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
 }
 
 static U32 LZ4_read32(const void* memPtr)
 {
-    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+    U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
 }
 
 static reg_t LZ4_read_ARCH(const void* memPtr)
 {
-    reg_t val; memcpy(&val, memPtr, sizeof(val)); return val;
+    reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
 }
 
 static void LZ4_write16(void* memPtr, U16 value)
 {
-    memcpy(memPtr, &value, sizeof(value));
+    LZ4_memcpy(memPtr, &value, sizeof(value));
 }
 
 static void LZ4_write32(void* memPtr, U32 value)
 {
-    memcpy(memPtr, &value, sizeof(value));
+    LZ4_memcpy(memPtr, &value, sizeof(value));
 }
 
 #endif /* LZ4_FORCE_MEMORY_ACCESS */
@@ -355,14 +404,14 @@
 }
 
 /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
-LZ4_FORCE_O2_INLINE_GCC_PPC64LE
+LZ4_FORCE_INLINE
 void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
 {
     BYTE* d = (BYTE*)dstPtr;
     const BYTE* s = (const BYTE*)srcPtr;
     BYTE* const e = (BYTE*)dstEnd;
 
-    do { memcpy(d,s,8); d+=8; s+=8; } while (d<e);
+    do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
 }
 
 static const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
@@ -370,12 +419,12 @@
 
 
 #ifndef LZ4_FAST_DEC_LOOP
-#  if defined(__i386__) || defined(__x86_64__)
+#  if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
 #    define LZ4_FAST_DEC_LOOP 1
 #  elif defined(__aarch64__) && !defined(__clang__)
      /* On aarch64, we disable this optimization for clang because on certain
-      * mobile chipsets and clang, it reduces performance. For more information
-      * refer to https://github.com/lz4/lz4/pull/707. */
+      * mobile chipsets, performance is reduced with clang. For information
+      * refer to https://github.com/lz4/lz4/pull/707 */
 #    define LZ4_FAST_DEC_LOOP 1
 #  else
 #    define LZ4_FAST_DEC_LOOP 0
@@ -384,20 +433,22 @@
 
 #if LZ4_FAST_DEC_LOOP
 
-LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
+LZ4_FORCE_INLINE void
 LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
 {
+    assert(srcPtr + offset == dstPtr);
     if (offset < 8) {
+        LZ4_write32(dstPtr, 0);   /* silence an msan warning when offset==0 */
         dstPtr[0] = srcPtr[0];
         dstPtr[1] = srcPtr[1];
         dstPtr[2] = srcPtr[2];
         dstPtr[3] = srcPtr[3];
         srcPtr += inc32table[offset];
-        memcpy(dstPtr+4, srcPtr, 4);
+        LZ4_memcpy(dstPtr+4, srcPtr, 4);
         srcPtr -= dec64table[offset];
         dstPtr += 8;
     } else {
-        memcpy(dstPtr, srcPtr, 8);
+        LZ4_memcpy(dstPtr, srcPtr, 8);
         dstPtr += 8;
         srcPtr += 8;
     }
@@ -408,49 +459,48 @@
 /* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
  * this version copies two times 16 bytes (instead of one time 32 bytes)
  * because it must be compatible with offsets >= 16. */
-LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
+LZ4_FORCE_INLINE void
 LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
 {
     BYTE* d = (BYTE*)dstPtr;
     const BYTE* s = (const BYTE*)srcPtr;
     BYTE* const e = (BYTE*)dstEnd;
 
-    do { memcpy(d,s,16); memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
+    do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
 }
 
 /* LZ4_memcpy_using_offset()  presumes :
  * - dstEnd >= dstPtr + MINMATCH
  * - there is at least 8 bytes available to write after dstEnd */
-LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
+LZ4_FORCE_INLINE void
 LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
 {
     BYTE v[8];
 
     assert(dstEnd >= dstPtr + MINMATCH);
-    LZ4_write32(dstPtr, 0);   /* silence an msan warning when offset==0 */
 
     switch(offset) {
     case 1:
-        memset(v, *srcPtr, 8);
+        MEM_INIT(v, *srcPtr, 8);
         break;
     case 2:
-        memcpy(v, srcPtr, 2);
-        memcpy(&v[2], srcPtr, 2);
-        memcpy(&v[4], &v[0], 4);
+        LZ4_memcpy(v, srcPtr, 2);
+        LZ4_memcpy(&v[2], srcPtr, 2);
+        LZ4_memcpy(&v[4], v, 4);
         break;
     case 4:
-        memcpy(v, srcPtr, 4);
-        memcpy(&v[4], srcPtr, 4);
+        LZ4_memcpy(v, srcPtr, 4);
+        LZ4_memcpy(&v[4], srcPtr, 4);
         break;
     default:
         LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
         return;
     }
 
-    memcpy(dstPtr, v, 8);
+    LZ4_memcpy(dstPtr, v, 8);
     dstPtr += 8;
     while (dstPtr < dstEnd) {
-        memcpy(dstPtr, v, 8);
+        LZ4_memcpy(dstPtr, v, 8);
         dstPtr += 8;
     }
 }
@@ -462,75 +512,92 @@
 **************************************/
 static unsigned LZ4_NbCommonBytes (reg_t val)
 {
+    assert(val != 0);
     if (LZ4_isLittleEndian()) {
-        if (sizeof(val)==8) {
-#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+        if (sizeof(val) == 8) {
+#       if defined(_MSC_VER) && (_MSC_VER >= 1800) && defined(_M_AMD64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            /* x64 CPUs without BMI support interpret `TZCNT` as `REP BSF` */
+            return (unsigned)_tzcnt_u64(val) >> 3;
+#       elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
             unsigned long r = 0;
-            _BitScanForward64( &r, (U64)val );
-            return (int)(r>>3);
-#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            _BitScanForward64(&r, (U64)val);
+            return (unsigned)r >> 3;
+#       elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+                                        !defined(LZ4_FORCE_SW_BITCOUNT)
             return (unsigned)__builtin_ctzll((U64)val) >> 3;
 #       else
-            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
-                                                     0, 3, 1, 3, 1, 4, 2, 7,
-                                                     0, 2, 3, 6, 1, 5, 3, 5,
-                                                     1, 3, 4, 4, 2, 5, 6, 7,
-                                                     7, 0, 1, 2, 3, 3, 4, 6,
-                                                     2, 6, 5, 5, 3, 4, 5, 6,
-                                                     7, 1, 2, 4, 6, 4, 4, 5,
-                                                     7, 2, 6, 5, 7, 6, 7, 7 };
-            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+            const U64 m = 0x0101010101010101ULL;
+            val ^= val - 1;
+            return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56);
 #       endif
         } else /* 32 bits */ {
-#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+#       if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT)
             unsigned long r;
-            _BitScanForward( &r, (U32)val );
-            return (int)(r>>3);
-#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            _BitScanForward(&r, (U32)val);
+            return (unsigned)r >> 3;
+#       elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+                        !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
             return (unsigned)__builtin_ctz((U32)val) >> 3;
 #       else
-            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
-                                                     3, 2, 2, 1, 3, 2, 0, 1,
-                                                     3, 3, 1, 2, 2, 2, 2, 0,
-                                                     3, 1, 2, 0, 1, 0, 1, 1 };
-            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+            const U32 m = 0x01010101;
+            return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24;
 #       endif
         }
     } else   /* Big Endian CPU */ {
-        if (sizeof(val)==8) {   /* 64-bits */
-#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            unsigned long r = 0;
-            _BitScanReverse64( &r, val );
-            return (unsigned)(r>>3);
-#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
+        if (sizeof(val)==8) {
+#       if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+                        !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
             return (unsigned)__builtin_clzll((U64)val) >> 3;
 #       else
+#if 1
+            /* this method is probably faster,
+             * but adds a 128-byte lookup table */
+            static const unsigned char ctz7_tab[128] = {
+                7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+            };
+            U64 const mask = 0x0101010101010101ULL;
+            U64 const t = (((val >> 8) - mask) | val) & mask;
+            return ctz7_tab[(t * 0x0080402010080402ULL) >> 57];
+#else
+            /* this method doesn't consume memory space like the previous one,
+             * but it contains several branches,
+             * that may end up slowing execution */
             static const U32 by32 = sizeof(val)*4;  /* 32 on 64 bits (goal), 16 on 32 bits.
-                Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
-                Note that this code path is never triggered in 32-bits mode. */
+            Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
+            Note that this code path is never triggered in 32-bits mode. */
             unsigned r;
             if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
             if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
             r += (!val);
             return r;
+#endif
 #       endif
         } else /* 32 bits */ {
-#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            unsigned long r = 0;
-            _BitScanReverse( &r, (unsigned long)val );
-            return (unsigned)(r>>3);
-#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
+#       if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+                                        !defined(LZ4_FORCE_SW_BITCOUNT)
             return (unsigned)__builtin_clz((U32)val) >> 3;
 #       else
-            unsigned r;
-            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
-            r += (!val);
-            return r;
+            val >>= 8;
+            val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) |
+              (val + 0x00FF0000)) >> 24;
+            return (unsigned)val ^ 3;
 #       endif
         }
     }
 }
 
+
 #define STEPSIZE sizeof(reg_t)
 LZ4_FORCE_INLINE
 unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
@@ -605,7 +672,7 @@
 int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }
 const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; }
 int LZ4_compressBound(int isize)  { return LZ4_COMPRESSBOUND(isize); }
-int LZ4_sizeofState() { return LZ4_STREAMSIZE; }
+int LZ4_sizeofState(void) { return LZ4_STREAMSIZE; }
 
 
 /*-************************************
@@ -628,7 +695,7 @@
 /*-******************************
 *  Compression functions
 ********************************/
-static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
+LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
 {
     if (tableType == byU16)
         return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
@@ -636,7 +703,7 @@
         return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
 }
 
-static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
+LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
 {
     const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
     if (LZ4_isLittleEndian()) {
@@ -654,7 +721,7 @@
     return LZ4_hash4(LZ4_read32(p), tableType);
 }
 
-static void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
+LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
 {
     switch (tableType)
     {
@@ -666,7 +733,7 @@
     }
 }
 
-static void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
+LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
 {
     switch (tableType)
     {
@@ -678,7 +745,7 @@
     }
 }
 
-static void LZ4_putPositionOnHash(const BYTE* p, U32 h,
+LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h,
                                   void* tableBase, tableType_t const tableType,
                             const BYTE* srcBase)
 {
@@ -703,7 +770,7 @@
  * Assumption 1 : only valid if tableType == byU32 or byU16.
  * Assumption 2 : h is presumed valid (within limits of hash table)
  */
-static U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
+LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
 {
     LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
     if (tableType == byU32) {
@@ -739,22 +806,13 @@
 LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
            const int inputSize,
            const tableType_t tableType) {
-    /* If compression failed during the previous step, then the context
-     * is marked as dirty, therefore, it has to be fully reset.
-     */
-    if (cctx->dirty) {
-        DEBUGLOG(5, "LZ4_prepareTable: Full reset for %p", cctx);
-        MEM_INIT(cctx, 0, sizeof(LZ4_stream_t_internal));
-        return;
-    }
-
     /* If the table hasn't been used, it's guaranteed to be zeroed out, and is
      * therefore safe to use no matter what mode we're in. Otherwise, we figure
      * out if it's safe to leave as is or whether it needs to be reset.
      */
-    if (cctx->tableType != clearedTable) {
+    if ((tableType_t)cctx->tableType != clearedTable) {
         assert(inputSize >= 0);
-        if (cctx->tableType != tableType
+        if ((tableType_t)cctx->tableType != tableType
           || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU)
           || ((tableType == byU32) && cctx->currentOffset > 1 GB)
           || tableType == byPtr
@@ -763,7 +821,7 @@
             DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
             MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
             cctx->currentOffset = 0;
-            cctx->tableType = clearedTable;
+            cctx->tableType = (U32)clearedTable;
         } else {
             DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
         }
@@ -785,8 +843,12 @@
 }
 
 /** LZ4_compress_generic() :
-    inlined, to ensure branches are decided at compilation time */
-LZ4_FORCE_INLINE int LZ4_compress_generic(
+ *  inlined, to ensure branches are decided at compilation time.
+ *  Presumed already validated at this stage:
+ *  - source != NULL
+ *  - inputSize > 0
+ */
+LZ4_FORCE_INLINE int LZ4_compress_generic_validated(
                  LZ4_stream_t_internal* const cctx,
                  const char* const source,
                  char* const dest,
@@ -815,7 +877,7 @@
 
     int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);
     U32 const prefixIdxLimit = startIndex - dictSize;   /* used when dictDirective == dictSmall */
-    const BYTE* const dictEnd = dictionary + dictSize;
+    const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary;
     const BYTE* anchor = (const BYTE*) source;
     const BYTE* const iend = ip + inputSize;
     const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
@@ -823,7 +885,7 @@
 
     /* the dictCtx currentOffset is indexed on the start of the dictionary,
      * while a dictionary in the current context precedes the currentOffset */
-    const BYTE* dictBase = (dictDirective == usingDictCtx) ?
+    const BYTE* dictBase = !dictionary ? NULL : (dictDirective == usingDictCtx) ?
                             dictionary + dictSize - dictCtx->currentOffset :
                             dictionary + dictSize - startIndex;
 
@@ -833,11 +895,11 @@
     U32 offset = 0;
     U32 forwardH;
 
-    DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, tableType=%u", inputSize, tableType);
+    DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType);
+    assert(ip != NULL);
     /* If init conditions are not met, we don't have to mark stream
      * as having dirty context, since no action was taken yet */
     if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */
-    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; }           /* Unsupported inputSize, too large (or negative) */
     if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; }  /* Size too large (not within 64K limit) */
     if (tableType==byPtr) assert(dictDirective==noDict);      /* only supported use case with byPtr */
     assert(acceleration >= 1);
@@ -854,7 +916,7 @@
         cctx->dictSize += (U32)inputSize;
     }
     cctx->currentOffset += (U32)inputSize;
-    cctx->tableType = (U16)tableType;
+    cctx->tableType = (U32)tableType;
 
     if (inputSize<LZ4_minLength) goto _last_literals;        /* Input too small, no compression (all literals) */
 
@@ -1147,13 +1209,14 @@
             if (outputDirective == fillOutput) {
                 /* adapt lastRun to fill 'dst' */
                 assert(olimit >= op);
-                lastRun  = (size_t)(olimit-op) - 1;
-                lastRun -= (lastRun+240)/255;
+                lastRun  = (size_t)(olimit-op) - 1/*token*/;
+                lastRun -= (lastRun + 256 - RUN_MASK) / 256;  /*additional length tokens*/
             } else {
                 assert(outputDirective == limitedOutput);
                 return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
             }
         }
+        DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun);
         if (lastRun >= RUN_MASK) {
             size_t accumulator = lastRun - RUN_MASK;
             *op++ = RUN_MASK << ML_BITS;
@@ -1162,7 +1225,7 @@
         } else {
             *op++ = (BYTE)(lastRun<<ML_BITS);
         }
-        memcpy(op, anchor, lastRun);
+        LZ4_memcpy(op, anchor, lastRun);
         ip = anchor + lastRun;
         op += lastRun;
     }
@@ -1170,18 +1233,60 @@
     if (outputDirective == fillOutput) {
         *inputConsumed = (int) (((const char*)ip)-source);
     }
-    DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, (int)(((char*)op) - dest));
     result = (int)(((char*)op) - dest);
     assert(result > 0);
+    DEBUGLOG(5, "LZ4_compress_generic_validated: compressed %i bytes into %i bytes", inputSize, result);
     return result;
 }
 
+/** LZ4_compress_generic() :
+ *  inlined, to ensure branches are decided at compilation time;
+ *  rejects out-of-range srcSize, handles srcSize == 0 (src may then be NULL),
+ *  and forwards everything else to LZ4_compress_generic_validated() */
+LZ4_FORCE_INLINE int LZ4_compress_generic(
+                 LZ4_stream_t_internal* const cctx,
+                 const char* const src,
+                 char* const dst,
+                 const int srcSize,
+                 int *inputConsumed, /* only written when outputDirective == fillOutput */
+                 const int dstCapacity,
+                 const limitedOutput_directive outputDirective,
+                 const tableType_t tableType,
+                 const dict_directive dictDirective,
+                 const dictIssue_directive dictIssue,
+                 const int acceleration)
+{
+    DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i",
+                srcSize, dstCapacity);
+
+    if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; }  /* Unsupported srcSize, too large (or negative) */
+    if (srcSize == 0) {   /* src == NULL supported if srcSize == 0 */
+        if (outputDirective != notLimited && dstCapacity <= 0) return 0;  /* no output, can't write anything */
+        DEBUGLOG(5, "Generating an empty block");
+        assert(outputDirective == notLimited || dstCapacity >= 1);
+        assert(dst != NULL);
+        dst[0] = 0;
+        if (outputDirective == fillOutput) {
+            assert (inputConsumed != NULL);
+            *inputConsumed = 0;
+        }
+        return 1;
+    }
+    assert(src != NULL);
+
+    return LZ4_compress_generic_validated(cctx, src, dst, srcSize,
+                inputConsumed, /* only written into if outputDirective == fillOutput */
+                dstCapacity, outputDirective,
+                tableType, dictDirective, dictIssue, acceleration);
+}
+
 
 int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
 {
     LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse;
     assert(ctx != NULL);
-    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
+    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
+    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
     if (maxOutputSize >= LZ4_compressBound(inputSize)) {
         if (inputSize < LZ4_64Klimit) {
             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
@@ -1211,7 +1316,8 @@
 int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
 {
     LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
-    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
+    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
+    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
 
     if (dstCapacity >= LZ4_compressBound(srcSize)) {
         if (srcSize < LZ4_64Klimit) {
@@ -1270,22 +1376,6 @@
 }
 
 
-/* hidden debug function */
-/* strangely enough, gcc generates faster code when this function is uncommented, even if unused */
-int LZ4_compress_fast_force(const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
-{
-    LZ4_stream_t ctx;
-    LZ4_initStream(&ctx, sizeof(ctx));
-
-    if (srcSize < LZ4_64Klimit) {
-        return LZ4_compress_generic(&ctx.internal_donotuse, src, dst, srcSize, NULL, dstCapacity, limitedOutput, byU16,    noDict, noDictIssue, acceleration);
-    } else {
-        tableType_t const addrMode = (sizeof(void*) > 4) ? byU32 : byPtr;
-        return LZ4_compress_generic(&ctx.internal_donotuse, src, dst, srcSize, NULL, dstCapacity, limitedOutput, addrMode, noDict, noDictIssue, acceleration);
-    }
-}
-
-
 /* Note!: This function leaves the stream in an unclean/broken state!
  * It is not safe to subsequently use the same state with a _fastReset() or
  * _continue() call without resetting it. */
@@ -1340,27 +1430,23 @@
     return lz4s;
 }
 
-#ifndef _MSC_VER  /* for some reason, Visual fails the aligment test on 32-bit x86 :
-                     it reports an aligment of 8-bytes,
-                     while actually aligning LZ4_stream_t on 4 bytes. */
 static size_t LZ4_stream_t_alignment(void)
 {
-    struct { char c; LZ4_stream_t t; } t_a;
-    return sizeof(t_a) - sizeof(t_a.t);
-}
+#if LZ4_ALIGN_TEST
+    typedef struct { char c; LZ4_stream_t t; } t_a;
+    return sizeof(t_a) - sizeof(LZ4_stream_t);
+#else
+    return 1;  /* effectively disabled */
 #endif
+}
 
 LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
 {
     DEBUGLOG(5, "LZ4_initStream");
     if (buffer == NULL) { return NULL; }
     if (size < sizeof(LZ4_stream_t)) { return NULL; }
-#ifndef _MSC_VER  /* for some reason, Visual fails the aligment test on 32-bit x86 :
-                     it reports an aligment of 8-bytes,
-                     while actually aligning LZ4_stream_t on 4 bytes. */
-    if (((size_t)buffer) & (LZ4_stream_t_alignment() - 1)) { return NULL; } /* alignment check */
-#endif
-    MEM_INIT(buffer, 0, sizeof(LZ4_stream_t));
+    if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) return NULL;
+    MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal));
     return (LZ4_stream_t*)buffer;
 }
 
@@ -1369,7 +1455,7 @@
 void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
 {
     DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
-    MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t));
+    MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal));
 }
 
 void LZ4_resetStream_fast(LZ4_stream_t* ctx) {
@@ -1418,7 +1504,7 @@
     base = dictEnd - dict->currentOffset;
     dict->dictionary = p;
     dict->dictSize = (U32)(dictEnd - p);
-    dict->tableType = tableType;
+    dict->tableType = (U32)tableType;
 
     while (p <= dictEnd-HASH_UNIT) {
         LZ4_putPosition(p, dict->hashTable, tableType, base);
@@ -1436,12 +1522,6 @@
              workingStream, dictionaryStream,
              dictCtx != NULL ? dictCtx->dictSize : 0);
 
-    /* Calling LZ4_resetStream_fast() here makes sure that changes will not be
-     * erased by subsequent calls to LZ4_resetStream_fast() in case stream was
-     * marked as having dirty context, e.g. requiring full reset.
-     */
-    LZ4_resetStream_fast(workingStream);
-
     if (dictCtx != NULL) {
         /* If the current offset is zero, we will never look in the
          * external dictionary context, since there is no value a table
@@ -1493,9 +1573,9 @@
 
     DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize);
 
-    if (streamPtr->dirty) { return 0; } /* Uninitialized structure detected */
     LZ4_renormDictT(streamPtr, inputSize);   /* avoid index overflow */
-    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
+    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
+    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
 
     /* invalidate tiny dictionaries */
     if ( (streamPtr->dictSize-1 < 4-1)   /* intentional underflow */
@@ -1538,7 +1618,7 @@
                  * cost to copy the dictionary's tables into the active context,
                  * so that the compression loop is only looking into one table.
                  */
-                memcpy(streamPtr, streamPtr->dictCtx, sizeof(LZ4_stream_t));
+                LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr));
                 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
             } else {
                 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration);
@@ -1593,7 +1673,9 @@
     if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */
     if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; }
 
-    memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
+    if (safeBuffer == NULL) assert(dictSize == 0);
+    if (dictSize > 0)
+        memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
 
     dict->dictionary = (const BYTE*)safeBuffer;
     dict->dictSize = (U32)dictSize;
@@ -1623,25 +1705,27 @@
  */
 typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error;
 LZ4_FORCE_INLINE unsigned
-read_variable_length(const BYTE**ip, const BYTE* lencheck, int loop_check, int initial_check, variable_length_error* error)
+read_variable_length(const BYTE**ip, const BYTE* lencheck,
+                     int loop_check, int initial_check,
+                     variable_length_error* error)
 {
-  unsigned length = 0;
-  unsigned s;
-  if (initial_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
-    *error = initial_error;
-    return length;
-  }
-  do {
-    s = **ip;
-    (*ip)++;
-    length += s;
-    if (loop_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
-      *error = loop_error;
-      return length;
+    U32 length = 0;
+    U32 s;
+    if (initial_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
+        *error = initial_error;
+        return length;
     }
-  } while (s==255);
+    do {
+        s = **ip;
+        (*ip)++;
+        length += s;
+        if (loop_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
+            *error = loop_error;
+            return length;
+        }
+    } while (s==255);
 
-  return length;
+    return length;
 }
 
 /*! LZ4_decompress_generic() :
@@ -1722,7 +1806,7 @@
             /* decode literal length */
             if (length == RUN_MASK) {
                 variable_length_error error = ok;
-                length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error);
+                length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
                 if (error == initial_error) { goto _output_error; }
                 if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
                 if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
@@ -1746,12 +1830,12 @@
                     /* We don't need to check oend, since we check it once for each loop below */
                     if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
                     /* Literals can only be 14, but hope compilers optimize if we copy by a register size */
-                    memcpy(op, ip, 16);
+                    LZ4_memcpy(op, ip, 16);
                 } else {  /* LZ4_decompress_fast() */
                     /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
                      * it doesn't know input length, and relies on end-of-block properties */
-                    memcpy(op, ip, 8);
-                    if (length > 8) { memcpy(op+8, ip+8, 8); }
+                    LZ4_memcpy(op, ip, 8);
+                    if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); }
                 }
                 ip += length; op = cpy;
             }
@@ -1765,10 +1849,10 @@
             length = token & ML_MASK;
 
             if (length == ML_MASK) {
-              variable_length_error error = ok;
-              if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
-              length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error);
-              if (error != ok) { goto _output_error; }
+                variable_length_error error = ok;
+                if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
+                length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
+                if (error != ok) { goto _output_error; }
                 if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
                 length += MINMATCH;
                 if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
@@ -1787,19 +1871,20 @@
                         assert(match <= op);
                         assert(op + 18 <= oend);
 
-                        memcpy(op, match, 8);
-                        memcpy(op+8, match+8, 8);
-                        memcpy(op+16, match+16, 2);
+                        LZ4_memcpy(op, match, 8);
+                        LZ4_memcpy(op+8, match+8, 8);
+                        LZ4_memcpy(op+16, match+16, 2);
                         op += length;
                         continue;
             }   }   }
 
-            if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
+            if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
             /* match starting within external dictionary */
             if ((dict==usingExtDict) && (match < lowPrefix)) {
                 if (unlikely(op+length > oend-LASTLITERALS)) {
                     if (partialDecoding) {
-                        length = MIN(length, (size_t)(oend-op));  /* reach end of buffer */
+                        DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
+                        length = MIN(length, (size_t)(oend-op));
                     } else {
                         goto _output_error;  /* end-of-block condition violated */
                 }   }
@@ -1812,14 +1897,14 @@
                     /* match stretches into both external dictionary and current block */
                     size_t const copySize = (size_t)(lowPrefix - match);
                     size_t const restSize = length - copySize;
-                    memcpy(op, dictEnd - copySize, copySize);
+                    LZ4_memcpy(op, dictEnd - copySize, copySize);
                     op += copySize;
                     if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
                         BYTE* const endOfMatch = op + restSize;
                         const BYTE* copyFrom = lowPrefix;
                         while (op < endOfMatch) { *op++ = *copyFrom++; }
                     } else {
-                        memcpy(op, lowPrefix, restSize);
+                        LZ4_memcpy(op, lowPrefix, restSize);
                         op += restSize;
                 }   }
                 continue;
@@ -1860,7 +1945,7 @@
                 /* strictly "less than" on input, to re-enter the loop with at least one byte */
               && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
                 /* Copy the literals */
-                memcpy(op, ip, endOnInput ? 16 : 8);
+                LZ4_memcpy(op, ip, endOnInput ? 16 : 8);
                 op += length; ip += length;
 
                 /* The second stage: prepare for match copying, decode full info.
@@ -1875,9 +1960,9 @@
                   && (offset >= 8)
                   && (dict==withPrefix64k || match >= lowPrefix) ) {
                     /* Copy the match. */
-                    memcpy(op + 0, match + 0, 8);
-                    memcpy(op + 8, match + 8, 8);
-                    memcpy(op +16, match +16, 2);
+                    LZ4_memcpy(op + 0, match + 0, 8);
+                    LZ4_memcpy(op + 8, match + 8, 8);
+                    LZ4_memcpy(op +16, match +16, 2);
                     op += length + MINMATCH;
                     /* Both stages worked, load the next token. */
                     continue;
@@ -1891,7 +1976,7 @@
             /* decode literal length */
             if (length == RUN_MASK) {
                 variable_length_error error = ok;
-                length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error);
+                length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
                 if (error == initial_error) { goto _output_error; }
                 if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
                 if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
@@ -1907,29 +1992,34 @@
               || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
             {
                 /* We've either hit the input parsing restriction or the output parsing restriction.
-                 * If we've hit the input parsing condition then this must be the last sequence.
-                 * If we've hit the output parsing condition then we are either using partialDecoding
-                 * or we've hit the output parsing condition.
+                 * In the normal scenario, decoding a full block, it must be the last sequence,
+                 * otherwise it's an error (invalid input or dimensions).
+                 * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
                  */
                 if (partialDecoding) {
                     /* Since we are partial decoding we may be in this block because of the output parsing
                      * restriction, which is not valid since the output buffer is allowed to be undersized.
                      */
                     assert(endOnInput);
-                    /* If we're in this block because of the input parsing condition, then we must be on the
-                     * last sequence (or invalid), so we must check that we exactly consume the input.
+                    DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
+                    DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
+                    DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
+                    DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
+                    /* Finishing in the middle of a literals segment,
+                     * due to lack of input.
                      */
-                    if ((ip+length>iend-(2+1+LASTLITERALS)) && (ip+length != iend)) { goto _output_error; }
-                    assert(ip+length <= iend);
-                    /* We are finishing in the middle of a literals segment.
-                     * Break after the copy.
+                    if (ip+length > iend) {
+                        length = (size_t)(iend-ip);
+                        cpy = op + length;
+                    }
+                    /* Finishing in the middle of a literals segment,
+                     * due to lack of output space.
                      */
                     if (cpy > oend) {
                         cpy = oend;
                         assert(op<=oend);
                         length = (size_t)(oend-op);
                     }
-                    assert(ip+length <= iend);
                 } else {
                     /* We must be on the last sequence because of the parsing limitations so check
                      * that we exactly regenerate the original size (must be exact when !endOnInput).
@@ -1938,16 +2028,22 @@
                      /* We must be on the last sequence (or invalid) because of the parsing limitations
                       * so check that we exactly consume the input and don't overrun the output buffer.
                       */
-                    if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { goto _output_error; }
+                    if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) {
+                        DEBUGLOG(6, "should have been last run of literals")
+                        DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
+                        DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
+                        goto _output_error;
+                    }
                 }
-                memmove(op, ip, length);  /* supports overlapping memory regions, which only matters for in-place decompression scenarios */
+                memmove(op, ip, length);  /* supports overlapping memory regions; only matters for in-place decompression scenarios */
                 ip += length;
                 op += length;
-                /* Necessarily EOF when !partialDecoding. When partialDecoding
-                 * it is EOF if we've either filled the output buffer or hit
-                 * the input parsing restriction.
+                /* Necessarily EOF when !partialDecoding.
+                 * When partialDecoding, it is EOF if we've either
+                 * filled the output buffer or
+                 * can't proceed with reading an offset for following match.
                  */
-                if (!partialDecoding || (cpy == oend) || (ip == iend)) {
+                if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
                     break;
                 }
             } else {
@@ -1965,7 +2061,7 @@
     _copy_match:
             if (length == ML_MASK) {
               variable_length_error error = ok;
-              length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error);
+              length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
               if (error != ok) goto _output_error;
                 if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
             }
@@ -1990,14 +2086,14 @@
                     /* match stretches into both external dictionary and current block */
                     size_t const copySize = (size_t)(lowPrefix - match);
                     size_t const restSize = length - copySize;
-                    memcpy(op, dictEnd - copySize, copySize);
+                    LZ4_memcpy(op, dictEnd - copySize, copySize);
                     op += copySize;
                     if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
                         BYTE* const endOfMatch = op + restSize;
                         const BYTE* copyFrom = lowPrefix;
                         while (op < endOfMatch) *op++ = *copyFrom++;
                     } else {
-                        memcpy(op, lowPrefix, restSize);
+                        LZ4_memcpy(op, lowPrefix, restSize);
                         op += restSize;
                 }   }
                 continue;
@@ -2016,7 +2112,7 @@
                 if (matchEnd > op) {   /* overlap copy */
                     while (op < copyEnd) { *op++ = *match++; }
                 } else {
-                    memcpy(op, match, mlen);
+                    LZ4_memcpy(op, match, mlen);
                 }
                 op = copyEnd;
                 if (op == oend) { break; }
@@ -2030,10 +2126,10 @@
                 op[2] = match[2];
                 op[3] = match[3];
                 match += inc32table[offset];
-                memcpy(op+4, match, 4);
+                LZ4_memcpy(op+4, match, 4);
                 match -= dec64table[offset];
             } else {
-                memcpy(op, match, 8);
+                LZ4_memcpy(op, match, 8);
                 match += 8;
             }
             op += 8;
@@ -2048,7 +2144,7 @@
                 }
                 while (op < cpy) { *op++ = *match++; }
             } else {
-                memcpy(op, match, 8);
+                LZ4_memcpy(op, match, 8);
                 if (length > 16)  { LZ4_wildCopy8(op+8, match+8, cpy); }
             }
             op = cpy;   /* wildcopy correction */
@@ -2056,6 +2152,7 @@
 
         /* end of decoding */
         if (endOnInput) {
+            DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
            return (int) (((char*)op)-dst);     /* Nb of output bytes decoded */
        } else {
            return (int) (((const char*)ip)-src);   /* Nb of input bytes read */
@@ -2070,7 +2167,7 @@
 
 /*===== Instantiate the API decoding functions. =====*/
 
-LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_O2
 int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
 {
     return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
@@ -2078,7 +2175,7 @@
                                   (BYTE*)dest, NULL, 0);
 }
 
-LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_O2
 int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity)
 {
     dstCapacity = MIN(targetOutputSize, dstCapacity);
@@ -2087,7 +2184,7 @@
                                   noDict, (BYTE*)dst, NULL, 0);
 }
 
-LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_O2
 int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
 {
     return LZ4_decompress_generic(source, dest, 0, originalSize,
@@ -2097,7 +2194,7 @@
 
 /*===== Instantiate a few more decoding cases, used more than once. =====*/
 
-LZ4_FORCE_O2_GCC_PPC64LE /* Exported, an obsolete API function. */
+LZ4_FORCE_O2 /* Exported, an obsolete API function. */
 int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
 {
     return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
@@ -2113,7 +2210,7 @@
     return LZ4_decompress_fast(source, dest, originalSize);
 }
 
-LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_O2
 static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
                                                size_t prefixSize)
 {
@@ -2122,7 +2219,7 @@
                                   (BYTE*)dest-prefixSize, NULL, 0);
 }
 
-LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_O2
 int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
                                      int compressedSize, int maxOutputSize,
                                      const void* dictStart, size_t dictSize)
@@ -2132,7 +2229,7 @@
                                   (BYTE*)dest, (const BYTE*)dictStart, dictSize);
 }
 
-LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_O2
 static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
                                        const void* dictStart, size_t dictSize)
 {
@@ -2221,7 +2318,7 @@
     If it's not possible, save the relevant part of decoded data into a safe buffer,
     and indicate where it stands using LZ4_setStreamDecode()
 */
-LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_O2
 int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
 {
     LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
@@ -2261,7 +2358,7 @@
     return result;
 }
 
-LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_O2
 int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
 {
     LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
@@ -2374,7 +2471,7 @@
 
 /* Obsolete Streaming functions */
 
-int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; }
+int LZ4_sizeofStreamState(void) { return LZ4_STREAMSIZE; }
 
 int LZ4_resetStreamState(void* state, char* inputBuffer)
 {
diff --git a/lib/lz4.h b/lib/lz4.h
index 32108e2..7ab1e48 100644
--- a/lib/lz4.h
+++ b/lib/lz4.h
@@ -100,7 +100,7 @@
 /*------   Version   ------*/
 #define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
 #define LZ4_VERSION_MINOR    9    /* for new (non-breaking) interface capabilities */
-#define LZ4_VERSION_RELEASE  2    /* for tweaks, bug-fixes, or development */
+#define LZ4_VERSION_RELEASE  3    /* for tweaks, bug-fixes, or development */
 
 #define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
 
@@ -186,7 +186,8 @@
     The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
     It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
     An acceleration value of "1" is the same as regular LZ4_compress_default()
-    Values <= 0 will be replaced by ACCELERATION_DEFAULT (currently == 1, see lz4.c).
+    Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c).
+    Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c).
 */
 LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
 
@@ -212,7 +213,18 @@
  *               New value is necessarily <= input value.
  * @return : Nb bytes written into 'dst' (necessarily <= targetDestSize)
  *           or 0 if compression fails.
-*/
+ *
+ * Note : from v1.8.2 to v1.9.1, this function had a bug (fixed in v1.9.2+):
+ *        the produced compressed content could, in specific circumstances,
+ *        require to be decompressed into a destination buffer larger
+ *        by at least 1 byte than the content to decompress.
+ *        If an application uses `LZ4_compress_destSize()`,
+ *        it's highly recommended to update liblz4 to v1.9.2 or better.
+ *        If this can't be done or ensured,
+ *        the receiving decompression function should provide
+ *        a dstCapacity which is > decompressedSize, by at least 1 byte.
+ *        See https://github.com/lz4/lz4/issues/859 for details
+ */
 LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);
 
 
@@ -220,25 +232,35 @@
  *  Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
  *  into destination buffer 'dst' of size 'dstCapacity'.
  *  Up to 'targetOutputSize' bytes will be decoded.
- *  The function stops decoding on reaching this objective,
- *  which can boost performance when only the beginning of a block is required.
+ *  The function stops decoding on reaching this objective.
+ *  This can be useful to boost performance
+ *  whenever only the beginning of a block is required.
  *
- * @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity)
+ * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
  *           If source stream is detected malformed, function returns a negative result.
  *
- *  Note : @return can be < targetOutputSize, if compressed block contains less data.
+ *  Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
  *
- *  Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity,
- *           and expects targetOutputSize <= dstCapacity.
- *           It effectively stops decoding on reaching targetOutputSize,
+ *  Note 2 : targetOutputSize must be <= dstCapacity
+ *
+ *  Note 3 : this function effectively stops decoding on reaching targetOutputSize,
  *           so dstCapacity is kind of redundant.
- *           This is because in a previous version of this function,
- *           decoding operation would not "break" a sequence in the middle.
- *           As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize,
+ *           This is because in older versions of this function,
+ *           decoding operation would still write complete sequences.
+ *           Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
  *           it could write more bytes, though only up to dstCapacity.
  *           Some "margin" used to be required for this operation to work properly.
- *           This is no longer necessary.
- *           The function nonetheless keeps its signature, in an effort to not break API.
+ *           Thankfully, this is no longer necessary.
+ *           The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
+ *
+ *  Note 4 : If srcSize is the exact size of the block,
+ *           then targetOutputSize can be any value,
+ *           including larger than the block's decompressed size.
+ *           The function will generate, at most, the block's decompressed size.
+ *
+ *  Note 5 : If srcSize is _larger_ than block's compressed size,
+ *           then targetOutputSize **MUST** be <= block's decompressed size.
+ *           Otherwise, *silent corruption will occur*.
  */
 LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
 
@@ -547,74 +569,64 @@
 #define LZ4_H_98237428734687
 
 /*-************************************************************
- *  PRIVATE DEFINITIONS
+ *  Private Definitions
  **************************************************************
  * Do not use these definitions directly.
  * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
- * Accessing members will expose code to API and/or ABI break in future versions of the library.
+ * Accessing members will expose user code to API and/or ABI break in future versions of the library.
  **************************************************************/
 #define LZ4_HASHLOG   (LZ4_MEMORY_USAGE-2)
 #define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
 #define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)       /* required as macro for static allocation */
 
 #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-#include <stdint.h>
-
-typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
-struct LZ4_stream_t_internal {
-    uint32_t hashTable[LZ4_HASH_SIZE_U32];
-    uint32_t currentOffset;
-    uint16_t dirty;
-    uint16_t tableType;
-    const uint8_t* dictionary;
-    const LZ4_stream_t_internal* dictCtx;
-    uint32_t dictSize;
-};
-
-typedef struct {
-    const uint8_t* externalDict;
-    size_t extDictSize;
-    const uint8_t* prefixEnd;
-    size_t prefixSize;
-} LZ4_streamDecode_t_internal;
-
+# include <stdint.h>
+  typedef  int8_t  LZ4_i8;
+  typedef uint8_t  LZ4_byte;
+  typedef uint16_t LZ4_u16;
+  typedef uint32_t LZ4_u32;
 #else
-
-typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
-struct LZ4_stream_t_internal {
-    unsigned int hashTable[LZ4_HASH_SIZE_U32];
-    unsigned int currentOffset;
-    unsigned short dirty;
-    unsigned short tableType;
-    const unsigned char* dictionary;
-    const LZ4_stream_t_internal* dictCtx;
-    unsigned int dictSize;
-};
-
-typedef struct {
-    const unsigned char* externalDict;
-    const unsigned char* prefixEnd;
-    size_t extDictSize;
-    size_t prefixSize;
-} LZ4_streamDecode_t_internal;
-
+  typedef   signed char  LZ4_i8;
+  typedef unsigned char  LZ4_byte;
+  typedef unsigned short LZ4_u16;
+  typedef unsigned int   LZ4_u32;
 #endif
 
+typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
+struct LZ4_stream_t_internal {
+    LZ4_u32 hashTable[LZ4_HASH_SIZE_U32];
+    LZ4_u32 currentOffset;
+    LZ4_u32 tableType;
+    const LZ4_byte* dictionary;
+    const LZ4_stream_t_internal* dictCtx;
+    LZ4_u32 dictSize;
+};
+
+typedef struct {
+    const LZ4_byte* externalDict;
+    size_t extDictSize;
+    const LZ4_byte* prefixEnd;
+    size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+
+
 /*! LZ4_stream_t :
- *  information structure to track an LZ4 stream.
+ *  Do not use below internal definitions directly !
+ *  Declare or allocate an LZ4_stream_t instead.
  *  LZ4_stream_t can also be created using LZ4_createStream(), which is recommended.
  *  The structure definition can be convenient for static allocation
  *  (on stack, or as part of larger structure).
  *  Init this structure with LZ4_initStream() before first use.
  *  note : only use this definition in association with static linking !
- *    this definition is not API/ABI safe, and may change in a future version.
+ *  this definition is not API/ABI safe, and may change in future versions.
  */
-#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4 + ((sizeof(void*)==16) ? 4 : 0) /*AS-400*/ )
-#define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
+#define LZ4_STREAMSIZE       16416  /* static size, for inter-version compatibility */
+#define LZ4_STREAMSIZE_VOIDP (LZ4_STREAMSIZE / sizeof(void*))
 union LZ4_stream_u {
-    unsigned long long table[LZ4_STREAMSIZE_U64];
+    void* table[LZ4_STREAMSIZE_VOIDP];
     LZ4_stream_t_internal internal_donotuse;
-} ;  /* previously typedef'd to LZ4_stream_t */
+}; /* previously typedef'd to LZ4_stream_t */
+
 
 /*! LZ4_initStream() : v1.9.0+
  *  An LZ4_stream_t structure must be initialized at least once.
@@ -667,22 +679,21 @@
 #ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
 #  define LZ4_DEPRECATED(message)   /* disable deprecation warnings */
 #else
-#  define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
 #  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
 #    define LZ4_DEPRECATED(message) [[deprecated(message)]]
-#  elif (LZ4_GCC_VERSION >= 405) || defined(__clang__)
-#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
-#  elif (LZ4_GCC_VERSION >= 301)
-#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
 #  elif defined(_MSC_VER)
 #    define LZ4_DEPRECATED(message) __declspec(deprecated(message))
+#  elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45))
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
+#  elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
 #  else
-#    pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
-#    define LZ4_DEPRECATED(message)
+#    pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler")
+#    define LZ4_DEPRECATED(message)   /* disabled */
 #  endif
 #endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
 
-/* Obsolete compression functions */
+/*! Obsolete compression functions (since v1.7.3) */
 LZ4_DEPRECATED("use LZ4_compress_default() instead")       LZ4LIB_API int LZ4_compress               (const char* src, char* dest, int srcSize);
 LZ4_DEPRECATED("use LZ4_compress_default() instead")       LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize);
 LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
@@ -690,11 +701,12 @@
 LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
 LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue  (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
 
-/* Obsolete decompression functions */
+/*! Obsolete decompression functions (since v1.8.0) */
 LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize);
 LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
 
-/* Obsolete streaming functions; degraded functionality; do not use!
+/* Obsolete streaming functions (since v1.7.0)
+ * degraded functionality; do not use!
  *
  * In order to perform streaming compression, these functions depended on data
  * that is no longer tracked in the state. They have been preserved as well as
@@ -708,23 +720,22 @@
 LZ4_DEPRECATED("Use LZ4_resetStream() instead")  LZ4LIB_API int   LZ4_resetStreamState(void* state, char* inputBuffer);
 LZ4_DEPRECATED("Use LZ4_saveDict() instead")     LZ4LIB_API char* LZ4_slideInputBuffer (void* state);
 
-/* Obsolete streaming decoding functions */
+/*! Obsolete streaming decoding functions (since v1.7.0) */
 LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
 LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
 
-/*! LZ4_decompress_fast() : **unsafe!**
+/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) :
  *  These functions used to be faster than LZ4_decompress_safe(),
- *  but it has changed, and they are now slower than LZ4_decompress_safe().
+ *  but this is no longer the case. They are now slower.
  *  This is because LZ4_decompress_fast() doesn't know the input size,
- *  and therefore must progress more cautiously in the input buffer to not read beyond the end of block.
+ *  and therefore must progress more cautiously into the input buffer to not read beyond the end of block.
  *  On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability.
  *  As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated.
  *
  *  The last remaining LZ4_decompress_fast() specificity is that
  *  it can decompress a block without knowing its compressed size.
- *  Such functionality could be achieved in a more secure manner,
- *  by also providing the maximum size of input buffer,
- *  but it would require new prototypes, and adaptation of the implementation to this new use case.
+ *  Such functionality can be achieved in a more secure manner
+ *  by employing LZ4_decompress_safe_partial().
  *
  *  Parameters:
  *  originalSize : is the uncompressed size to regenerate.
@@ -739,7 +750,6 @@
  *         But they may happen if input data is invalid (error or intentional tampering).
  *         As a consequence, use these functions in trusted environments with trusted data **only**.
  */
-
 LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead")
 LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
 LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead")
diff --git a/lib/lz4frame.c b/lib/lz4frame.c
index c9f630d..ec02c92 100644
--- a/lib/lz4frame.c
+++ b/lib/lz4frame.c
@@ -71,8 +71,8 @@
  * towards another library or solution of their choice
  * by modifying below section.
  */
-#include <stdlib.h>   /* malloc, calloc, free */
 #ifndef LZ4_SRC_INCLUDED   /* avoid redefinition when sources are coalesced */
+#  include <stdlib.h>   /* malloc, calloc, free */
 #  define ALLOC(s)          malloc(s)
 #  define ALLOC_AND_ZERO(s) calloc(1,(s))
 #  define FREEMEM(p)        free(p)
@@ -533,7 +533,7 @@
  *  If the result LZ4F_errorCode_t is not OK_NoError, there was an error during context creation.
  *  Object can release its memory using LZ4F_freeCompressionContext();
  */
-LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_compressionContext_t* LZ4F_compressionContextPtr, unsigned version)
+LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_cctx** LZ4F_compressionContextPtr, unsigned version)
 {
     LZ4F_cctx_t* const cctxPtr = (LZ4F_cctx_t*)ALLOC_AND_ZERO(sizeof(LZ4F_cctx_t));
     if (cctxPtr==NULL) return err0r(LZ4F_ERROR_allocation_failed);
@@ -541,20 +541,18 @@
     cctxPtr->version = version;
     cctxPtr->cStage = 0;   /* Next stage : init stream */
 
-    *LZ4F_compressionContextPtr = (LZ4F_compressionContext_t)cctxPtr;
+    *LZ4F_compressionContextPtr = cctxPtr;
 
     return LZ4F_OK_NoError;
 }
 
 
-LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_compressionContext_t LZ4F_compressionContext)
+LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctxPtr)
 {
-    LZ4F_cctx_t* const cctxPtr = (LZ4F_cctx_t*)LZ4F_compressionContext;
-
     if (cctxPtr != NULL) {  /* support free on NULL */
-       FREEMEM(cctxPtr->lz4CtxPtr);  /* works because LZ4_streamHC_t and LZ4_stream_t are simple POD types */
+       FREEMEM(cctxPtr->lz4CtxPtr);  /* note: LZ4_streamHC_t and LZ4_stream_t are simple POD types */
        FREEMEM(cctxPtr->tmpBuff);
-       FREEMEM(LZ4F_compressionContext);
+       FREEMEM(cctxPtr);
     }
 
     return LZ4F_OK_NoError;
@@ -725,6 +723,9 @@
  */
 size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr)
 {
+    if (preferencesPtr && preferencesPtr->autoFlush) {
+        return LZ4F_compressBound_internal(srcSize, preferencesPtr, 0);
+    }
     return LZ4F_compressBound_internal(srcSize, preferencesPtr, (size_t)-1);
 }
 
@@ -747,6 +748,7 @@
                                       (int)(srcSize), (int)(srcSize-1),
                                       level, cdict);
     if (cSize == 0) {  /* compression failed */
+        DEBUGLOG(5, "LZ4F_makeBlock: compression failed, creating a raw block (size %u)", (U32)srcSize);
         cSize = (U32)srcSize;
         LZ4F_writeLE32(cSizePtr, cSize | LZ4F_BLOCKUNCOMPRESSED_FLAG);
         memcpy(cSizePtr+BHSize, src, srcSize);
@@ -989,6 +991,7 @@
     BYTE* dstPtr = dstStart;
 
     size_t const flushSize = LZ4F_flush(cctxPtr, dstBuffer, dstCapacity, compressOptionsPtr);
+    DEBUGLOG(5,"LZ4F_compressEnd: dstCapacity=%u", (unsigned)dstCapacity);
     if (LZ4F_isError(flushSize)) return flushSize;
     dstPtr += flushSize;
 
@@ -1002,6 +1005,7 @@
     if (cctxPtr->prefs.frameInfo.contentChecksumFlag == LZ4F_contentChecksumEnabled) {
         U32 const xxh = XXH32_digest(&(cctxPtr->xxh));
         if (dstCapacity < 8) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
+        DEBUGLOG(5,"Writing 32-bit content checksum");
         LZ4F_writeLE32(dstPtr, xxh);
         dstPtr+=4;   /* content Checksum */
     }
@@ -1112,6 +1116,7 @@
     size_t frameHeaderSize;
     const BYTE* srcPtr = (const BYTE*)src;
 
+    DEBUGLOG(5, "LZ4F_decodeHeader");
     /* need to decode header to get frameInfo */
     if (srcSize < minFHSize) return err0r(LZ4F_ERROR_frameHeader_incomplete);   /* minimal frame header size */
     MEM_INIT(&(dctx->frameInfo), 0, sizeof(dctx->frameInfo));
@@ -1132,8 +1137,10 @@
 
     /* control magic number */
 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
-    if (LZ4F_readLE32(srcPtr) != LZ4F_MAGICNUMBER)
+    if (LZ4F_readLE32(srcPtr) != LZ4F_MAGICNUMBER) {
+        DEBUGLOG(4, "frame header error : unknown magic number");
         return err0r(LZ4F_ERROR_frameType_unknown);
+    }
 #endif
     dctx->frameInfo.frameType = LZ4F_frame;
 
@@ -1282,15 +1289,20 @@
 
 
 /* LZ4F_updateDict() :
- * only used for LZ4F_blockLinked mode */
+ * only used for LZ4F_blockLinked mode
+ * Condition : dstPtr != NULL
+ */
 static void LZ4F_updateDict(LZ4F_dctx* dctx,
                       const BYTE* dstPtr, size_t dstSize, const BYTE* dstBufferStart,
                       unsigned withinTmp)
 {
-    if (dctx->dictSize==0)
-        dctx->dict = (const BYTE*)dstPtr;   /* priority to dictionary continuity */
+    assert(dstPtr != NULL);
+    if (dctx->dictSize==0) {
+        dctx->dict = (const BYTE*)dstPtr;   /* priority to prefix mode */
+    }
+    assert(dctx->dict != NULL);
 
-    if (dctx->dict + dctx->dictSize == dstPtr) {  /* dictionary continuity, directly within dstBuffer */
+    if (dctx->dict + dctx->dictSize == dstPtr) {  /* prefix mode, everything within dstBuffer */
         dctx->dictSize += dstSize;
         return;
     }
@@ -1304,9 +1316,10 @@
 
     assert(dstSize < 64 KB);   /* if dstSize >= 64 KB, dictionary would be set into dstBuffer directly */
 
-    /* dstBuffer does not contain whole useful history (64 KB), so it must be saved within tmpOut */
+    /* dstBuffer does not contain whole useful history (64 KB), so it must be saved within tmpOutBuffer */
+    assert(dctx->tmpOutBuffer != NULL);
 
-    if ((withinTmp) && (dctx->dict == dctx->tmpOutBuffer)) {   /* continue history within tmpOutBuffer */
+    if (withinTmp && (dctx->dict == dctx->tmpOutBuffer)) {   /* continue history within tmpOutBuffer */
         /* withinTmp expectation : content of [dstPtr,dstSize] is same as [dict+dictSize,dstSize], so we just extend it */
         assert(dctx->dict + dctx->dictSize == dctx->tmpOut + dctx->tmpOutStart);
         dctx->dictSize += dstSize;
@@ -1378,17 +1391,21 @@
     const BYTE* const srcEnd = srcStart + *srcSizePtr;
     const BYTE* srcPtr = srcStart;
     BYTE* const dstStart = (BYTE*)dstBuffer;
-    BYTE* const dstEnd = dstStart + *dstSizePtr;
+    BYTE* const dstEnd = dstStart ? dstStart + *dstSizePtr : NULL;
     BYTE* dstPtr = dstStart;
     const BYTE* selectedIn = NULL;
     unsigned doAnotherStage = 1;
     size_t nextSrcSizeHint = 1;
 
 
+    DEBUGLOG(5, "LZ4F_decompress : %p,%u => %p,%u",
+            srcBuffer, (unsigned)*srcSizePtr, dstBuffer, (unsigned)*dstSizePtr);
+    if (dstBuffer == NULL) assert(*dstSizePtr == 0);
     MEM_INIT(&optionsNull, 0, sizeof(optionsNull));
     if (decompressOptionsPtr==NULL) decompressOptionsPtr = &optionsNull;
     *srcSizePtr = 0;
     *dstSizePtr = 0;
+    assert(dctx != NULL);
 
     /* behaves as a state machine */
 
@@ -1398,6 +1415,7 @@
         {
 
         case dstage_getFrameHeader:
+            DEBUGLOG(6, "dstage_getFrameHeader");
             if ((size_t)(srcEnd-srcPtr) >= maxFHSize) {  /* enough to decode - shortcut */
                 size_t const hSize = LZ4F_decodeHeader(dctx, srcPtr, (size_t)(srcEnd-srcPtr));  /* will update dStage appropriately */
                 if (LZ4F_isError(hSize)) return hSize;
@@ -1411,6 +1429,7 @@
             /* fall-through */
 
         case dstage_storeFrameHeader:
+            DEBUGLOG(6, "dstage_storeFrameHeader");
             {   size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize, (size_t)(srcEnd - srcPtr));
                 memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy);
                 dctx->tmpInSize += sizeToCopy;
@@ -1427,6 +1446,7 @@
             break;
 
         case dstage_init:
+            DEBUGLOG(6, "dstage_init");
             if (dctx->frameInfo.contentChecksumFlag) (void)XXH32_reset(&(dctx->xxh), 0);
             /* internal buffers allocation */
             {   size_t const bufferNeeded = dctx->maxBlockSize
@@ -1480,17 +1500,21 @@
             }   /* if (dctx->dStage == dstage_storeBlockHeader) */
 
         /* decode block header */
-            {   size_t const nextCBlockSize = LZ4F_readLE32(selectedIn) & 0x7FFFFFFFU;
+            {   U32 const blockHeader = LZ4F_readLE32(selectedIn);
+                size_t const nextCBlockSize = blockHeader & 0x7FFFFFFFU;
                 size_t const crcSize = dctx->frameInfo.blockChecksumFlag * BFSize;
-                if (nextCBlockSize==0) {  /* frameEnd signal, no more block */
+                if (blockHeader==0) {  /* frameEnd signal, no more block */
+                    DEBUGLOG(5, "end of frame");
                     dctx->dStage = dstage_getSuffix;
                     break;
                 }
-                if (nextCBlockSize > dctx->maxBlockSize)
+                if (nextCBlockSize > dctx->maxBlockSize) {
                     return err0r(LZ4F_ERROR_maxBlockSize_invalid);
-                if (LZ4F_readLE32(selectedIn) & LZ4F_BLOCKUNCOMPRESSED_FLAG) {
+                }
+                if (blockHeader & LZ4F_BLOCKUNCOMPRESSED_FLAG) {
                     /* next block is uncompressed */
                     dctx->tmpInTarget = nextCBlockSize;
+                    DEBUGLOG(5, "next block is uncompressed (size %u)", (U32)nextCBlockSize);
                     if (dctx->frameInfo.blockChecksumFlag) {
                         (void)XXH32_reset(&dctx->blockChecksum, 0);
                     }
@@ -1508,20 +1532,26 @@
             }
 
         case dstage_copyDirect:   /* uncompressed block */
-            {   size_t const minBuffSize = MIN((size_t)(srcEnd-srcPtr), (size_t)(dstEnd-dstPtr));
-                size_t const sizeToCopy = MIN(dctx->tmpInTarget, minBuffSize);
-                memcpy(dstPtr, srcPtr, sizeToCopy);
-                if (dctx->frameInfo.blockChecksumFlag) {
-                    (void)XXH32_update(&dctx->blockChecksum, srcPtr, sizeToCopy);
-                }
-                if (dctx->frameInfo.contentChecksumFlag)
-                    (void)XXH32_update(&dctx->xxh, srcPtr, sizeToCopy);
-                if (dctx->frameInfo.contentSize)
-                    dctx->frameRemainingSize -= sizeToCopy;
+            DEBUGLOG(6, "dstage_copyDirect");
+            {   size_t sizeToCopy;
+                if (dstPtr == NULL) {
+                    sizeToCopy = 0;
+                } else {
+                    size_t const minBuffSize = MIN((size_t)(srcEnd-srcPtr), (size_t)(dstEnd-dstPtr));
+                    sizeToCopy = MIN(dctx->tmpInTarget, minBuffSize);
+                    memcpy(dstPtr, srcPtr, sizeToCopy);
+                    if (dctx->frameInfo.blockChecksumFlag) {
+                        (void)XXH32_update(&dctx->blockChecksum, srcPtr, sizeToCopy);
+                    }
+                    if (dctx->frameInfo.contentChecksumFlag)
+                        (void)XXH32_update(&dctx->xxh, srcPtr, sizeToCopy);
+                    if (dctx->frameInfo.contentSize)
+                        dctx->frameRemainingSize -= sizeToCopy;
 
-                /* history management (linked blocks only)*/
-                if (dctx->frameInfo.blockMode == LZ4F_blockLinked)
-                    LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 0);
+                    /* history management (linked blocks only)*/
+                    if (dctx->frameInfo.blockMode == LZ4F_blockLinked) {
+                        LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 0);
+                }   }
 
                 srcPtr += sizeToCopy;
                 dstPtr += sizeToCopy;
@@ -1534,15 +1564,16 @@
                     break;
                 }
                 dctx->tmpInTarget -= sizeToCopy;  /* need to copy more */
-                nextSrcSizeHint = dctx->tmpInTarget +
-                                +(dctx->frameInfo.blockChecksumFlag ? BFSize : 0)
-                                + BHSize /* next header size */;
-                doAnotherStage = 0;
-                break;
             }
+            nextSrcSizeHint = dctx->tmpInTarget +
+                            +(dctx->frameInfo.blockChecksumFlag ? BFSize : 0)
+                            + BHSize /* next header size */;
+            doAnotherStage = 0;
+            break;
 
         /* check block checksum for recently transferred uncompressed block */
         case dstage_getBlockChecksum:
+            DEBUGLOG(6, "dstage_getBlockChecksum");
             {   const void* crcSrc;
                 if ((srcEnd-srcPtr >= 4) && (dctx->tmpInSize==0)) {
                     crcSrc = srcPtr;
@@ -1562,8 +1593,12 @@
                 {   U32 const readCRC = LZ4F_readLE32(crcSrc);
                     U32 const calcCRC = XXH32_digest(&dctx->blockChecksum);
 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
-                    if (readCRC != calcCRC)
+                    DEBUGLOG(6, "compare block checksum");
+                    if (readCRC != calcCRC) {
+                        DEBUGLOG(4, "incorrect block checksum: %08X != %08X",
+                                readCRC, calcCRC);
                         return err0r(LZ4F_ERROR_blockChecksum_invalid);
+                    }
 #else
                     (void)readCRC;
                     (void)calcCRC;
@@ -1573,6 +1608,7 @@
             break;
 
         case dstage_getCBlock:
+            DEBUGLOG(6, "dstage_getCBlock");
             if ((size_t)(srcEnd-srcPtr) < dctx->tmpInTarget) {
                 dctx->tmpInSize = 0;
                 dctx->dStage = dstage_storeCBlock;
@@ -1582,7 +1618,7 @@
             selectedIn = srcPtr;
             srcPtr += dctx->tmpInTarget;
 
-            if (0)  /* jump over next block */
+            if (0)  /* on fall-through, always skip the dstage_storeCBlock body; it is only entered through its case label */
         case dstage_storeCBlock:
             {   size_t const wantedData = dctx->tmpInTarget - dctx->tmpInSize;
                 size_t const inputLeft = (size_t)(srcEnd-srcPtr);
@@ -1619,6 +1655,7 @@
                 const char* dict = (const char*)dctx->dict;
                 size_t dictSize = dctx->dictSize;
                 int decodedSize;
+                assert(dstPtr != NULL);
                 if (dict && dictSize > 1 GB) {
                     /* the dictSize param is an int, avoid truncation / sign issues */
                     dict += dictSize - 64 KB;
@@ -1636,8 +1673,9 @@
                     dctx->frameRemainingSize -= (size_t)decodedSize;
 
                 /* dictionary management */
-                if (dctx->frameInfo.blockMode==LZ4F_blockLinked)
+                if (dctx->frameInfo.blockMode==LZ4F_blockLinked) {
                     LZ4F_updateDict(dctx, dstPtr, (size_t)decodedSize, dstStart, 0);
+                }
 
                 dstPtr += decodedSize;
                 dctx->dStage = dstage_getBlockHeader;
@@ -1684,7 +1722,9 @@
             /* fall-through */
 
         case dstage_flushOut:  /* flush decoded data from tmpOut to dstBuffer */
-            {   size_t const sizeToCopy = MIN(dctx->tmpOutSize - dctx->tmpOutStart, (size_t)(dstEnd-dstPtr));
+            DEBUGLOG(6, "dstage_flushOut");
+            if (dstPtr != NULL) {
+                size_t const sizeToCopy = MIN(dctx->tmpOutSize - dctx->tmpOutStart, (size_t)(dstEnd-dstPtr));
                 memcpy(dstPtr, dctx->tmpOut + dctx->tmpOutStart, sizeToCopy);
 
                 /* dictionary management */
@@ -1693,16 +1733,15 @@
 
                 dctx->tmpOutStart += sizeToCopy;
                 dstPtr += sizeToCopy;
-
-                if (dctx->tmpOutStart == dctx->tmpOutSize) { /* all flushed */
-                    dctx->dStage = dstage_getBlockHeader;  /* get next block */
-                    break;
-                }
-                /* could not flush everything : stop there, just request a block header */
-                doAnotherStage = 0;
-                nextSrcSizeHint = BHSize;
+            }
+            if (dctx->tmpOutStart == dctx->tmpOutSize) { /* all flushed */
+                dctx->dStage = dstage_getBlockHeader;  /* get next block */
                 break;
             }
+            /* could not flush everything : stop there, just request a block header */
+            doAnotherStage = 0;
+            nextSrcSizeHint = BHSize;
+            break;
 
         case dstage_getSuffix:
             if (dctx->frameRemainingSize)
@@ -1806,6 +1845,7 @@
     LZ4F_STATIC_ASSERT((unsigned)dstage_init == 2);
     if ( (dctx->frameInfo.blockMode==LZ4F_blockLinked)  /* next block will use up to 64KB from previous ones */
       && (dctx->dict != dctx->tmpOutBuffer)             /* dictionary is not already within tmp */
+      && (dctx->dict != NULL)                           /* dictionary exists */
       && (!decompressOptionsPtr->stableDst)             /* cannot rely on dst data to remain there for next call */
       && ((unsigned)(dctx->dStage)-2 < (unsigned)(dstage_getSuffix)-2) )  /* valid stages : [init ... getSuffix[ */
     {
@@ -1815,9 +1855,9 @@
             const BYTE* oldDictEnd = dctx->dict + dctx->dictSize - dctx->tmpOutStart;
             if (dctx->tmpOutSize > 64 KB) copySize = 0;
             if (copySize > preserveSize) copySize = preserveSize;
+            assert(dctx->tmpOutBuffer != NULL);
 
-            if (copySize > 0)
-                memcpy(dctx->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize);
+            memcpy(dctx->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize);
 
             dctx->dict = dctx->tmpOutBuffer;
             dctx->dictSize = preserveSize + dctx->tmpOutStart;
@@ -1825,8 +1865,7 @@
             const BYTE* const oldDictEnd = dctx->dict + dctx->dictSize;
             size_t const newDictSize = MIN(dctx->dictSize, 64 KB);
 
-            if (newDictSize > 0)
-                memcpy(dctx->tmpOutBuffer, oldDictEnd - newDictSize, newDictSize);
+            memcpy(dctx->tmpOutBuffer, oldDictEnd - newDictSize, newDictSize);
 
             dctx->dict = dctx->tmpOutBuffer;
             dctx->dictSize = newDictSize;
diff --git a/lib/lz4frame.h b/lib/lz4frame.h
index 391e484..4573317 100644
--- a/lib/lz4frame.h
+++ b/lib/lz4frame.h
@@ -66,17 +66,22 @@
  *****************************************************************/
 /*  LZ4_DLL_EXPORT :
  *  Enable exporting of functions when building a Windows DLL
- *  LZ4FLIB_API :
+ *  LZ4FLIB_VISIBILITY :
  *  Control library symbols visibility.
  */
+#ifndef LZ4FLIB_VISIBILITY
+#  if defined(__GNUC__) && (__GNUC__ >= 4)
+#    define LZ4FLIB_VISIBILITY __attribute__ ((visibility ("default")))
+#  else
+#    define LZ4FLIB_VISIBILITY
+#  endif
+#endif
 #if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
-#  define LZ4FLIB_API __declspec(dllexport)
+#  define LZ4FLIB_API __declspec(dllexport) LZ4FLIB_VISIBILITY
 #elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
-#  define LZ4FLIB_API __declspec(dllimport)
-#elif defined(__GNUC__) && (__GNUC__ >= 4)
-#  define LZ4FLIB_API __attribute__ ((__visibility__ ("default")))
+#  define LZ4FLIB_API __declspec(dllimport) LZ4FLIB_VISIBILITY
 #else
-#  define LZ4FLIB_API
+#  define LZ4FLIB_API LZ4FLIB_VISIBILITY
 #endif
 
 #ifdef LZ4F_DISABLE_DEPRECATE_WARNINGS
@@ -103,7 +108,7 @@
 
 /*-************************************
  *  Frame compression types
- **************************************/
+ ************************************* */
 /* #define LZ4F_ENABLE_OBSOLETE_ENUMS   // uncomment to enable obsolete enums */
 #ifdef LZ4F_ENABLE_OBSOLETE_ENUMS
 #  define LZ4F_OBSOLETE_ENUM(x) , LZ4F_DEPRECATE(x) = LZ4F_##x
@@ -113,7 +118,8 @@
 
 /* The larger the block size, the (slightly) better the compression ratio,
  * though there are diminishing returns.
- * Larger blocks also increase memory usage on both compression and decompression sides. */
+ * Larger blocks also increase memory usage on both compression and decompression sides.
+ */
 typedef enum {
     LZ4F_default=0,
     LZ4F_max64KB=4,
@@ -284,7 +290,7 @@
  * @return is always the same for a srcSize and prefsPtr.
  *  prefsPtr is optional : when NULL is provided, preferences will be set to cover worst case scenario.
  *  tech details :
- * @return includes the possibility that internal buffer might already be filled by up to (blockSize-1) bytes.
+ * @return, when automatic flushing is not enabled, includes the possibility that the internal buffer might already be filled by up to (blockSize-1) bytes.
  *  It also includes frame footer (ending + checksum), since it might be generated by LZ4F_compressEnd().
  * @return doesn't include frame header, as it was already generated by LZ4F_compressBegin().
  */
@@ -376,7 +382,7 @@
  *  note : Frame header size is variable, but is guaranteed to be
  *         >= LZ4F_HEADER_SIZE_MIN bytes, and <= LZ4F_HEADER_SIZE_MAX bytes.
  */
-size_t LZ4F_headerSize(const void* src, size_t srcSize);
+LZ4FLIB_API size_t LZ4F_headerSize(const void* src, size_t srcSize);
 
 /*! LZ4F_getFrameInfo() :
  *  This function extracts frame parameters (max blockSize, dictID, etc.).
@@ -426,8 +432,10 @@
                                      const void* srcBuffer, size_t* srcSizePtr);
 
 /*! LZ4F_decompress() :
- *  Call this function repetitively to regenerate compressed data from `srcBuffer`.
- *  The function will read up to *srcSizePtr bytes from srcBuffer,
+ *  Call this function repeatedly to regenerate data compressed in `srcBuffer`.
+ *
+ *  The function requires a valid dctx state.
+ *  It will read up to *srcSizePtr bytes from srcBuffer,
  *  and decompress data into dstBuffer, of capacity *dstSizePtr.
  *
  *  The nb of bytes consumed from srcBuffer will be written into *srcSizePtr (necessarily <= original value).
@@ -493,9 +501,9 @@
  * Use at your own risk.
  */
 #ifdef LZ4F_PUBLISH_STATIC_FUNCTIONS
-#define LZ4FLIB_STATIC_API LZ4FLIB_API
+# define LZ4FLIB_STATIC_API LZ4FLIB_API
 #else
-#define LZ4FLIB_STATIC_API
+# define LZ4FLIB_STATIC_API
 #endif
 
 
diff --git a/lib/lz4hc.c b/lib/lz4hc.c
index 5922ed7..77c9f43 100644
--- a/lib/lz4hc.c
+++ b/lib/lz4hc.c
@@ -53,7 +53,7 @@
 #include "lz4hc.h"
 
 
-/*===   Common LZ4 definitions   ===*/
+/*===   Common definitions   ===*/
 #if defined(__GNUC__)
 #  pragma GCC diagnostic ignored "-Wunused-function"
 #endif
@@ -61,15 +61,16 @@
 #  pragma clang diagnostic ignored "-Wunused-function"
 #endif
 
-/*===   Enums   ===*/
-typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive;
-
-
 #define LZ4_COMMONDEFS_ONLY
 #ifndef LZ4_SRC_INCLUDED
 #include "lz4.c"   /* LZ4_count, constants, mem */
 #endif
 
+
+/*===   Enums   ===*/
+typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive;
+
+
 /*===   Constants   ===*/
 #define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
 #define LZ4_OPT_NUM   (1<<12)
@@ -92,7 +93,7 @@
 **************************************/
 static void LZ4HC_clearTables (LZ4HC_CCtx_internal* hc4)
 {
-    MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
+    MEM_INIT(hc4->hashTable, 0, sizeof(hc4->hashTable));
     MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
 }
 
@@ -161,8 +162,7 @@
 static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern)
 {
     size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3;
-    if (bitsToRotate == 0)
-        return pattern;
+    if (bitsToRotate == 0) return pattern;
     return LZ4HC_rotl32(pattern, (int)bitsToRotate);
 }
 
@@ -172,7 +172,8 @@
 LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32)
 {
     const BYTE* const iStart = ip;
-    reg_t const pattern = (sizeof(pattern)==8) ? (reg_t)pattern32 + (((reg_t)pattern32) << 32) : pattern32;
+    reg_t const pattern = (sizeof(pattern)==8) ?
+        (reg_t)pattern32 + (((reg_t)pattern32) << (sizeof(pattern)*4)) : pattern32;
 
     while (likely(ip < iEnd-(sizeof(pattern)-1))) {
         reg_t const diff = LZ4_read_ARCH(ip) ^ pattern;
@@ -270,7 +271,7 @@
     DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)",
                 matchIndex, lowestMatchIndex);
 
-    while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) {
+    while ((matchIndex>=lowestMatchIndex) && (nbAttempts>0)) {
         int matchLength=0;
         nbAttempts--;
         assert(matchIndex < ipIndex);
@@ -389,8 +390,8 @@
                                     if (lookBackLength==0) {  /* no back possible */
                                         size_t const maxML = MIN(currentSegmentLength, srcPatternLength);
                                         if ((size_t)longest < maxML) {
-                                            assert(base + matchIndex < ip);
-                                            if (ip - (base+matchIndex) > LZ4_DISTANCE_MAX) break;
+                                            assert(base + matchIndex != ip);
+                                            if ((size_t)(ip - base) - matchIndex > LZ4_DISTANCE_MAX) break;
                                             assert(maxML < 2 GB);
                                             longest = (int)maxML;
                                             *matchpos = base + matchIndex;   /* virtual pos, relative to ip, to retrieve offset */
@@ -410,7 +411,7 @@
     }  /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */
 
     if ( dict == usingDictCtxHc
-      && nbAttempts
+      && nbAttempts > 0
       && ipIndex - lowestMatchIndex < LZ4_DISTANCE_MAX) {
         size_t const dictEndOffset = (size_t)(dictCtx->end - dictCtx->base);
         U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];
@@ -460,74 +461,90 @@
  * @return : 0 if ok,
  *           1 if buffer issue detected */
 LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
-    const BYTE** ip,
-    BYTE** op,
-    const BYTE** anchor,
+    const BYTE** _ip,
+    BYTE** _op,
+    const BYTE** _anchor,
     int matchLength,
     const BYTE* const match,
     limitedOutput_directive limit,
     BYTE* oend)
 {
+#define ip      (*_ip)
+#define op      (*_op)
+#define anchor  (*_anchor)
+
     size_t length;
-    BYTE* const token = (*op)++;
+    BYTE* const token = op++;
 
 #if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6)
     static const BYTE* start = NULL;
     static U32 totalCost = 0;
-    U32 const pos = (start==NULL) ? 0 : (U32)(*anchor - start);
-    U32 const ll = (U32)(*ip - *anchor);
+    U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start);
+    U32 const ll = (U32)(ip - anchor);
     U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0;
     U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
     U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
-    if (start==NULL) start = *anchor;  /* only works for single segment */
+    if (start==NULL) start = anchor;  /* only works for single segment */
     /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */
-    DEBUGLOG(6, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u",
+    DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5u, cost:%4u + %5u",
                 pos,
-                (U32)(*ip - *anchor), matchLength, (U32)(*ip-match),
+                (U32)(ip - anchor), matchLength, (U32)(ip-match),
                 cost, totalCost);
     totalCost += cost;
 #endif
 
     /* Encode Literal length */
-    length = (size_t)(*ip - *anchor);
-    if ((limit) && ((*op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1;   /* Check output limit */
+    length = (size_t)(ip - anchor);
+    LZ4_STATIC_ASSERT(notLimited == 0);
+    /* Check output limit */
+    if (limit && ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) {
+        DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)",
+                (int)length, (int)(oend - op));
+        return 1;
+    }
     if (length >= RUN_MASK) {
         size_t len = length - RUN_MASK;
         *token = (RUN_MASK << ML_BITS);
-        for(; len >= 255 ; len -= 255) *(*op)++ = 255;
-        *(*op)++ = (BYTE)len;
+        for(; len >= 255 ; len -= 255) *op++ = 255;
+        *op++ = (BYTE)len;
     } else {
         *token = (BYTE)(length << ML_BITS);
     }
 
     /* Copy Literals */
-    LZ4_wildCopy8(*op, *anchor, (*op) + length);
-    *op += length;
+    LZ4_wildCopy8(op, anchor, op + length);
+    op += length;
 
     /* Encode Offset */
-    assert( (*ip - match) <= LZ4_DISTANCE_MAX );   /* note : consider providing offset as a value, rather than as a pointer difference */
-    LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2;
+    assert( (ip - match) <= LZ4_DISTANCE_MAX );   /* note : consider providing offset as a value, rather than as a pointer difference */
+    LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
 
     /* Encode MatchLength */
     assert(matchLength >= MINMATCH);
     length = (size_t)matchLength - MINMATCH;
-    if ((limit) && (*op + (length / 255) + (1 + LASTLITERALS) > oend)) return 1;   /* Check output limit */
+    if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) {
+        DEBUGLOG(6, "Not enough room to write match length");
+        return 1;   /* output limit would be exceeded */
+    }
     if (length >= ML_MASK) {
         *token += ML_MASK;
         length -= ML_MASK;
-        for(; length >= 510 ; length -= 510) { *(*op)++ = 255; *(*op)++ = 255; }
-        if (length >= 255) { length -= 255; *(*op)++ = 255; }
-        *(*op)++ = (BYTE)length;
+        for(; length >= 510 ; length -= 510) { *op++ = 255; *op++ = 255; }
+        if (length >= 255) { length -= 255; *op++ = 255; }
+        *op++ = (BYTE)length;
     } else {
         *token += (BYTE)(length);
     }
 
     /* Prepare next loop */
-    *ip += matchLength;
-    *anchor = *ip;
+    ip += matchLength;
+    anchor = ip;
 
     return 0;
 }
+#undef ip
+#undef op
+#undef anchor
 
 LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
     LZ4HC_CCtx_internal* const ctx,
@@ -535,7 +552,7 @@
     char* const dest,
     int* srcSizePtr,
     int const maxOutputSize,
-    unsigned maxNbAttempts,
+    int maxNbAttempts,
     const limitedOutput_directive limit,
     const dictCtx_directive dict
     )
@@ -565,7 +582,7 @@
     /* init */
     *srcSizePtr = 0;
     if (limit == fillOutput) oend -= LASTLITERALS;                  /* Hack for support LZ4 format restriction */
-    if (inputSize < LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
+    if (inputSize < LZ4_minLength) goto _last_literals;             /* Input too small, no compression (all literals) */
 
     /* Main Loop */
     while (ip <= mflimit) {
@@ -637,7 +654,11 @@
             if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
             ip = start2;
             optr = op;
-            if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) goto _dest_overflow;
+            if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) {
+                ml  = ml2;
+                ref = ref2;
+                goto _dest_overflow;
+            }
             continue;
         }
 
@@ -709,17 +730,18 @@
 _last_literals:
     /* Encode Last Literals */
     {   size_t lastRunSize = (size_t)(iend - anchor);  /* literals */
-        size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255;
-        size_t const totalSize = 1 + litLength + lastRunSize;
+        size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255;
+        size_t const totalSize = 1 + llAdd + lastRunSize;
         if (limit == fillOutput) oend += LASTLITERALS;  /* restore correct value */
         if (limit && (op + totalSize > oend)) {
-            if (limit == limitedOutput) return 0;  /* Check output limit */
+            if (limit == limitedOutput) return 0;
             /* adapt lastRunSize to fill 'dest' */
-            lastRunSize  = (size_t)(oend - op) - 1;
-            litLength = (lastRunSize + 255 - RUN_MASK) / 255;
-            lastRunSize -= litLength;
+            lastRunSize  = (size_t)(oend - op) - 1 /*token*/;
+            llAdd = (lastRunSize + 256 - RUN_MASK) / 256;
+            lastRunSize -= llAdd;
         }
-        ip = anchor + lastRunSize;
+        DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize);
+        ip = anchor + lastRunSize;  /* can be != iend if limit==fillOutput */
 
         if (lastRunSize >= RUN_MASK) {
             size_t accumulator = lastRunSize - RUN_MASK;
@@ -739,9 +761,25 @@
 
 _dest_overflow:
     if (limit == fillOutput) {
+        /* Assumption : ip, anchor, ml and ref must be set correctly */
+        size_t const ll = (size_t)(ip - anchor);
+        size_t const ll_addbytes = (ll + 240) / 255;
+        size_t const ll_totalCost = 1 + ll_addbytes + ll;
+        BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */
+        DEBUGLOG(6, "Last sequence overflowing");
         op = optr;  /* restore correct out pointer */
+        if (op + ll_totalCost <= maxLitPos) {
+            /* ll validated; now adjust match length */
+            size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost));
+            size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255);
+            assert(maxMlSize < INT_MAX); assert(ml >= 0);
+            if ((size_t)ml > maxMlSize) ml = (int)maxMlSize;
+            if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ml >= MFLIMIT) {
+                LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, notLimited, oend);
+        }   }
         goto _last_literals;
     }
+    /* compression failed */
     return 0;
 }
 
@@ -752,7 +790,7 @@
     int const nbSearches, size_t sufficient_len,
     const limitedOutput_directive limit, int const fullUpdate,
     const dictCtx_directive dict,
-    HCfavor_e favorDecSpeed);
+    const HCfavor_e favorDecSpeed);
 
 
 LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
@@ -769,7 +807,7 @@
     typedef enum { lz4hc, lz4opt } lz4hc_strat_e;
     typedef struct {
         lz4hc_strat_e strat;
-        U32 nbSearches;
+        int nbSearches;
         U32 targetLength;
     } cParams_t;
     static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = {
@@ -788,7 +826,8 @@
         { lz4opt,16384,LZ4_OPT_NUM },  /* 12==LZ4HC_CLEVEL_MAX */
     };
 
-    DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d)", ctx, src, *srcSizePtr);
+    DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)",
+                ctx, src, *srcSizePtr, limit);
 
     if (limit == fillOutput && dstCapacity < 1) return 0;   /* Impossible to store anything */
     if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0;    /* Unsupported input size (too large or negative) */
@@ -808,7 +847,7 @@
             assert(cParam.strat == lz4opt);
             result = LZ4HC_compress_optimal(ctx,
                                 src, dst, srcSizePtr, dstCapacity,
-                                (int)cParam.nbSearches, cParam.targetLength, limit,
+                                cParam.nbSearches, cParam.targetLength, limit,
                                 cLevel == LZ4HC_CLEVEL_MAX,   /* ultra mode */
                                 dict, favor);
         }
@@ -881,27 +920,22 @@
 
 int LZ4_sizeofStateHC(void) { return (int)sizeof(LZ4_streamHC_t); }
 
-#ifndef _MSC_VER  /* for some reason, Visual fails the aligment test on 32-bit x86 :
-                   * it reports an aligment of 8-bytes,
-                   * while actually aligning LZ4_streamHC_t on 4 bytes. */
 static size_t LZ4_streamHC_t_alignment(void)
 {
-    struct { char c; LZ4_streamHC_t t; } t_a;
-    return sizeof(t_a) - sizeof(t_a.t);
-}
+#if LZ4_ALIGN_TEST
+    typedef struct { char c; LZ4_streamHC_t t; } t_a;
+    return sizeof(t_a) - sizeof(LZ4_streamHC_t);
+#else
+    return 1;  /* effectively disabled */
 #endif
+}
 
 /* state is presumed correctly initialized,
  * in which case its size and alignment have already been validate */
 int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
 {
     LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)state)->internal_donotuse;
-#ifndef _MSC_VER  /* for some reason, Visual fails the aligment test on 32-bit x86 :
-                   * it reports an aligment of 8-bytes,
-                   * while actually aligning LZ4_streamHC_t on 4 bytes. */
-    assert(((size_t)state & (LZ4_streamHC_t_alignment() - 1)) == 0);  /* check alignment */
-#endif
-    if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0;   /* Error : state is not aligned for pointers (32 or 64 bits) */
+    if (!LZ4_isAligned(state, LZ4_streamHC_t_alignment())) return 0;
     LZ4_resetStreamHC_fast((LZ4_streamHC_t*)state, compressionLevel);
     LZ4HC_init_internal (ctx, (const BYTE*)src);
     if (dstCapacity < LZ4_compressBound(srcSize))
@@ -950,10 +984,11 @@
 /* allocation */
 LZ4_streamHC_t* LZ4_createStreamHC(void)
 {
-    LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t));
-    if (LZ4_streamHCPtr==NULL) return NULL;
-    LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));  /* full initialization, malloc'ed buffer can be full of garbage */
-    return LZ4_streamHCPtr;
+    LZ4_streamHC_t* const state =
+        (LZ4_streamHC_t*)ALLOC_AND_ZERO(sizeof(LZ4_streamHC_t));
+    if (state == NULL) return NULL;
+    LZ4_setCompressionLevel(state, LZ4HC_CLEVEL_DEFAULT);
+    return state;
 }
 
 int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr)
@@ -968,22 +1003,16 @@
 LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size)
 {
     LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)buffer;
-    if (buffer == NULL) return NULL;
-    if (size < sizeof(LZ4_streamHC_t)) return NULL;
-#ifndef _MSC_VER  /* for some reason, Visual fails the aligment test on 32-bit x86 :
-                   * it reports an aligment of 8-bytes,
-                   * while actually aligning LZ4_streamHC_t on 4 bytes. */
-    if (((size_t)buffer) & (LZ4_streamHC_t_alignment() - 1)) return NULL;  /* alignment check */
-#endif
     /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
     LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= LZ4_STREAMHCSIZE);
-    DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", LZ4_streamHCPtr, (unsigned)size);
-    /* end-base will trigger a clearTable on starting compression */
-    LZ4_streamHCPtr->internal_donotuse.end = (const BYTE *)(ptrdiff_t)-1;
-    LZ4_streamHCPtr->internal_donotuse.base = NULL;
-    LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL;
-    LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = 0;
-    LZ4_streamHCPtr->internal_donotuse.dirty = 0;
+    DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", buffer, (unsigned)size);
+    /* check conditions */
+    if (buffer == NULL) return NULL;
+    if (size < sizeof(LZ4_streamHC_t)) return NULL;
+    if (!LZ4_isAligned(buffer, LZ4_streamHC_t_alignment())) return NULL;
+    /* init */
+    { LZ4HC_CCtx_internal* const hcstate = &(LZ4_streamHCPtr->internal_donotuse);
+      MEM_INIT(hcstate, 0, sizeof(*hcstate)); }
     LZ4_setCompressionLevel(LZ4_streamHCPtr, LZ4HC_CLEVEL_DEFAULT);
     return LZ4_streamHCPtr;
 }
@@ -1028,7 +1057,7 @@
               const char* dictionary, int dictSize)
 {
     LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
-    DEBUGLOG(4, "LZ4_loadDictHC(%p, %p, %d)", LZ4_streamHCPtr, dictionary, dictSize);
+    DEBUGLOG(4, "LZ4_loadDictHC(ctx:%p, dict:%p, dictSize:%d)", LZ4_streamHCPtr, dictionary, dictSize);
     assert(LZ4_streamHCPtr != NULL);
     if (dictSize > 64 KB) {
         dictionary += (size_t)dictSize - 64 KB;
@@ -1069,14 +1098,15 @@
     ctxPtr->dictCtx = NULL;
 }
 
-static int LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
-                                            const char* src, char* dst,
-                                            int* srcSizePtr, int dstCapacity,
-                                            limitedOutput_directive limit)
+static int
+LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
+                                 const char* src, char* dst,
+                                 int* srcSizePtr, int dstCapacity,
+                                 limitedOutput_directive limit)
 {
     LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
-    DEBUGLOG(4, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d)",
-                LZ4_streamHCPtr, src, *srcSizePtr);
+    DEBUGLOG(5, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)",
+                LZ4_streamHCPtr, src, *srcSizePtr, limit);
     assert(ctxPtr != NULL);
     /* auto-init if forgotten */
     if (ctxPtr->base == NULL) LZ4HC_init_internal (ctxPtr, (const BYTE*) src);
@@ -1100,8 +1130,7 @@
             if (sourceEnd > dictEnd) sourceEnd = dictEnd;
             ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
             if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit;
-        }
-    }
+    }   }
 
     return LZ4HC_compress_generic (ctxPtr, src, dst, srcSizePtr, dstCapacity, ctxPtr->compressionLevel, limit);
 }
@@ -1121,23 +1150,30 @@
 
 
 
-/* dictionary saving */
-
+/* LZ4_saveDictHC :
+ * save history content
+ * into a user-provided buffer
+ * which is then used to continue compression
+ */
 int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize)
 {
     LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
     int const prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
-    DEBUGLOG(4, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize);
+    DEBUGLOG(5, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize);
+    assert(prefixSize >= 0);
     if (dictSize > 64 KB) dictSize = 64 KB;
     if (dictSize < 4) dictSize = 0;
     if (dictSize > prefixSize) dictSize = prefixSize;
-    memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
+    if (safeBuffer == NULL) assert(dictSize == 0);
+    if (dictSize > 0)
+        memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
     {   U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);
         streamPtr->end = (const BYTE*)safeBuffer + dictSize;
         streamPtr->base = streamPtr->end - endIndex;
         streamPtr->dictLimit = endIndex - (U32)dictSize;
         streamPtr->lowLimit = endIndex - (U32)dictSize;
-        if (streamPtr->nextToUpdate < streamPtr->dictLimit) streamPtr->nextToUpdate = streamPtr->dictLimit;
+        if (streamPtr->nextToUpdate < streamPtr->dictLimit)
+            streamPtr->nextToUpdate = streamPtr->dictLimit;
     }
     return dictSize;
 }
@@ -1287,8 +1323,13 @@
                                     const dictCtx_directive dict,
                                     const HCfavor_e favorDecSpeed)
 {
+    int retval = 0;
 #define TRAILING_LITERALS 3
+#ifdef LZ4HC_HEAPMODE
+    LZ4HC_optimal_t* const opt = (LZ4HC_optimal_t*)ALLOC(sizeof(LZ4HC_optimal_t) * (LZ4_OPT_NUM + TRAILING_LITERALS));
+#else
     LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS];   /* ~64 KB, which is a bit large for stack... */
+#endif
 
     const BYTE* ip = (const BYTE*) source;
     const BYTE* anchor = ip;
@@ -1298,15 +1339,19 @@
     BYTE* op = (BYTE*) dst;
     BYTE* opSaved = (BYTE*) dst;
     BYTE* oend = op + dstCapacity;
+    int ovml = MINMATCH;  /* overflow - last sequence */
+    const BYTE* ovref = NULL;
 
     /* init */
+#ifdef LZ4HC_HEAPMODE
+    if (opt == NULL) goto _return_label;
+#endif
     DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, (unsigned)dstCapacity);
     *srcSizePtr = 0;
     if (limit == fillOutput) oend -= LASTLITERALS;   /* Hack for support LZ4 format restriction */
     if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;
 
     /* Main Loop */
-    assert(ip - anchor < LZ4_MAX_INPUT_SIZE);
     while (ip <= mflimit) {
          int const llen = (int)(ip - anchor);
          int best_mlen, best_off;
@@ -1320,8 +1365,11 @@
              int const firstML = firstMatch.len;
              const BYTE* const matchPos = ip - firstMatch.off;
              opSaved = op;
-             if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend) )   /* updates ip, op and anchor */
+             if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend) ) {  /* updates ip, op and anchor */
+                 ovml = firstML;
+                 ovref = matchPos;
                  goto _dest_overflow;
+             }
              continue;
          }
 
@@ -1463,7 +1511,7 @@
          best_off = opt[last_match_pos].off;
          cur = last_match_pos - best_mlen;
 
- encode: /* cur, last_match_pos, best_mlen, best_off must be set */
+encode: /* cur, last_match_pos, best_mlen, best_off must be set */
          assert(cur < LZ4_OPT_NUM);
          assert(last_match_pos >= 1);  /* == 1 when only one candidate */
          DEBUGLOG(6, "reverse traversal, looking for shortest path (last_match_pos=%i)", last_match_pos);
@@ -1493,25 +1541,31 @@
                  assert(ml >= MINMATCH);
                  assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX));
                  opSaved = op;
-                 if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) )   /* updates ip, op and anchor */
+                 if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) ) {  /* updates ip, op and anchor */
+                     ovml = ml;
+                     ovref = ip - offset;
                      goto _dest_overflow;
-         }   }
+         }   }   }
      }  /* while (ip <= mflimit) */
 
- _last_literals:
+_last_literals:
      /* Encode Last Literals */
      {   size_t lastRunSize = (size_t)(iend - anchor);  /* literals */
-         size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255;
-         size_t const totalSize = 1 + litLength + lastRunSize;
+         size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255;
+         size_t const totalSize = 1 + llAdd + lastRunSize;
          if (limit == fillOutput) oend += LASTLITERALS;  /* restore correct value */
          if (limit && (op + totalSize > oend)) {
-             if (limit == limitedOutput) return 0;  /* Check output limit */
+             if (limit == limitedOutput) { /* Check output limit */
+                retval = 0;
+                goto _return_label;
+             }
              /* adapt lastRunSize to fill 'dst' */
-             lastRunSize  = (size_t)(oend - op) - 1;
-             litLength = (lastRunSize + 255 - RUN_MASK) / 255;
-             lastRunSize -= litLength;
+             lastRunSize  = (size_t)(oend - op) - 1 /*token*/;
+             llAdd = (lastRunSize + 256 - RUN_MASK) / 256;
+             lastRunSize -= llAdd;
          }
-         ip = anchor + lastRunSize;
+         DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize);
+         ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */
 
          if (lastRunSize >= RUN_MASK) {
              size_t accumulator = lastRunSize - RUN_MASK;
@@ -1527,12 +1581,35 @@
 
      /* End */
      *srcSizePtr = (int) (((const char*)ip) - source);
-     return (int) ((char*)op-dst);
+     retval = (int) ((char*)op-dst);
+     goto _return_label;
 
- _dest_overflow:
-     if (limit == fillOutput) {
-         op = opSaved;  /* restore correct out pointer */
-         goto _last_literals;
-     }
-     return 0;
- }
+_dest_overflow:
+if (limit == fillOutput) {
+     /* Assumption : ip, anchor, ovml and ovref must be set correctly */
+     size_t const ll = (size_t)(ip - anchor);
+     size_t const ll_addbytes = (ll + 240) / 255;
+     size_t const ll_totalCost = 1 + ll_addbytes + ll;
+     BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */
+     DEBUGLOG(6, "Last sequence overflowing (only %i bytes remaining)", (int)(oend-1-opSaved));
+     op = opSaved;  /* restore correct out pointer */
+     if (op + ll_totalCost <= maxLitPos) {
+         /* ll validated; now adjust match length */
+         size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost));
+         size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255);
+         assert(maxMlSize < INT_MAX); assert(ovml >= 0);
+         if ((size_t)ovml > maxMlSize) ovml = (int)maxMlSize;
+         if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ovml >= MFLIMIT) {
+             DEBUGLOG(6, "Space to end : %i + ml (%i)", (int)((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1), ovml);
+             DEBUGLOG(6, "Before : ip = %p, anchor = %p", ip, anchor);
+             LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, ovref, notLimited, oend);
+             DEBUGLOG(6, "After : ip = %p, anchor = %p", ip, anchor);
+     }   }
+     goto _last_literals;
+}
+_return_label:
+#ifdef LZ4HC_HEAPMODE
+     FREEMEM(opt);
+#endif
+     return retval;
+}
diff --git a/lib/lz4hc.h b/lib/lz4hc.h
index 44e35bb..3d441fb 100644
--- a/lib/lz4hc.h
+++ b/lib/lz4hc.h
@@ -198,57 +198,32 @@
 #define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
 
 
-#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-#include <stdint.h>
-
 typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal;
 struct LZ4HC_CCtx_internal
 {
-    uint32_t   hashTable[LZ4HC_HASHTABLESIZE];
-    uint16_t   chainTable[LZ4HC_MAXD];
-    const uint8_t* end;         /* next block here to continue on current prefix */
-    const uint8_t* base;        /* All index relative to this position */
-    const uint8_t* dictBase;    /* alternate base for extDict */
-    uint32_t   dictLimit;       /* below that point, need extDict */
-    uint32_t   lowLimit;        /* below that point, no more dict */
-    uint32_t   nextToUpdate;    /* index from which to continue dictionary update */
-    short      compressionLevel;
-    int8_t     favorDecSpeed;   /* favor decompression speed if this flag set,
-                                   otherwise, favor compression ratio */
-    int8_t     dirty;           /* stream has to be fully reset if this flag is set */
+    LZ4_u32   hashTable[LZ4HC_HASHTABLESIZE];
+    LZ4_u16   chainTable[LZ4HC_MAXD];
+    const LZ4_byte* end;       /* next block here to continue on current prefix */
+    const LZ4_byte* base;      /* All index relative to this position */
+    const LZ4_byte* dictBase;  /* alternate base for extDict */
+    LZ4_u32   dictLimit;       /* below that point, need extDict */
+    LZ4_u32   lowLimit;        /* below that point, no more dict */
+    LZ4_u32   nextToUpdate;    /* index from which to continue dictionary update */
+    short     compressionLevel;
+    LZ4_i8    favorDecSpeed;   /* favor decompression speed if this flag set,
+                                  otherwise, favor compression ratio */
+    LZ4_i8    dirty;           /* stream has to be fully reset if this flag is set */
     const LZ4HC_CCtx_internal* dictCtx;
 };
 
-#else
-
-typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal;
-struct LZ4HC_CCtx_internal
-{
-    unsigned int   hashTable[LZ4HC_HASHTABLESIZE];
-    unsigned short chainTable[LZ4HC_MAXD];
-    const unsigned char* end;        /* next block here to continue on current prefix */
-    const unsigned char* base;       /* All index relative to this position */
-    const unsigned char* dictBase;   /* alternate base for extDict */
-    unsigned int   dictLimit;        /* below that point, need extDict */
-    unsigned int   lowLimit;         /* below that point, no more dict */
-    unsigned int   nextToUpdate;     /* index from which to continue dictionary update */
-    short          compressionLevel;
-    char           favorDecSpeed;    /* favor decompression speed if this flag set,
-                                        otherwise, favor compression ratio */
-    char           dirty;            /* stream has to be fully reset if this flag is set */
-    const LZ4HC_CCtx_internal* dictCtx;
-};
-
-#endif
-
 
 /* Do not use these definitions directly !
  * Declare or allocate an LZ4_streamHC_t instead.
  */
-#define LZ4_STREAMHCSIZE       (4*LZ4HC_HASHTABLESIZE + 2*LZ4HC_MAXD + 56 + ((sizeof(void*)==16) ? 56 : 0) /* AS400*/ ) /* 262200 or 262256*/
-#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
+#define LZ4_STREAMHCSIZE       262200  /* static size, for inter-version compatibility */
+#define LZ4_STREAMHCSIZE_VOIDP (LZ4_STREAMHCSIZE / sizeof(void*))
 union LZ4_streamHC_u {
-    size_t table[LZ4_STREAMHCSIZE_SIZET];
+    void* table[LZ4_STREAMHCSIZE_VOIDP];
     LZ4HC_CCtx_internal internal_donotuse;
 }; /* previously typedef'd to LZ4_streamHC_t */
 
diff --git a/ossfuzz/Makefile b/ossfuzz/Makefile
index 6875eb6..2ec1675 100644
--- a/ossfuzz/Makefile
+++ b/ossfuzz/Makefile
@@ -26,7 +26,7 @@
 # ##########################################################################
 
 LZ4DIR  := ../lib
-LIB_FUZZING_ENGINE ?= standaloneengine.o
+LIB_FUZZING_ENGINE ?=
 
 DEBUGLEVEL?= 1
 DEBUGFLAGS = -g -DLZ4_DEBUG=$(DEBUGLEVEL)
@@ -47,6 +47,7 @@
 	round_trip_frame_fuzzer \
 	decompress_frame_fuzzer
 
+.PHONY: all
 all: $(FUZZERS)
 
 # Include a rule to build the static library if calling this target
@@ -58,17 +59,25 @@
 	$(CC) -c $(LZ4_CFLAGS) $(LZ4_CPPFLAGS) $< -o $@
 
 # Generic rule for generating fuzzers
-%_fuzzer: %_fuzzer.o lz4_helpers.o $(LZ4DIR)/liblz4.a
-	# Compile the standalone code just in case. The OSS-Fuzz code might
-	# override the LIB_FUZZING_ENGINE value to "-fsanitize=fuzzer"
-	$(CC) -c $(LZ4_CFLAGS) $(LZ4_CPPFLAGS) standaloneengine.c -o standaloneengine.o
-
-	# Now compile the actual fuzzer.
+# When LIB_FUZZING_ENGINE is empty, standaloneengine.o becomes a prerequisite
+# (and therefore a link input through $^); otherwise only the engine flag is used.
+ifeq ($(LIB_FUZZING_ENGINE),)
+  LIB_FUZZING_DEPS := standaloneengine.o
+else
+  LIB_FUZZING_DEPS :=
+endif
+%_fuzzer: %_fuzzer.o lz4_helpers.o fuzz_data_producer.o $(LZ4DIR)/liblz4.a $(LIB_FUZZING_DEPS)
 	$(CXX) $(LZ4_CXXFLAGS) $(LZ4_CPPFLAGS) $(LDFLAGS) $(LIB_FUZZING_ENGINE) $^ -o $@$(EXT)
 
 %_fuzzer_clean:
 	$(RM) $*_fuzzer $*_fuzzer.o standaloneengine.o
 
 .PHONY: clean
-clean: compress_fuzzer_clean decompress_fuzzer_clean
+# The per-fuzzer clean rules leave the helper objects shared by every fuzzer
+# behind, so they are removed here before cleaning the library directory.
+clean: compress_fuzzer_clean decompress_fuzzer_clean \
+	compress_frame_fuzzer_clean compress_hc_fuzzer_clean \
+	decompress_frame_fuzzer_clean round_trip_frame_fuzzer_clean \
+	round_trip_fuzzer_clean round_trip_hc_fuzzer_clean round_trip_stream_fuzzer_clean
+	$(RM) lz4_helpers.o fuzz_data_producer.o
 	$(MAKE) -C $(LZ4DIR) clean
diff --git a/ossfuzz/compress_frame_fuzzer.c b/ossfuzz/compress_frame_fuzzer.c
index 75c609f..568ae14 100644
--- a/ossfuzz/compress_frame_fuzzer.c
+++ b/ossfuzz/compress_frame_fuzzer.c
@@ -13,18 +13,23 @@
 #include "lz4.h"
 #include "lz4frame.h"
 #include "lz4_helpers.h"
+#include "fuzz_data_producer.h"
 
 int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
 {
-    uint32_t seed = FUZZ_seed(&data, &size);
-    LZ4F_preferences_t const prefs = FUZZ_randomPreferences(&seed);
+    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(data, size);
+    LZ4F_preferences_t const prefs = FUZZ_dataProducer_preferences(producer);
+    size_t const dstCapacitySeed = FUZZ_dataProducer_retrieve32(producer);
+    size = FUZZ_dataProducer_remainingBytes(producer);
+
     size_t const compressBound = LZ4F_compressFrameBound(size, &prefs);
-    size_t const dstCapacity = FUZZ_rand32(&seed, 0, compressBound);
+    size_t const dstCapacity = FUZZ_getRange_from_uint32(dstCapacitySeed, 0, compressBound);
+
     char* const dst = (char*)malloc(dstCapacity);
     char* const rt = (char*)malloc(size);
 
-    FUZZ_ASSERT(dst);
-    FUZZ_ASSERT(rt);
+    FUZZ_ASSERT(dst!=NULL);
+    FUZZ_ASSERT(rt!=NULL);
 
     /* If compression succeeds it must round trip correctly. */
     size_t const dstSize =
@@ -37,6 +42,7 @@
 
     free(dst);
     free(rt);
+    FUZZ_dataProducer_free(producer);
 
     return 0;
 }
diff --git a/ossfuzz/compress_fuzzer.c b/ossfuzz/compress_fuzzer.c
index 7021624..edc8aad 100644
--- a/ossfuzz/compress_fuzzer.c
+++ b/ossfuzz/compress_fuzzer.c
@@ -10,12 +10,18 @@
 #include <string.h>
 
 #include "fuzz_helpers.h"
+#include "fuzz_data_producer.h"
 #include "lz4.h"
 
 int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
 {
-    uint32_t seed = FUZZ_seed(&data, &size);
-    size_t const dstCapacity = FUZZ_rand32(&seed, 0, LZ4_compressBound(size));
+    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(data, size);
+    size_t const dstCapacitySeed = FUZZ_dataProducer_retrieve32(producer);
+    size = FUZZ_dataProducer_remainingBytes(producer);
+
+    size_t const compressBound = LZ4_compressBound(size);
+    size_t const dstCapacity = FUZZ_getRange_from_uint32(dstCapacitySeed, 0, compressBound);
+
     char* const dst = (char*)malloc(dstCapacity);
     char* const rt = (char*)malloc(size);
 
@@ -46,6 +52,7 @@
 
     free(dst);
     free(rt);
+    FUZZ_dataProducer_free(producer);
 
     return 0;
 }
diff --git a/ossfuzz/compress_hc_fuzzer.c b/ossfuzz/compress_hc_fuzzer.c
index 4841367..7d8e45a 100644
--- a/ossfuzz/compress_hc_fuzzer.c
+++ b/ossfuzz/compress_hc_fuzzer.c
@@ -10,16 +10,22 @@
 #include <string.h>
 
 #include "fuzz_helpers.h"
+#include "fuzz_data_producer.h"
 #include "lz4.h"
 #include "lz4hc.h"
 
 int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
 {
-    uint32_t seed = FUZZ_seed(&data, &size);
-    size_t const dstCapacity = FUZZ_rand32(&seed, 0, LZ4_compressBound(size));
+    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(data, size);
+    size_t const dstCapacitySeed = FUZZ_dataProducer_retrieve32(producer);
+    size_t const levelSeed = FUZZ_dataProducer_retrieve32(producer);
+    size = FUZZ_dataProducer_remainingBytes(producer);
+
+    size_t const dstCapacity = FUZZ_getRange_from_uint32(dstCapacitySeed, 0, size);
+    int const level = FUZZ_getRange_from_uint32(levelSeed, LZ4HC_CLEVEL_MIN, LZ4HC_CLEVEL_MAX);
+
     char* const dst = (char*)malloc(dstCapacity);
     char* const rt = (char*)malloc(size);
-    int const level = FUZZ_rand32(&seed, LZ4HC_CLEVEL_MIN, LZ4HC_CLEVEL_MAX);
 
     FUZZ_ASSERT(dst);
     FUZZ_ASSERT(rt);
@@ -52,6 +58,7 @@
 
     free(dst);
     free(rt);
+    FUZZ_dataProducer_free(producer);
 
     return 0;
 }
diff --git a/ossfuzz/decompress_frame_fuzzer.c b/ossfuzz/decompress_frame_fuzzer.c
index bda25b0..0fcbb16 100644
--- a/ossfuzz/decompress_frame_fuzzer.c
+++ b/ossfuzz/decompress_frame_fuzzer.c
@@ -9,6 +9,7 @@
 #include <string.h>
 
 #include "fuzz_helpers.h"
+#include "fuzz_data_producer.h"
 #include "lz4.h"
 #define LZ4F_STATIC_LINKING_ONLY
 #include "lz4frame.h"
@@ -29,11 +30,17 @@
 
 int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
 {
+    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(data, size);
+    size_t const dstCapacitySeed = FUZZ_dataProducer_retrieve32(producer);
+    size_t const dictSizeSeed = FUZZ_dataProducer_retrieve32(producer);
+    size = FUZZ_dataProducer_remainingBytes(producer);
 
-    uint32_t seed = FUZZ_seed(&data, &size);
-    size_t const dstCapacity = FUZZ_rand32(&seed, 0, 4 * size);
+    size_t const dstCapacity = FUZZ_getRange_from_uint32(
+      dstCapacitySeed, 0, 4 * size);
     size_t const largeDictSize = 64 * 1024;
-    size_t const dictSize = FUZZ_rand32(&seed, 0, largeDictSize);
+    size_t const dictSize = FUZZ_getRange_from_uint32(
+      dictSizeSeed, 0, largeDictSize);
+
     char* const dst = (char*)malloc(dstCapacity);
     char* const dict = (char*)malloc(dictSize);
     LZ4F_decompressOptions_t opts;
@@ -62,6 +69,7 @@
     LZ4F_freeDecompressionContext(dctx);
     free(dst);
     free(dict);
+    FUZZ_dataProducer_free(producer);
 
     return 0;
 }
diff --git a/ossfuzz/decompress_fuzzer.c b/ossfuzz/decompress_fuzzer.c
index 0267c93..6f48e30 100644
--- a/ossfuzz/decompress_fuzzer.c
+++ b/ossfuzz/decompress_fuzzer.c
@@ -9,13 +9,16 @@
 #include <string.h>
 
 #include "fuzz_helpers.h"
+#include "fuzz_data_producer.h"
 #include "lz4.h"
 
 int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
 {
+    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(data, size);
+    size_t const dstCapacitySeed = FUZZ_dataProducer_retrieve32(producer);
+    size = FUZZ_dataProducer_remainingBytes(producer);
 
-    uint32_t seed = FUZZ_seed(&data, &size);
-    size_t const dstCapacity = FUZZ_rand32(&seed, 0, 4 * size);
+    size_t const dstCapacity = FUZZ_getRange_from_uint32(dstCapacitySeed, 0, 4 * size);
     size_t const smallDictSize = size + 1;
     size_t const largeDictSize = 64 * 1024 - 1;
     size_t const dictSize = MAX(smallDictSize, largeDictSize);
@@ -53,6 +56,7 @@
                                 dstCapacity, dstCapacity);
     free(dst);
     free(dict);
+    FUZZ_dataProducer_free(producer);
 
     return 0;
 }
diff --git a/ossfuzz/fuzz_data_producer.c b/ossfuzz/fuzz_data_producer.c
new file mode 100644
index 0000000..670fbf5
--- /dev/null
+++ b/ossfuzz/fuzz_data_producer.c
@@ -0,0 +1,104 @@
+#include "fuzz_data_producer.h"
+
+/* State of the data producer : a view over the fuzzer input.
+ * Seeds are taken from the END of the buffer, so only `size` shrinks. */
+struct FUZZ_dataProducer_s{
+  const uint8_t *data;
+  size_t size;
+};
+
+/* Creates a producer over the `size` bytes at `data`.
+ * The buffer is referenced, not copied; allocation is checked with FUZZ_ASSERT. */
+FUZZ_dataProducer_t* FUZZ_dataProducer_create(const uint8_t* data, size_t size) {
+  FUZZ_dataProducer_t* const producer =
+      (FUZZ_dataProducer_t*)malloc(sizeof(FUZZ_dataProducer_t));
+
+  FUZZ_ASSERT(producer != NULL);
+
+  producer->data = data;
+  producer->size = size;
+  return producer;
+}
+
+/* Frees the producer object only; the input buffer is not owned by it. */
+void FUZZ_dataProducer_free(FUZZ_dataProducer_t *producer) { free(producer); }
+
+/* Consumes a seed from the tail of the remaining data :
+ * - nothing left      : returns 0, consumes nothing
+ * - 1 to 3 bytes left : consumes and returns the last byte
+ * - 4 bytes or more   : consumes the last 4 bytes and returns them
+ *                       assembled as a little-endian 32-bit value */
+uint32_t FUZZ_dataProducer_retrieve32(FUZZ_dataProducer_t *producer) {
+    const uint8_t* data = producer->data;
+    const size_t size = producer->size;
+    if (size == 0) {
+        return 0;
+    } else if (size < 4) {
+        producer->size -= 1;
+        return (uint32_t)data[size - 1];
+    } else {
+        const uint8_t* const tail = data + size - 4;
+        producer->size -= 4;
+        /* Combine all 4 bytes explicitly : dereferencing the byte pointer
+         * alone yields a single byte (0..255), and a fixed byte order keeps
+         * the seed identical on every platform. */
+        return  (uint32_t)tail[0]
+             | ((uint32_t)tail[1] <<  8)
+             | ((uint32_t)tail[2] << 16)
+             | ((uint32_t)tail[3] << 24);
+    }
+}
+
+/* Maps `seed` into the inclusive range [min, max].
+ * Arithmetic is unsigned 32-bit; the full-range case is handled separately
+ * because `range + 1` would wrap to 0 and cause a modulo by zero. */
+uint32_t FUZZ_getRange_from_uint32(uint32_t seed, uint32_t min, uint32_t max)
+{
+    uint32_t range = max - min;
+    if (range == 0xffffffff) {
+      return seed;
+    }
+    return min + seed % (range + 1);
+}
+
+/* Consumes one seed from the producer and maps it into [min, max]. */
+uint32_t FUZZ_dataProducer_range32(FUZZ_dataProducer_t* producer,
+    uint32_t min, uint32_t max)
+{
+    uint32_t const seed = FUZZ_dataProducer_retrieve32(producer);
+    return FUZZ_getRange_from_uint32(seed, min, max);
+}
+
+/* Builds frame parameters from four seeds consumed from the producer. */
+LZ4F_frameInfo_t FUZZ_dataProducer_frameInfo(FUZZ_dataProducer_t* producer)
+{
+    LZ4F_frameInfo_t info = LZ4F_INIT_FRAMEINFO;
+    /* draw from one slot below LZ4F_max64KB, and map that slot to LZ4F_default */
+    info.blockSizeID = FUZZ_dataProducer_range32(producer, LZ4F_max64KB - 1, LZ4F_max4MB);
+    if (info.blockSizeID < LZ4F_max64KB) {
+        info.blockSizeID = LZ4F_default;
+    }
+    info.blockMode = FUZZ_dataProducer_range32(producer, LZ4F_blockLinked, LZ4F_blockIndependent);
+    info.contentChecksumFlag = FUZZ_dataProducer_range32(producer, LZ4F_noContentChecksum,
+                                           LZ4F_contentChecksumEnabled);
+    info.blockChecksumFlag = FUZZ_dataProducer_range32(producer, LZ4F_noBlockChecksum,
+                                         LZ4F_blockChecksumEnabled);
+    return info;
+}
+
+/* Builds compression preferences : frame info first, then three more seeds. */
+LZ4F_preferences_t FUZZ_dataProducer_preferences(FUZZ_dataProducer_t* producer)
+{
+    LZ4F_preferences_t prefs = LZ4F_INIT_PREFERENCES;
+    prefs.frameInfo = FUZZ_dataProducer_frameInfo(producer);
+    /* shift the drawn value down by 3 so that levels -3..LZ4HC_CLEVEL_MAX are covered */
+    prefs.compressionLevel = FUZZ_dataProducer_range32(producer, 0, LZ4HC_CLEVEL_MAX + 3) - 3;
+    prefs.autoFlush = FUZZ_dataProducer_range32(producer, 0, 1);
+    prefs.favorDecSpeed = FUZZ_dataProducer_range32(producer, 0, 1);
+    return prefs;
+}
+
+/* Returns the number of bytes not yet consumed. */
+size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer){
+  return producer->size;
+}
diff --git a/ossfuzz/fuzz_data_producer.h b/ossfuzz/fuzz_data_producer.h
new file mode 100644
index 0000000..b96dcba
--- /dev/null
+++ b/ossfuzz/fuzz_data_producer.h
@@ -0,0 +1,43 @@
+#ifndef FUZZ_DATA_PRODUCER_H
+#define FUZZ_DATA_PRODUCER_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "fuzz_helpers.h"
+#include "lz4frame.h"
+#include "lz4hc.h"
+
+/* Opaque state of the data producer; defined in fuzz_data_producer.c */
+typedef struct FUZZ_dataProducer_s FUZZ_dataProducer_t;
+
+/* Returns a data producer state struct. Use for producer initialization. */
+FUZZ_dataProducer_t *FUZZ_dataProducer_create(const uint8_t *data, size_t size);
+
+/* Frees the data producer */
+void FUZZ_dataProducer_free(FUZZ_dataProducer_t *producer);
+
+/* Consumes up to 4 bytes from the end of the remaining data and returns a
+ * seed derived from them; returns 0 when no data is left */
+uint32_t FUZZ_dataProducer_retrieve32(FUZZ_dataProducer_t *producer);
+
+/* Maps seed to a value in [min, max] (both bounds inclusive) */
+uint32_t FUZZ_getRange_from_uint32(uint32_t seed, uint32_t min, uint32_t max);
+
+/* Combination of the above two functions for non-adaptive use cases,
+ * i.e. where the remaining size is not involved */
+uint32_t FUZZ_dataProducer_range32(FUZZ_dataProducer_t *producer, uint32_t min,
+                                  uint32_t max);
+
+/* Returns lz4 preferences built from seeds consumed from the producer */
+LZ4F_preferences_t FUZZ_dataProducer_preferences(FUZZ_dataProducer_t* producer);
+
+/* Returns lz4 frame info built from seeds consumed from the producer */
+LZ4F_frameInfo_t FUZZ_dataProducer_frameInfo(FUZZ_dataProducer_t* producer);
+
+/* Returns the size of the remaining bytes of data in the producer */
+size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer);
+
+#endif /* FUZZ_DATA_PRODUCER_H */
diff --git a/ossfuzz/round_trip_frame_fuzzer.c b/ossfuzz/round_trip_frame_fuzzer.c
index 1eea90c..149542d 100644
--- a/ossfuzz/round_trip_frame_fuzzer.c
+++ b/ossfuzz/round_trip_frame_fuzzer.c
@@ -12,14 +12,17 @@
 #include "lz4.h"
 #include "lz4frame.h"
 #include "lz4_helpers.h"
+#include "fuzz_data_producer.h"
 
 int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
 {
-    uint32_t seed = FUZZ_seed(&data, &size);
-    LZ4F_preferences_t const prefs = FUZZ_randomPreferences(&seed);
-    size_t const dstCapacity = LZ4F_compressFrameBound(size, &prefs);
+    FUZZ_dataProducer_t* producer = FUZZ_dataProducer_create(data, size);
+    LZ4F_preferences_t const prefs = FUZZ_dataProducer_preferences(producer);
+    size = FUZZ_dataProducer_remainingBytes(producer);
+
+    size_t const dstCapacity = LZ4F_compressFrameBound(LZ4_compressBound(size), &prefs);
     char* const dst = (char*)malloc(dstCapacity);
-    char* const rt = (char*)malloc(size);
+    char* const rt = (char*)malloc(FUZZ_dataProducer_remainingBytes(producer));
 
     FUZZ_ASSERT(dst);
     FUZZ_ASSERT(rt);
@@ -34,6 +37,7 @@
 
     free(dst);
     free(rt);
+    FUZZ_dataProducer_free(producer);
 
     return 0;
 }
diff --git a/ossfuzz/round_trip_fuzzer.c b/ossfuzz/round_trip_fuzzer.c
index 3a66e80..6307058 100644
--- a/ossfuzz/round_trip_fuzzer.c
+++ b/ossfuzz/round_trip_fuzzer.c
@@ -10,11 +10,17 @@
 
 #include "fuzz_helpers.h"
 #include "lz4.h"
+#include "fuzz_data_producer.h"
 
 int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
 {
-    uint32_t seed = FUZZ_seed(&data, &size);
+    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(data, size);
+    size_t const partialCapacitySeed = FUZZ_dataProducer_retrieve32(producer);
+    size = FUZZ_dataProducer_remainingBytes(producer);
+
+    size_t const partialCapacity = FUZZ_getRange_from_uint32(partialCapacitySeed, 0, size);
     size_t const dstCapacity = LZ4_compressBound(size);
+
     char* const dst = (char*)malloc(dstCapacity);
     char* const rt = (char*)malloc(size);
 
@@ -32,7 +38,6 @@
 
     /* Partial decompression must succeed. */
     {
-        size_t const partialCapacity = FUZZ_rand32(&seed, 0, size);
         char* const partial = (char*)malloc(partialCapacity);
         FUZZ_ASSERT(partial);
         int const partialSize = LZ4_decompress_safe_partial(
@@ -43,8 +48,10 @@
         free(partial);
     }
 
+
     free(dst);
     free(rt);
+    FUZZ_dataProducer_free(producer);
 
     return 0;
 }
diff --git a/ossfuzz/round_trip_hc_fuzzer.c b/ossfuzz/round_trip_hc_fuzzer.c
index 325cdf0..7d03ee2 100644
--- a/ossfuzz/round_trip_hc_fuzzer.c
+++ b/ossfuzz/round_trip_hc_fuzzer.c
@@ -9,16 +9,20 @@
 #include <string.h>
 
 #include "fuzz_helpers.h"
+#include "fuzz_data_producer.h"
 #include "lz4.h"
 #include "lz4hc.h"
 
 int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
 {
-    uint32_t seed = FUZZ_seed(&data, &size);
+    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(data, size);
+    int const level = FUZZ_dataProducer_range32(producer,
+        LZ4HC_CLEVEL_MIN, LZ4HC_CLEVEL_MAX);
+    size = FUZZ_dataProducer_remainingBytes(producer);
+
     size_t const dstCapacity = LZ4_compressBound(size);
     char* const dst = (char*)malloc(dstCapacity);
     char* const rt = (char*)malloc(size);
-    int const level = FUZZ_rand32(&seed, LZ4HC_CLEVEL_MIN, LZ4HC_CLEVEL_MAX);
 
     FUZZ_ASSERT(dst);
     FUZZ_ASSERT(rt);
@@ -34,6 +38,7 @@
 
     free(dst);
     free(rt);
+    FUZZ_dataProducer_free(producer);
 
     return 0;
 }
diff --git a/ossfuzz/travisoss.sh b/ossfuzz/travisoss.sh
index 5ea884c..eae9a80 100755
--- a/ossfuzz/travisoss.sh
+++ b/ossfuzz/travisoss.sh
@@ -12,7 +12,12 @@
 fi
 
 # Modify the oss-fuzz Dockerfile so that we're checking out the current branch on travis.
-sed -i "s@https://github.com/lz4/lz4.git@-b $TRAVIS_BRANCH https://github.com/lz4/lz4.git@" /tmp/ossfuzz/projects/lz4/Dockerfile
+if [ "x${TRAVIS_PULL_REQUEST}" = "xfalse" ]
+then
+    sed -i "s@https://github.com/lz4/lz4.git@-b ${TRAVIS_BRANCH} https://github.com/lz4/lz4.git@" /tmp/ossfuzz/projects/lz4/Dockerfile
+else
+    sed -i "s@https://github.com/lz4/lz4.git@-b ${TRAVIS_PULL_REQUEST_BRANCH} https://github.com/${TRAVIS_PULL_REQUEST_SLUG}.git@" /tmp/ossfuzz/projects/lz4/Dockerfile
+fi
 
 # Try and build the fuzzers
 pushd /tmp/ossfuzz
diff --git a/programs/.gitignore b/programs/.gitignore
index daa7f14..9ffadd9 100644
--- a/programs/.gitignore
+++ b/programs/.gitignore
@@ -4,6 +4,7 @@
 lz4cat
 lz4c
 lz4c32
+lz4-wlib
 datagen
 frametest
 frametest32
diff --git a/programs/Makefile b/programs/Makefile
index 4994551..c1053f6 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -22,7 +22,7 @@
 #
 # You can contact the author at :
 #  - LZ4 homepage : http://www.lz4.org
-#  - LZ4 source repository : https://github.com/Cyan4973/lz4
+#  - LZ4 source repository : https://github.com/lz4/lz4
 # ##########################################################################
 # lz4 : Command Line Utility, supporting gzip-like arguments
 # lz4c  : CLU, supporting also legacy lz4demo arguments
@@ -41,12 +41,13 @@
 LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
 LIBVER   := $(shell echo $(LIBVER_SCRIPT))
 
-SRCFILES := $(sort $(wildcard $(LZ4DIR)/*.c) $(wildcard *.c))
-OBJFILES := $(SRCFILES:.c=.o)
+LIBFILES  = $(wildcard $(LZ4DIR)/*.c)
+SRCFILES  = $(sort $(LIBFILES) $(wildcard *.c))
+OBJFILES  = $(SRCFILES:.c=.o)
 
 CPPFLAGS += -I$(LZ4DIR) -DXXH_NAMESPACE=LZ4_
 CFLAGS   ?= -O3
-DEBUGFLAGS:=-Wall -Wextra -Wundef -Wcast-qual -Wcast-align -Wshadow \
+DEBUGFLAGS= -Wall -Wextra -Wundef -Wcast-qual -Wcast-align -Wshadow \
             -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes \
             -Wpointer-arith -Wstrict-aliasing=1
 CFLAGS   += $(DEBUGFLAGS) $(MOREFLAGS)
@@ -82,13 +83,25 @@
 	$(CC) $(FLAGS) $^ -o $@$(EXT)
 else
 lz4: $(OBJFILES)
-	$(CC) $(FLAGS) $^ -o $@$(EXT)
+	$(CC) $(FLAGS) $(OBJFILES) -o $@$(EXT) $(LDLIBS)
 endif
 
-
+.PHONY: lz4-release
 lz4-release: DEBUGFLAGS=
 lz4-release: lz4
 
+lz4-wlib: LIBFILES =
+lz4-wlib: SRCFILES+= $(LZ4DIR)/xxhash.c  # benchmark unit needs XXH64()
+lz4-wlib: LDFLAGS += -L $(LZ4DIR)
+lz4-wlib: LDLIBS   = -llz4
+lz4-wlib: liblz4 $(OBJFILES)
+	@echo WARNING: $@ must link to an extended variant of the dynamic library which also exposes unstable symbols
+	$(CC) $(FLAGS) $(OBJFILES) -o $@$(EXT) $(LDLIBS)
+
+.PHONY:liblz4
+liblz4:
+	CPPFLAGS="-DLZ4F_PUBLISH_STATIC_FUNCTIONS -DLZ4_PUBLISH_STATIC_FUNCTIONS" $(MAKE) -C $(LZ4DIR) liblz4
+
 lz4c: lz4
 	$(LN_SF) lz4$(EXT) lz4c$(EXT)
 
@@ -113,7 +126,8 @@
 endif
 	@$(MAKE) -C $(LZ4DIR) $@ > $(VOID)
 	@$(RM) core *.o *.test tmp* \
-           lz4$(EXT) lz4c$(EXT) lz4c32$(EXT) unlz4$(EXT) lz4cat$(EXT)
+           lz4$(EXT) lz4c$(EXT) lz4c32$(EXT) lz4-wlib$(EXT) \
+           unlz4$(EXT) lz4cat$(EXT)
 	@echo Cleaning completed
 
 
diff --git a/programs/bench.c b/programs/bench.c
index 5934935..3357d14 100644
--- a/programs/bench.c
+++ b/programs/bench.c
@@ -45,17 +45,176 @@
 
 #include "datagen.h"     /* RDG_genBuffer */
 #include "xxhash.h"
+#include "bench.h"
 
-
+#define LZ4_STATIC_LINKING_ONLY
 #include "lz4.h"
-#define COMPRESSOR0 LZ4_compress_local
-static int LZ4_compress_local(const char* src, char* dst, int srcSize, int dstSize, int clevel) {
-  int const acceleration = (clevel < 0) ? -clevel + 1 : 1;
-  return LZ4_compress_fast(src, dst, srcSize, dstSize, acceleration);
-}
+#define LZ4_HC_STATIC_LINKING_ONLY
 #include "lz4hc.h"
-#define COMPRESSOR1 LZ4_compress_HC
-#define DEFAULTCOMPRESSOR COMPRESSOR0
+
+
+/* *************************************
+*  Compression parameters and functions
+***************************************/
+
+struct compressionParameters
+{
+    int cLevel;
+    const char* dictBuf;
+    int dictSize;
+
+    LZ4_stream_t* LZ4_stream;
+    LZ4_stream_t* LZ4_dictStream;
+    LZ4_streamHC_t* LZ4_streamHC;
+    LZ4_streamHC_t* LZ4_dictStreamHC;
+
+    void (*initFunction)(
+        struct compressionParameters* pThis);
+    void (*resetFunction)(
+        const struct compressionParameters* pThis);
+    int (*blockFunction)(
+        const struct compressionParameters* pThis,
+        const char* src, char* dst, int srcSize, int dstSize);
+    void (*cleanupFunction)(
+        const struct compressionParameters* pThis);
+};
+
+static void LZ4_compressInitNoStream(
+    struct compressionParameters* pThis)
+{
+    pThis->LZ4_stream = NULL;
+    pThis->LZ4_dictStream = NULL;
+    pThis->LZ4_streamHC = NULL;
+    pThis->LZ4_dictStreamHC = NULL;
+}
+
+static void LZ4_compressInitStream(
+    struct compressionParameters* pThis)
+{
+    pThis->LZ4_stream = LZ4_createStream();
+    pThis->LZ4_dictStream = LZ4_createStream();
+    pThis->LZ4_streamHC = NULL;
+    pThis->LZ4_dictStreamHC = NULL;
+    LZ4_loadDict(pThis->LZ4_dictStream, pThis->dictBuf, pThis->dictSize);
+}
+
+static void LZ4_compressInitStreamHC(
+    struct compressionParameters* pThis)
+{
+    pThis->LZ4_stream = NULL;
+    pThis->LZ4_dictStream = NULL;
+    pThis->LZ4_streamHC = LZ4_createStreamHC();
+    pThis->LZ4_dictStreamHC = LZ4_createStreamHC();
+    LZ4_loadDictHC(pThis->LZ4_dictStreamHC, pThis->dictBuf, pThis->dictSize);
+}
+
+static void LZ4_compressResetNoStream(
+    const struct compressionParameters* pThis)
+{
+    (void)pThis;
+}
+
+static void LZ4_compressResetStream(
+    const struct compressionParameters* pThis)
+{
+    LZ4_resetStream_fast(pThis->LZ4_stream);
+    LZ4_attach_dictionary(pThis->LZ4_stream, pThis->LZ4_dictStream);
+}
+
+static void LZ4_compressResetStreamHC(
+    const struct compressionParameters* pThis)
+{
+    LZ4_resetStreamHC_fast(pThis->LZ4_streamHC, pThis->cLevel);
+    LZ4_attach_HC_dictionary(pThis->LZ4_streamHC, pThis->LZ4_dictStreamHC);
+}
+
+static int LZ4_compressBlockNoStream(
+    const struct compressionParameters* pThis,
+    const char* src, char* dst,
+    int srcSize, int dstSize)
+{
+    int const acceleration = (pThis->cLevel < 0) ? -pThis->cLevel + 1 : 1;
+    return LZ4_compress_fast(src, dst, srcSize, dstSize, acceleration);
+}
+
+static int LZ4_compressBlockNoStreamHC(
+    const struct compressionParameters* pThis,
+    const char* src, char* dst,
+    int srcSize, int dstSize)
+{
+    return LZ4_compress_HC(src, dst, srcSize, dstSize, pThis->cLevel);
+}
+
+static int LZ4_compressBlockStream(
+    const struct compressionParameters* pThis,
+    const char* src, char* dst,
+    int srcSize, int dstSize)
+{
+    int const acceleration = (pThis->cLevel < 0) ? -pThis->cLevel + 1 : 1;
+    return LZ4_compress_fast_continue(pThis->LZ4_stream, src, dst, srcSize, dstSize, acceleration);
+}
+
+static int LZ4_compressBlockStreamHC(
+    const struct compressionParameters* pThis,
+    const char* src, char* dst,
+    int srcSize, int dstSize)
+{
+    return LZ4_compress_HC_continue(pThis->LZ4_streamHC, src, dst, srcSize, dstSize);
+}
+
+static void LZ4_compressCleanupNoStream(
+    const struct compressionParameters* pThis)
+{
+    (void)pThis;
+}
+
+static void LZ4_compressCleanupStream(
+    const struct compressionParameters* pThis)
+{
+    LZ4_freeStream(pThis->LZ4_stream);
+    LZ4_freeStream(pThis->LZ4_dictStream);
+}
+
+static void LZ4_compressCleanupStreamHC(
+    const struct compressionParameters* pThis)
+{
+    LZ4_freeStreamHC(pThis->LZ4_streamHC);
+    LZ4_freeStreamHC(pThis->LZ4_dictStreamHC);
+}
+
+static void LZ4_buildCompressionParameters(
+    struct compressionParameters* pParams,
+    int cLevel, const char* dictBuf, int dictSize)
+{
+    pParams->cLevel = cLevel;
+    pParams->dictBuf = dictBuf;
+    pParams->dictSize = dictSize;
+
+    if (dictSize) {
+        if (cLevel < LZ4HC_CLEVEL_MIN) {
+            pParams->initFunction = LZ4_compressInitStream;
+            pParams->resetFunction = LZ4_compressResetStream;
+            pParams->blockFunction = LZ4_compressBlockStream;
+            pParams->cleanupFunction = LZ4_compressCleanupStream;
+        } else {
+            pParams->initFunction = LZ4_compressInitStreamHC;
+            pParams->resetFunction = LZ4_compressResetStreamHC;
+            pParams->blockFunction = LZ4_compressBlockStreamHC;
+            pParams->cleanupFunction = LZ4_compressCleanupStreamHC;
+        }
+    } else {
+        pParams->initFunction = LZ4_compressInitNoStream;
+        pParams->resetFunction = LZ4_compressResetNoStream;
+        pParams->cleanupFunction = LZ4_compressCleanupNoStream;
+
+        if (cLevel < LZ4HC_CLEVEL_MIN) {
+            pParams->blockFunction = LZ4_compressBlockNoStream;
+        } else {
+            pParams->blockFunction = LZ4_compressBlockNoStreamHC;
+        }
+    }
+}
+
 #define LZ4_isError(errcode) (errcode==0)
 
 
@@ -79,6 +238,8 @@
 #define MB *(1 <<20)
 #define GB *(1U<<30)
 
+#define LZ4_MAX_DICT_SIZE (64 KB)
+
 static const size_t maxMemory = (sizeof(size_t)==4)  ?  (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31));
 
 static U32 g_compressibilityDefault = 50;
@@ -152,17 +313,13 @@
     size_t resSize;
 } blockParam_t;
 
-struct compressionParameters
-{
-    int (*compressionFunction)(const char* src, char* dst, int srcSize, int dstSize, int cLevel);
-};
-
 #define MIN(a,b) ((a)<(b) ? (a) : (b))
 #define MAX(a,b) ((a)>(b) ? (a) : (b))
 
 static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                         const char* displayName, int cLevel,
-                        const size_t* fileSizes, U32 nbFiles)
+                        const size_t* fileSizes, U32 nbFiles,
+                        const char* dictBuf, int dictSize)
 {
     size_t const blockSize = (g_blockSize>=32 ? g_blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ;
     U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
@@ -172,27 +329,16 @@
     void* const resultBuffer = malloc(srcSize);
     U32 nbBlocks;
     struct compressionParameters compP;
-    int cfunctionId;
 
     /* checks */
     if (!compressedBuffer || !resultBuffer || !blockTable)
         EXM_THROW(31, "allocation error : not enough memory");
 
-    /* init */
     if (strlen(displayName)>17) displayName += strlen(displayName)-17;   /* can only display 17 characters */
 
-    /* Init */
-    if (cLevel < LZ4HC_CLEVEL_MIN) cfunctionId = 0; else cfunctionId = 1;
-    switch (cfunctionId)
-    {
-#ifdef COMPRESSOR0
-    case 0 : compP.compressionFunction = COMPRESSOR0; break;
-#endif
-#ifdef COMPRESSOR1
-    case 1 : compP.compressionFunction = COMPRESSOR1; break;
-#endif
-    default : compP.compressionFunction = DEFAULTCOMPRESSOR;
-    }
+    /* init */
+    LZ4_buildCompressionParameters(&compP, cLevel, dictBuf, dictSize);
+    compP.initFunction(&compP);
 
     /* Init blockTable data */
     {   const char* srcPtr = (const char*)srcBuffer;
@@ -256,8 +402,12 @@
                 U32 nbLoops;
                 for (nbLoops=0; nbLoops < nbCompressionLoops; nbLoops++) {
                     U32 blockNb;
+                    compP.resetFunction(&compP);
                     for (blockNb=0; blockNb<nbBlocks; blockNb++) {
-                        size_t const rSize = (size_t)compP.compressionFunction(blockTable[blockNb].srcPtr, blockTable[blockNb].cPtr, (int)blockTable[blockNb].srcSize, (int)blockTable[blockNb].cRoom, cLevel);
+                        size_t const rSize = (size_t)compP.blockFunction(
+                            &compP,
+                            blockTable[blockNb].srcPtr, blockTable[blockNb].cPtr,
+                            (int)blockTable[blockNb].srcSize, (int)blockTable[blockNb].cRoom);
                         if (LZ4_isError(rSize)) EXM_THROW(1, "LZ4 compression failed");
                         blockTable[blockNb].cSize = rSize;
                 }   }
@@ -298,9 +448,12 @@
                 for (nbLoops=0; nbLoops < nbDecodeLoops; nbLoops++) {
                     U32 blockNb;
                     for (blockNb=0; blockNb<nbBlocks; blockNb++) {
-                        int const regenSize = LZ4_decompress_safe(blockTable[blockNb].cPtr, blockTable[blockNb].resPtr, (int)blockTable[blockNb].cSize, (int)blockTable[blockNb].srcSize);
+                        int const regenSize = LZ4_decompress_safe_usingDict(
+                            blockTable[blockNb].cPtr, blockTable[blockNb].resPtr,
+                            (int)blockTable[blockNb].cSize, (int)blockTable[blockNb].srcSize,
+                            dictBuf, dictSize);
                         if (regenSize < 0) {
-                            DISPLAY("LZ4_decompress_safe() failed on block %u \n", blockNb);
+                            DISPLAY("LZ4_decompress_safe_usingDict() failed on block %u \n", blockNb);
                             break;
                         }
                         blockTable[blockNb].resSize = (size_t)regenSize;
@@ -364,6 +517,7 @@
     }   /* Bench */
 
     /* clean up */
+    compP.cleanupFunction(&compP);
     free(blockTable);
     free(compressedBuffer);
     free(resultBuffer);
@@ -397,7 +551,8 @@
 
 static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize,
                             const char* displayName, int cLevel, int cLevelLast,
-                            const size_t* fileSizes, unsigned nbFiles)
+                            const size_t* fileSizes, unsigned nbFiles,
+                            const char* dictBuf, int dictSize)
 {
     int l;
 
@@ -415,7 +570,8 @@
     for (l=cLevel; l <= cLevelLast; l++) {
         BMK_benchMem(srcBuffer, benchedSize,
                      displayName, l,
-                     fileSizes, nbFiles);
+                     fileSizes, nbFiles,
+                     dictBuf, dictSize);
     }
 }
 
@@ -456,7 +612,8 @@
 }
 
 static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles,
-                               int cLevel, int cLevelLast)
+                               int cLevel, int cLevelLast,
+                               const char* dictBuf, int dictSize)
 {
     void* srcBuffer;
     size_t benchedSize;
@@ -488,7 +645,8 @@
     {   const char* displayName = (nbFiles > 1) ? mfName : fileNamesTable[0];
         BMK_benchCLevel(srcBuffer, benchedSize,
                         displayName, cLevel, cLevelLast,
-                        fileSizes, nbFiles);
+                        fileSizes, nbFiles,
+                        dictBuf, dictSize);
     }
 
     /* clean up */
@@ -497,7 +655,8 @@
 }
 
 
-static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility)
+static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility,
+                              const char* dictBuf, int dictSize)
 {
     char name[20] = {0};
     size_t benchedSize = 10000000;
@@ -511,7 +670,7 @@
 
     /* Bench */
     snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100));
-    BMK_benchCLevel(srcBuffer, benchedSize, name, cLevel, cLevelLast, &benchedSize, 1);
+    BMK_benchCLevel(srcBuffer, benchedSize, name, cLevel, cLevelLast, &benchedSize, 1, dictBuf, dictSize);
 
     /* clean up */
     free(srcBuffer);
@@ -519,7 +678,8 @@
 
 
 int BMK_benchFilesSeparately(const char** fileNamesTable, unsigned nbFiles,
-                   int cLevel, int cLevelLast)
+                   int cLevel, int cLevelLast,
+                   const char* dictBuf, int dictSize)
 {
     unsigned fileNb;
     if (cLevel > LZ4HC_CLEVEL_MAX) cLevel = LZ4HC_CLEVEL_MAX;
@@ -528,29 +688,59 @@
     if (cLevelLast > cLevel) DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
 
     for (fileNb=0; fileNb<nbFiles; fileNb++)
-        BMK_benchFileTable(fileNamesTable+fileNb, 1, cLevel, cLevelLast);
+        BMK_benchFileTable(fileNamesTable+fileNb, 1, cLevel, cLevelLast, dictBuf, dictSize);
 
     return 0;
 }
 
 
 int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,
-                   int cLevel, int cLevelLast)
+                   int cLevel, int cLevelLast,
+                   const char* dictFileName)
 {
     double const compressibility = (double)g_compressibilityDefault / 100;
+    char* dictBuf = NULL;
+    int dictSize = 0;
 
     if (cLevel > LZ4HC_CLEVEL_MAX) cLevel = LZ4HC_CLEVEL_MAX;
     if (cLevelLast > LZ4HC_CLEVEL_MAX) cLevelLast = LZ4HC_CLEVEL_MAX;
     if (cLevelLast < cLevel) cLevelLast = cLevel;
     if (cLevelLast > cLevel) DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
 
+    if (dictFileName) {
+        FILE* dictFile = NULL;
+        U64 dictFileSize = UTIL_getFileSize(dictFileName);
+        if (!dictFileSize) EXM_THROW(25, "Dictionary error : could not stat dictionary file");
+
+        dictFile = fopen(dictFileName, "rb");
+        if (!dictFile) EXM_THROW(25, "Dictionary error : could not open dictionary file");
+
+        if (dictFileSize > LZ4_MAX_DICT_SIZE) {
+            dictSize = LZ4_MAX_DICT_SIZE;
+            if (UTIL_fseek(dictFile, dictFileSize - dictSize, SEEK_SET))
+                EXM_THROW(25, "Dictionary error : could not seek dictionary file");
+        } else {
+            dictSize = (int)dictFileSize;
+        }
+
+        dictBuf = (char *)malloc(dictSize);
+        if (!dictBuf) EXM_THROW(25, "Allocation error : not enough memory");
+
+        if (fread(dictBuf, 1, dictSize, dictFile) != (size_t)dictSize)
+            EXM_THROW(25, "Dictionary error : could not read dictionary file");
+
+        fclose(dictFile);
+    }
+
     if (nbFiles == 0)
-        BMK_syntheticTest(cLevel, cLevelLast, compressibility);
+        BMK_syntheticTest(cLevel, cLevelLast, compressibility, dictBuf, dictSize);
     else {
         if (g_benchSeparately)
-            BMK_benchFilesSeparately(fileNamesTable, nbFiles, cLevel, cLevelLast);
+            BMK_benchFilesSeparately(fileNamesTable, nbFiles, cLevel, cLevelLast, dictBuf, dictSize);
         else
-            BMK_benchFileTable(fileNamesTable, nbFiles, cLevel, cLevelLast);
+            BMK_benchFileTable(fileNamesTable, nbFiles, cLevel, cLevelLast, dictBuf, dictSize);
     }
+
+    free(dictBuf);
     return 0;
 }
diff --git a/programs/bench.h b/programs/bench.h
index bb67bee..22ebf60 100644
--- a/programs/bench.h
+++ b/programs/bench.h
@@ -26,7 +26,8 @@
 #include <stddef.h>
 
 int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,
-                   int cLevel, int cLevelLast);
+                   int cLevel, int cLevelLast,
+                   const char* dictFileName);
 
 /* Set Parameters */
 void BMK_setNbSeconds(unsigned nbLoops);
diff --git a/programs/lz4cli.c b/programs/lz4cli.c
index 5da7654..523b8a8 100644
--- a/programs/lz4cli.c
+++ b/programs/lz4cli.c
@@ -93,8 +93,11 @@
 ***************************************/
 #define DEFAULT_COMPRESSOR   LZ4IO_compressFilename
 #define DEFAULT_DECOMPRESSOR LZ4IO_decompressFilename
-int LZ4IO_compressFilename_Legacy(LZ4IO_prefs_t* const prefs, const char* input_filename, const char* output_filename, int compressionlevel);   /* hidden function */
-
+int LZ4IO_compressFilename_Legacy(const char* input_filename, const char* output_filename, int compressionlevel, const LZ4IO_prefs_t* prefs);   /* hidden function */
+int LZ4IO_compressMultipleFilenames_Legacy(
+                            const char** inFileNamesTable, int ifntSize,
+                            const char* suffix,
+                            int compressionLevel, const LZ4IO_prefs_t* prefs);
 
 /*-***************************
 *  Functions
@@ -392,7 +395,7 @@
                 if (!strcmp(argument,  "--favor-decSpeed")) { LZ4IO_favorDecSpeed(prefs, 1); continue; }
                 if (!strcmp(argument,  "--verbose")) { displayLevel++; continue; }
                 if (!strcmp(argument,  "--quiet")) { if (displayLevel) displayLevel--; continue; }
-                if (!strcmp(argument,  "--version")) { DISPLAYOUT(WELCOME_MESSAGE); return 0; }
+                if (!strcmp(argument,  "--version")) { DISPLAYOUT(WELCOME_MESSAGE); goto _cleanup; }
                 if (!strcmp(argument,  "--help")) { usage_advanced(exeName); goto _cleanup; }
                 if (!strcmp(argument,  "--keep")) { LZ4IO_setRemoveSrcFile(prefs, 0); continue; }   /* keep source file (default) */
                 if (!strcmp(argument,  "--rm")) { LZ4IO_setRemoveSrcFile(prefs, 1); continue; }
@@ -625,10 +628,18 @@
 #endif
     }
 
+    if (dictionary_filename) {
+        if (!strcmp(dictionary_filename, stdinmark) && IS_CONSOLE(stdin)) {
+            DISPLAYLEVEL(1, "refusing to read from a console\n");
+            exit(1);
+        }
+        LZ4IO_setDictionaryFilename(prefs, dictionary_filename);
+    }
+
     /* benchmark and test modes */
     if (mode == om_bench) {
         BMK_setNotificationLevel(displayLevel);
-        operationResult = BMK_benchFiles(inFileNames, ifnIdx, cLevel, cLevelLast);
+        operationResult = BMK_benchFiles(inFileNames, ifnIdx, cLevel, cLevelLast, dictionary_filename);
         goto _cleanup;
     }
 
@@ -638,14 +649,6 @@
         mode = om_decompress;   /* defer to decompress */
     }
 
-    if (dictionary_filename) {
-        if (!strcmp(dictionary_filename, stdinmark) && IS_CONSOLE(stdin)) {
-            DISPLAYLEVEL(1, "refusing to read from a console\n");
-            exit(1);
-        }
-        LZ4IO_setDictionaryFilename(prefs, dictionary_filename);
-    }
-
     /* compress or decompress */
     if (!input_filename) input_filename = stdinmark;
     /* Check if input is defined as console; trigger an error in this case */
@@ -658,7 +661,11 @@
         if (!output_filename) output_filename = stdoutmark;
     }
     else{
+#ifdef UTIL_HAS_CREATEFILELIST
         if (!recursive && !UTIL_isRegFile(input_filename)) {
+#else
+        if (!UTIL_isRegFile(input_filename)) {
+#endif
             DISPLAYLEVEL(1, "%s: is not a regular file \n", input_filename);
             exit(1);
         }
@@ -666,7 +673,7 @@
 
     /* No output filename ==> try to select one automatically (when possible) */
     while ((!output_filename) && (multiple_inputs==0)) {
-        if (!IS_CONSOLE(stdout)) {
+        if (!IS_CONSOLE(stdout) && mode != om_list) {
             /* Default to stdout whenever stdout is not the console.
              * Note : this policy may change in the future, therefore don't rely on it !
              * To ensure `stdout` is explicitly selected, use `-c` command flag.
@@ -740,23 +747,30 @@
     if (ifnIdx == 0) multiple_inputs = 0;
     if (mode == om_decompress) {
         if (multiple_inputs) {
+            const char* const dec_extension = !strcmp(output_filename,stdoutmark) ? stdoutmark : LZ4_EXTENSION;
             assert(ifnIdx <= INT_MAX);
-            operationResult = LZ4IO_decompressMultipleFilenames(prefs, inFileNames, (int)ifnIdx, !strcmp(output_filename,stdoutmark) ? stdoutmark : LZ4_EXTENSION);
+            operationResult = LZ4IO_decompressMultipleFilenames(inFileNames, (int)ifnIdx, dec_extension, prefs);
         } else {
-            operationResult = DEFAULT_DECOMPRESSOR(prefs, input_filename, output_filename);
+            operationResult = DEFAULT_DECOMPRESSOR(input_filename, output_filename, prefs);
         }
     } else if (mode == om_list){
         operationResult = LZ4IO_displayCompressedFilesInfo(inFileNames, ifnIdx);
     } else {   /* compression is default action */
         if (legacy_format) {
             DISPLAYLEVEL(3, "! Generating LZ4 Legacy format (deprecated) ! \n");
-            LZ4IO_compressFilename_Legacy(prefs, input_filename, output_filename, cLevel);
+            if(multiple_inputs){
+                const char* const leg_extension = !strcmp(output_filename,stdoutmark) ? stdoutmark : LZ4_EXTENSION;
+                operationResult = LZ4IO_compressMultipleFilenames_Legacy(inFileNames, (int)ifnIdx, leg_extension, cLevel, prefs);   /* record the outcome, like the other modes */
+            } else {
+                operationResult = LZ4IO_compressFilename_Legacy(input_filename, output_filename, cLevel, prefs);   /* record the outcome, like the other modes */
+            }
         } else {
             if (multiple_inputs) {
+                const char* const comp_extension = !strcmp(output_filename,stdoutmark) ? stdoutmark : LZ4_EXTENSION;
                 assert(ifnIdx <= INT_MAX);
-                operationResult = LZ4IO_compressMultipleFilenames(prefs, inFileNames, (int)ifnIdx, !strcmp(output_filename,stdoutmark) ? stdoutmark : LZ4_EXTENSION, cLevel);
+                operationResult = LZ4IO_compressMultipleFilenames(inFileNames, (int)ifnIdx, comp_extension, cLevel, prefs);
             } else {
-                operationResult = DEFAULT_COMPRESSOR(prefs, input_filename, output_filename, cLevel);
+                operationResult = DEFAULT_COMPRESSOR(input_filename, output_filename, cLevel, prefs);
     }   }   }
 
 _cleanup:
diff --git a/programs/lz4io.c b/programs/lz4io.c
index d818535..a274798 100644
--- a/programs/lz4io.c
+++ b/programs/lz4io.c
@@ -90,6 +90,7 @@
 *  Macros
 **************************************/
 #define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
+#define DISPLAYOUT(...)      fprintf(stdout, __VA_ARGS__)
 #define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
 static int g_displayLevel = 0;   /* 0 : no display  ; 1: errors  ; 2 : + result + interaction + warnings ; 3 : + progression; 4 : + information */
 
@@ -110,20 +111,20 @@
 **************************************/
 
 struct LZ4IO_prefs_s {
-  int passThrough;
-  int overwrite;
-  int testMode;
-  int blockSizeId;
-  size_t blockSize;
-  int blockChecksum;
-  int streamChecksum;
-  int blockIndependence;
-  int sparseFileSupport;
-  int contentSizeFlag;
-  int useDictionary;
-  unsigned favorDecSpeed;
-  const char* dictionaryFilename;
-  int removeSrcFile;
+    int passThrough;
+    int overwrite;
+    int testMode;
+    int blockSizeId;
+    size_t blockSize;
+    int blockChecksum;
+    int streamChecksum;
+    int blockIndependence;
+    int sparseFileSupport;
+    int contentSizeFlag;
+    int useDictionary;
+    unsigned favorDecSpeed;
+    const char* dictionaryFilename;
+    int removeSrcFile;
 };
 
 /**************************************
@@ -158,28 +159,28 @@
 
 LZ4IO_prefs_t* LZ4IO_defaultPreferences(void)
 {
-  LZ4IO_prefs_t* const ret = (LZ4IO_prefs_t*)malloc(sizeof(LZ4IO_prefs_t));
-  if (!ret) EXM_THROW(21, "Allocation error : not enough memory");
-  ret->passThrough = 0;
-  ret->overwrite = 1;
-  ret->testMode = 0;
-  ret->blockSizeId = LZ4IO_BLOCKSIZEID_DEFAULT;
-  ret->blockSize = 0;
-  ret->blockChecksum = 0;
-  ret->streamChecksum = 1;
-  ret->blockIndependence = 1;
-  ret->sparseFileSupport = 1;
-  ret->contentSizeFlag = 0;
-  ret->useDictionary = 0;
-  ret->favorDecSpeed = 0;
-  ret->dictionaryFilename = NULL;
-  ret->removeSrcFile = 0;
-  return ret;
+    LZ4IO_prefs_t* const ret = (LZ4IO_prefs_t*)malloc(sizeof(*ret));
+    if (!ret) EXM_THROW(21, "Allocation error : not enough memory");
+    ret->passThrough = 0;
+    ret->overwrite = 1;
+    ret->testMode = 0;
+    ret->blockSizeId = LZ4IO_BLOCKSIZEID_DEFAULT;
+    ret->blockSize = 0;
+    ret->blockChecksum = 0;
+    ret->streamChecksum = 1;
+    ret->blockIndependence = 1;
+    ret->sparseFileSupport = 1;
+    ret->contentSizeFlag = 0;
+    ret->useDictionary = 0;
+    ret->favorDecSpeed = 0;
+    ret->dictionaryFilename = NULL;
+    ret->removeSrcFile = 0;
+    return ret;
 }
 
-void LZ4IO_freePreferences(LZ4IO_prefs_t* const prefs)
+void LZ4IO_freePreferences(LZ4IO_prefs_t* prefs)
 {
-  free(prefs);
+    free(prefs);
 }
 
 
@@ -241,20 +242,21 @@
     return prefs->blockSize;
 }
 
+/* Default setting : 1 == independent blocks */
 int LZ4IO_setBlockMode(LZ4IO_prefs_t* const prefs, LZ4IO_blockMode_t blockMode)
 {
     prefs->blockIndependence = (blockMode == LZ4IO_blockIndependent);
     return prefs->blockIndependence;
 }
 
-/* Default setting : no block checksum */
+/* Default setting : 0 == no block checksum */
 int LZ4IO_setBlockChecksumMode(LZ4IO_prefs_t* const prefs, int enable)
 {
     prefs->blockChecksum = (enable != 0);
     return prefs->blockChecksum;
 }
 
-/* Default setting : checksum enabled */
+/* Default setting : 1 == checksum enabled */
 int LZ4IO_setStreamChecksumMode(LZ4IO_prefs_t* const prefs, int enable)
 {
     prefs->streamChecksum = (enable != 0);
@@ -268,10 +270,10 @@
     return g_displayLevel;
 }
 
-/* Default setting : 0 (disabled) */
+/* Default setting : 1 (auto: enabled on file, disabled on stdout) */
 int LZ4IO_setSparseFile(LZ4IO_prefs_t* const prefs, int enable)
 {
-    prefs->sparseFileSupport = (enable!=0);
+    prefs->sparseFileSupport = 2*(enable!=0);  /* 2==force enable */
     return prefs->sparseFileSupport;
 }
 
@@ -324,26 +326,27 @@
 }
 
 /** FIO_openDstFile() :
+ *  prefs is read-only : the effective sparse mode is computed locally (auto mode is disabled on stdout).
  *  condition : `dstFileName` must be non-NULL.
  * @result : FILE* to `dstFileName`, or NULL if it fails */
-static FILE* LZ4IO_openDstFile(LZ4IO_prefs_t* const prefs, const char* dstFileName)
+static FILE* LZ4IO_openDstFile(const char* dstFileName, const LZ4IO_prefs_t* const prefs)
 {
     FILE* f;
     assert(dstFileName != NULL);
 
     if (!strcmp (dstFileName, stdoutmark)) {
-        DISPLAYLEVEL(4,"Using stdout for output\n");
+        DISPLAYLEVEL(4, "Using stdout for output \n");
         f = stdout;
         SET_BINARY_MODE(stdout);
         if (prefs->sparseFileSupport==1) {
-            prefs->sparseFileSupport = 0;
-            DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n");
+            DISPLAYLEVEL(4, "Sparse File Support automatically disabled on stdout ;"
+                            " to force-enable it, add --sparse command \n");
         }
     } else {
         if (!prefs->overwrite && strcmp (dstFileName, nulmark)) {  /* Check if destination file already exists */
-            f = fopen( dstFileName, "rb" );
-            if (f != NULL) {  /* dest exists, prompt for overwrite authorization */
-                fclose(f);
+            FILE* const testf = fopen( dstFileName, "rb" );
+            if (testf != NULL) {  /* dest exists, prompt for overwrite authorization */
+                fclose(testf);
                 if (g_displayLevel <= 1) {  /* No interaction possible */
                     DISPLAY("%s already exists; not overwritten  \n", dstFileName);
                     return NULL;
@@ -361,7 +364,9 @@
     }
 
     /* sparse file */
-    if (f && prefs->sparseFileSupport) { SET_SPARSE_FILE_MODE(f); }
+    {   int const sparseMode = (prefs->sparseFileSupport - (f==stdout)) > 0;
+        if (f && sparseMode) { SET_SPARSE_FILE_MODE(f); }
+    }
 
     return f;
 }
@@ -391,7 +396,8 @@
 /* LZ4IO_compressFilename_Legacy :
  * This function is intentionally "hidden" (not published in .h)
  * It generates compressed streams using the old 'legacy' format */
-int LZ4IO_compressFilename_Legacy(LZ4IO_prefs_t* const prefs, const char* input_filename, const char* output_filename, int compressionlevel)
+int LZ4IO_compressFilename_Legacy(const char* input_filename, const char* output_filename,
+                                  int compressionlevel, const LZ4IO_prefs_t* prefs)
 {
     typedef int (*compress_f)(const char* src, char* dst, int srcSize, int dstSize, int cLevel);
     compress_f const compressionFunction = (compressionlevel < 3) ? LZ4IO_LZ4_compress : LZ4_compress_HC;
@@ -409,7 +415,7 @@
     if (finput == NULL)
         EXM_THROW(20, "%s : open file error ", input_filename);
 
-    foutput = LZ4IO_openDstFile(prefs, output_filename);
+    foutput = LZ4IO_openDstFile(output_filename, prefs);
     if (foutput == NULL) {
         fclose(finput);
         EXM_THROW(20, "%s : open file error ", input_filename);
@@ -423,23 +429,22 @@
 
     /* Write Archive Header */
     LZ4IO_writeLE32(out_buff, LEGACY_MAGICNUMBER);
-    {   size_t const writeSize = fwrite(out_buff, 1, MAGICNUMBER_SIZE, foutput);
-        if (writeSize != MAGICNUMBER_SIZE)
-            EXM_THROW(22, "Write error : cannot write header");
-    }
+    if (fwrite(out_buff, 1, MAGICNUMBER_SIZE, foutput) != MAGICNUMBER_SIZE)
+        EXM_THROW(22, "Write error : cannot write header");
 
     /* Main Loop */
     while (1) {
         int outSize;
         /* Read Block */
         size_t const inSize = fread(in_buff, (size_t)1, (size_t)LEGACY_BLOCKSIZE, finput);
-        assert(inSize <= LEGACY_BLOCKSIZE);
         if (inSize == 0) break;
+        assert(inSize <= LEGACY_BLOCKSIZE);
         filesize += inSize;
 
         /* Compress Block */
         outSize = compressionFunction(in_buff, out_buff+4, (int)inSize, outBuffSize, compressionlevel);
-        compressedfilesize += outSize+4;
+        assert(outSize >= 0);
+        compressedfilesize += (unsigned long long)outSize+4;
         DISPLAYUPDATE(2, "\rRead : %i MB  ==> %.2f%%   ",
                 (int)(filesize>>20), (double)compressedfilesize/filesize*100);
 
@@ -447,9 +452,8 @@
         assert(outSize > 0);
         assert(outSize < outBuffSize);
         LZ4IO_writeLE32(out_buff, (unsigned)outSize);
-        {   size_t const writeSize = fwrite(out_buff, 1, outSize+4, foutput);
-            if (writeSize != (size_t)(outSize+4))
-                EXM_THROW(24, "Write error : cannot write compressed block");
+        if (fwrite(out_buff, 1, (size_t)outSize+4, foutput) != (size_t)(outSize+4)) {
+            EXM_THROW(24, "Write error : cannot write compressed block");
     }   }
     if (ferror(finput)) EXM_THROW(25, "Error while reading %s ", input_filename);
 
@@ -469,11 +473,59 @@
     free(in_buff);
     free(out_buff);
     fclose(finput);
-    fclose(foutput);
+    if (strcmp(output_filename,stdoutmark)) fclose(foutput);   /* do not close stdout */
 
     return 0;
 }
 
+#define FNSPACE 30
+/* LZ4IO_compressMultipleFilenames_Legacy :
+ * Intentionally "hidden" (not published in .h). Compresses each input file into its own 'legacy' format stream,
+ * written to <input><suffix>, or to stdoutmark when suffix == stdoutmark. @return : sum of per-file results, or ifntSize on allocation failure */
+int LZ4IO_compressMultipleFilenames_Legacy(
+                            const char** inFileNamesTable, int ifntSize,
+                            const char* suffix,
+                            int compressionLevel, const LZ4IO_prefs_t* prefs)
+{
+    int i;
+    int missed_files = 0;
+    char* dstFileName = (char*)malloc(FNSPACE);   /* reusable buffer for destination names, grown on demand */
+    size_t ofnSize = FNSPACE;                     /* current capacity of dstFileName, in bytes */
+    const size_t suffixSize = strlen(suffix);
+
+    if (dstFileName == NULL) return ifntSize;   /* not enough memory : report every file as missed */
+
+    /* loop on each file */
+    for (i=0; i<ifntSize; i++) {
+        size_t const ifnSize = strlen(inFileNamesTable[i]);
+        if (!strcmp(suffix, stdoutmark)) {   /* destination is stdoutmark : no file name to build */
+            missed_files += LZ4IO_compressFilename_Legacy(
+                                    inFileNamesTable[i], stdoutmark,
+                                    compressionLevel, prefs);
+            continue;
+        }
+
+        if (ofnSize <= ifnSize+suffixSize+1) {   /* buffer too small for name + suffix + '\0' */
+            free(dstFileName);
+            ofnSize = ifnSize + suffixSize + 20;   /* must account for suffixSize, else strcat() below can overflow */
+            dstFileName = (char*)malloc(ofnSize);
+            if (dstFileName==NULL) {
+                return ifntSize;   /* not enough memory */
+        }   }
+        strcpy(dstFileName, inFileNamesTable[i]);   /* safe : ofnSize > ifnSize+suffixSize+1 at this point */
+        strcat(dstFileName, suffix);
+
+        missed_files += LZ4IO_compressFilename_Legacy(
+                                inFileNamesTable[i], dstFileName,
+                                compressionLevel, prefs);
+    }
+
+    /* Release the name buffer */
+    free(dstFileName);
+
+    return missed_files;
+}
+
 
 /*********************************************
 *  Compression using Frame format
@@ -488,22 +540,20 @@
     LZ4F_CDict* cdict;
 } cRess_t;
 
-static void* LZ4IO_createDict(LZ4IO_prefs_t* const prefs, size_t *dictSize) {
+static void* LZ4IO_createDict(size_t* dictSize, const char* const dictFilename)
+{
     size_t readSize;
     size_t dictEnd = 0;
     size_t dictLen = 0;
     size_t dictStart;
     size_t circularBufSize = LZ4_MAX_DICT_SIZE;
-    char* circularBuf;
-    char* dictBuf;
-    const char* dictFilename = prefs->dictionaryFilename;
+    char*  circularBuf = (char*)malloc(circularBufSize);
+    char*  dictBuf;
     FILE* dictFile;
 
+    if (!circularBuf) EXM_THROW(25, "Allocation error : not enough memory for circular buffer");
     if (!dictFilename) EXM_THROW(25, "Dictionary error : no filename provided");
 
-    circularBuf = (char *) malloc(circularBufSize);
-    if (!circularBuf) EXM_THROW(25, "Allocation error : not enough memory");
-
     dictFile = LZ4IO_openSrcFile(dictFilename);
     if (!dictFile) EXM_THROW(25, "Dictionary error : could not open dictionary file");
 
@@ -533,7 +583,7 @@
         circularBuf = NULL;
     } else {
         /* Otherwise, we will alloc a new buffer and copy our dict into that. */
-        dictBuf = (char *) malloc(dictLen ? dictLen : 1);
+        dictBuf = (char *)malloc(dictLen ? dictLen : 1);
         if (!dictBuf) EXM_THROW(25, "Allocation error : not enough memory");
 
         memcpy(dictBuf, circularBuf + dictStart, circularBufSize - dictStart);
@@ -546,21 +596,20 @@
     return dictBuf;
 }
 
-static LZ4F_CDict* LZ4IO_createCDict(LZ4IO_prefs_t* const prefs) {
+static LZ4F_CDict* LZ4IO_createCDict(const LZ4IO_prefs_t* const prefs)
+{
     size_t dictionarySize;
     void* dictionaryBuffer;
     LZ4F_CDict* cdict;
-    if (!prefs->useDictionary) {
-        return NULL;
-    }
-    dictionaryBuffer = LZ4IO_createDict(prefs, &dictionarySize);
+    if (!prefs->useDictionary) return NULL;
+    dictionaryBuffer = LZ4IO_createDict(&dictionarySize, prefs->dictionaryFilename);
     if (!dictionaryBuffer) EXM_THROW(25, "Dictionary error : could not create dictionary");
     cdict = LZ4F_createCDict(dictionaryBuffer, dictionarySize);
     free(dictionaryBuffer);
     return cdict;
 }
 
-static cRess_t LZ4IO_createCResources(LZ4IO_prefs_t* const prefs)
+static cRess_t LZ4IO_createCResources(const LZ4IO_prefs_t* const prefs)
 {
     const size_t blockSize = prefs->blockSize;
     cRess_t ress;
@@ -598,13 +647,12 @@
  *          1 : missing or pb opening srcFileName
  */
 static int
-LZ4IO_compressFilename_extRess(LZ4IO_prefs_t* const io_prefs, cRess_t ress,
+LZ4IO_compressFilename_extRess(cRess_t ress,
                                const char* srcFileName, const char* dstFileName,
-                               int compressionLevel)
+                               int compressionLevel, const LZ4IO_prefs_t* const io_prefs)
 {
     unsigned long long filesize = 0;
     unsigned long long compressedfilesize = 0;
-    FILE* srcFile;
     FILE* dstFile;
     void* const srcBuffer = ress.srcBuffer;
     void* const dstBuffer = ress.dstBuffer;
@@ -615,13 +663,12 @@
     LZ4F_preferences_t prefs;
 
     /* Init */
-    srcFile = LZ4IO_openSrcFile(srcFileName);
+    FILE* const srcFile = LZ4IO_openSrcFile(srcFileName);
     if (srcFile == NULL) return 1;
-    dstFile = LZ4IO_openDstFile(io_prefs, dstFileName);
+    dstFile = LZ4IO_openDstFile(dstFileName, io_prefs);
     if (dstFile == NULL) { fclose(srcFile); return 1; }
     memset(&prefs, 0, sizeof(prefs));
 
-
     /* Set compression parameters */
     prefs.autoFlush = 1;
     prefs.compressionLevel = compressionLevel;
@@ -631,7 +678,7 @@
     prefs.frameInfo.contentChecksumFlag = (LZ4F_contentChecksum_t)io_prefs->streamChecksum;
     prefs.favorDecSpeed = io_prefs->favorDecSpeed;
     if (io_prefs->contentSizeFlag) {
-      U64 const fileSize = UTIL_getFileSize(srcFileName);
+      U64 const fileSize = UTIL_getOpenFileSize(srcFile);
       prefs.frameInfo.contentSize = fileSize;   /* == 0 if input == stdin */
       if (fileSize==0)
           DISPLAYLEVEL(3, "Warning : cannot determine input content size \n");
@@ -645,41 +692,41 @@
     /* single-block file */
     if (readSize < blockSize) {
         /* Compress in single pass */
-        size_t cSize = LZ4F_compressFrame_usingCDict(ctx, dstBuffer, dstBufferSize, srcBuffer, readSize, ress.cdict, &prefs);
-        if (LZ4F_isError(cSize)) EXM_THROW(31, "Compression failed : %s", LZ4F_getErrorName(cSize));
+        size_t const cSize = LZ4F_compressFrame_usingCDict(ctx, dstBuffer, dstBufferSize, srcBuffer, readSize, ress.cdict, &prefs);
+        if (LZ4F_isError(cSize))
+            EXM_THROW(31, "Compression failed : %s", LZ4F_getErrorName(cSize));
         compressedfilesize = cSize;
         DISPLAYUPDATE(2, "\rRead : %u MB   ==> %.2f%%   ",
                       (unsigned)(filesize>>20), (double)compressedfilesize/(filesize+!filesize)*100);   /* avoid division by zero */
 
         /* Write Block */
-        {   size_t const sizeCheck = fwrite(dstBuffer, 1, cSize, dstFile);
-            if (sizeCheck!=cSize) EXM_THROW(32, "Write error : cannot write compressed block");
+        if (fwrite(dstBuffer, 1, cSize, dstFile) != cSize) {
+            EXM_THROW(32, "Write error : failed writing single-block compressed frame");
     }   }
 
     else
 
     /* multiple-blocks file */
     {
-        /* Write Archive Header */
-        size_t headerSize = LZ4F_compressBegin_usingCDict(ctx, dstBuffer, dstBufferSize, ress.cdict, &prefs);
+        /* Write Frame Header */
+        size_t const headerSize = LZ4F_compressBegin_usingCDict(ctx, dstBuffer, dstBufferSize, ress.cdict, &prefs);
         if (LZ4F_isError(headerSize)) EXM_THROW(33, "File header generation failed : %s", LZ4F_getErrorName(headerSize));
-        { size_t const sizeCheck = fwrite(dstBuffer, 1, headerSize, dstFile);
-          if (sizeCheck!=headerSize) EXM_THROW(34, "Write error : cannot write header"); }
+        if (fwrite(dstBuffer, 1, headerSize, dstFile) != headerSize)
+            EXM_THROW(34, "Write error : cannot write header");
         compressedfilesize += headerSize;
 
-        /* Main Loop */
+        /* Main Loop - one block at a time */
         while (readSize>0) {
-            size_t outSize;
-
-            /* Compress Block */
-            outSize = LZ4F_compressUpdate(ctx, dstBuffer, dstBufferSize, srcBuffer, readSize, NULL);
-            if (LZ4F_isError(outSize)) EXM_THROW(35, "Compression failed : %s", LZ4F_getErrorName(outSize));
+            size_t const outSize = LZ4F_compressUpdate(ctx, dstBuffer, dstBufferSize, srcBuffer, readSize, NULL);
+            if (LZ4F_isError(outSize))
+                EXM_THROW(35, "Compression failed : %s", LZ4F_getErrorName(outSize));
             compressedfilesize += outSize;
-            DISPLAYUPDATE(2, "\rRead : %u MB   ==> %.2f%%   ", (unsigned)(filesize>>20), (double)compressedfilesize/filesize*100);
+            DISPLAYUPDATE(2, "\rRead : %u MB   ==> %.2f%%   ",
+                        (unsigned)(filesize>>20), (double)compressedfilesize/filesize*100);
 
             /* Write Block */
-            { size_t const sizeCheck = fwrite(dstBuffer, 1, outSize, dstFile);
-              if (sizeCheck!=outSize) EXM_THROW(36, "Write error : cannot write compressed block"); }
+            if (fwrite(dstBuffer, 1, outSize, dstFile) != outSize)
+                EXM_THROW(36, "Write error : cannot write compressed block");
 
             /* Read next block */
             readSize  = fread(srcBuffer, (size_t)1, (size_t)blockSize, srcFile);
@@ -687,18 +734,18 @@
         }
         if (ferror(srcFile)) EXM_THROW(37, "Error reading %s ", srcFileName);
 
-        /* End of Stream mark */
-        headerSize = LZ4F_compressEnd(ctx, dstBuffer, dstBufferSize, NULL);
-        if (LZ4F_isError(headerSize)) EXM_THROW(38, "End of file generation failed : %s", LZ4F_getErrorName(headerSize));
-
-        { size_t const sizeCheck = fwrite(dstBuffer, 1, headerSize, dstFile);
-          if (sizeCheck!=headerSize) EXM_THROW(39, "Write error : cannot write end of stream"); }
-        compressedfilesize += headerSize;
-    }
+        /* End of Frame mark */
+        {   size_t const endSize = LZ4F_compressEnd(ctx, dstBuffer, dstBufferSize, NULL);
+            if (LZ4F_isError(endSize))
+                EXM_THROW(38, "End of frame error : %s", LZ4F_getErrorName(endSize));
+            if (fwrite(dstBuffer, 1, endSize, dstFile) != endSize)
+                EXM_THROW(39, "Write error : cannot write end of frame");
+            compressedfilesize += endSize;
+    }   }
 
     /* Release file handlers */
     fclose (srcFile);
-    if (strcmp(dstFileName,stdoutmark)) fclose (dstFile);   /* do not close stdout */
+    if (strcmp(dstFileName,stdoutmark)) fclose (dstFile);  /* do not close stdout */
 
     /* Copy owner, file permissions and modification time */
     {   stat_t statbuf;
@@ -724,13 +771,13 @@
 }
 
 
-int LZ4IO_compressFilename(LZ4IO_prefs_t* const prefs, const char* srcFileName, const char* dstFileName, int compressionLevel)
+int LZ4IO_compressFilename(const char* srcFileName, const char* dstFileName, int compressionLevel, const LZ4IO_prefs_t* prefs)
 {
     UTIL_time_t const timeStart = UTIL_getTime();
     clock_t const cpuStart = clock();
     cRess_t const ress = LZ4IO_createCResources(prefs);
 
-    int const result = LZ4IO_compressFilename_extRess(prefs, ress, srcFileName, dstFileName, compressionLevel);
+    int const result = LZ4IO_compressFilename_extRess(ress, srcFileName, dstFileName, compressionLevel, prefs);
 
     /* Free resources */
     LZ4IO_freeCResources(ress);
@@ -748,11 +795,11 @@
 }
 
 
-#define FNSPACE 30
-int LZ4IO_compressMultipleFilenames(LZ4IO_prefs_t* const prefs,
+int LZ4IO_compressMultipleFilenames(
                               const char** inFileNamesTable, int ifntSize,
                               const char* suffix,
-                              int compressionLevel)
+                              int compressionLevel,
+                              const LZ4IO_prefs_t* prefs)
 {
     int i;
     int missed_files = 0;
@@ -768,9 +815,9 @@
     for (i=0; i<ifntSize; i++) {
         size_t const ifnSize = strlen(inFileNamesTable[i]);
         if (!strcmp(suffix, stdoutmark)) {
-            missed_files += LZ4IO_compressFilename_extRess(prefs, ress,
+            missed_files += LZ4IO_compressFilename_extRess(ress,
                                     inFileNamesTable[i], stdoutmark,
-                                    compressionLevel);
+                                    compressionLevel, prefs);
             continue;
         }
         if (ofnSize <= ifnSize+suffixSize+1) {
@@ -784,9 +831,9 @@
         strcpy(dstFileName, inFileNamesTable[i]);
         strcat(dstFileName, suffix);
 
-        missed_files += LZ4IO_compressFilename_extRess(prefs, ress,
+        missed_files += LZ4IO_compressFilename_extRess(ress,
                                 inFileNamesTable[i], dstFileName,
-                                compressionLevel);
+                                compressionLevel, prefs);
     }
 
     /* Close & Free */
@@ -813,7 +860,11 @@
 }
 
 
-static unsigned LZ4IO_fwriteSparse(LZ4IO_prefs_t* const prefs, FILE* file, const void* buffer, size_t bufferSize, unsigned storedSkips)
+static unsigned
+LZ4IO_fwriteSparse(FILE* file,
+                   const void* buffer, size_t bufferSize,
+                   int sparseFileSupport,
+                   unsigned storedSkips)
 {
     const size_t sizeT = sizeof(size_t);
     const size_t maskT = sizeT -1 ;
@@ -822,8 +873,9 @@
     size_t bufferSizeT = bufferSize / sizeT;
     const size_t* const bufferTEnd = bufferT + bufferSizeT;
     const size_t segmentSizeT = (32 KB) / sizeT;
+    int const sparseMode = (sparseFileSupport - (file==stdout)) > 0;
 
-    if (!prefs->sparseFileSupport) {  /* normal write */
+    if (!sparseMode) {  /* normal write */
         size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file);
         if (sizeCheck != bufferSize) EXM_THROW(70, "Write error : cannot write decoded block");
         return 0;
@@ -871,7 +923,7 @@
             int const seekResult = UTIL_fseek(file, storedSkips, SEEK_CUR);
             if (seekResult) EXM_THROW(74, "Sparse skip error ; try --no-sparse");
             storedSkips = 0;
-            {   size_t const sizeCheck = fwrite(restPtr, 1, restEnd - restPtr, file);
+            {   size_t const sizeCheck = fwrite(restPtr, 1, (size_t)(restEnd - restPtr), file);
                 if (sizeCheck != (size_t)(restEnd - restPtr)) EXM_THROW(75, "Write error : cannot write decoded end of block");
         }   }
     }
@@ -881,18 +933,18 @@
 
 static void LZ4IO_fwriteSparseEnd(FILE* file, unsigned storedSkips)
 {
-    if (storedSkips>0) {   /* implies g_sparseFileSupport>0 */
-        int const seekResult = UTIL_fseek(file, storedSkips-1, SEEK_CUR);
-        if (seekResult != 0) EXM_THROW(69, "Final skip error (sparse file)\n");
-        {   const char lastZeroByte[1] = { 0 };
-            size_t const sizeCheck = fwrite(lastZeroByte, 1, 1, file);
-            if (sizeCheck != 1) EXM_THROW(69, "Write error : cannot write last zero\n");
-    }   }
+    if (storedSkips>0) {   /* implies sparseFileSupport>0 */
+        const char lastZeroByte[1] = { 0 };
+        if (UTIL_fseek(file, storedSkips-1, SEEK_CUR) != 0)
+            EXM_THROW(69, "Final skip error (sparse file)\n");
+        if (fwrite(lastZeroByte, 1, 1, file) != 1)
+            EXM_THROW(69, "Write error : cannot write last zero\n");
+    }
 }
 
 
 static unsigned g_magicRead = 0;   /* out-parameter of LZ4IO_decodeLegacyStream() */
-static unsigned long long LZ4IO_decodeLegacyStream(LZ4IO_prefs_t* const prefs, FILE* finput, FILE* foutput)
+static unsigned long long LZ4IO_decodeLegacyStream(FILE* finput, FILE* foutput, const LZ4IO_prefs_t* prefs)
 {
     unsigned long long streamSize = 0;
     unsigned storedSkips = 0;
@@ -926,7 +978,7 @@
             if (decodeSize < 0) EXM_THROW(53, "Decoding Failed ! Corrupted input detected !");
             streamSize += (unsigned long long)decodeSize;
             /* Write Block */
-            storedSkips = LZ4IO_fwriteSparse(prefs, foutput, out_buff, (size_t)decodeSize, storedSkips); /* success or die */
+            storedSkips = LZ4IO_fwriteSparse(foutput, out_buff, (size_t)decodeSize, prefs->sparseFileSupport, storedSkips); /* success or die */
     }   }
     if (ferror(finput)) EXM_THROW(54, "Read error : ferror");
 
@@ -952,19 +1004,20 @@
     size_t dictBufferSize;
 } dRess_t;
 
-static void LZ4IO_loadDDict(LZ4IO_prefs_t* const prefs, dRess_t* ress) {
+static void LZ4IO_loadDDict(dRess_t* ress, const LZ4IO_prefs_t* const prefs)
+{
     if (!prefs->useDictionary) {
         ress->dictBuffer = NULL;
         ress->dictBufferSize = 0;
         return;
     }
 
-    ress->dictBuffer = LZ4IO_createDict(prefs, &ress->dictBufferSize);
+    ress->dictBuffer = LZ4IO_createDict(&ress->dictBufferSize, prefs->dictionaryFilename);
     if (!ress->dictBuffer) EXM_THROW(25, "Dictionary error : could not create dictionary");
 }
 
 static const size_t LZ4IO_dBufferSize = 64 KB;
-static dRess_t LZ4IO_createDResources(LZ4IO_prefs_t* const prefs)
+static dRess_t LZ4IO_createDResources(const LZ4IO_prefs_t* const prefs)
 {
     dRess_t ress;
 
@@ -979,7 +1032,7 @@
     ress.dstBuffer = malloc(ress.dstBufferSize);
     if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(61, "Allocation error : not enough memory");
 
-    LZ4IO_loadDDict(prefs, &ress);
+    LZ4IO_loadDDict(&ress, prefs);
 
     ress.dstFile = NULL;
     return ress;
@@ -995,7 +1048,10 @@
 }
 
 
-static unsigned long long LZ4IO_decompressLZ4F(LZ4IO_prefs_t* const prefs, dRess_t ress, FILE* srcFile, FILE* dstFile)
+static unsigned long long
+LZ4IO_decompressLZ4F(dRess_t ress,
+                     FILE* const srcFile, FILE* const dstFile,
+                     const LZ4IO_prefs_t* const prefs)
 {
     unsigned long long filesize = 0;
     LZ4F_errorCode_t nextToLoad;
@@ -1031,7 +1087,7 @@
             /* Write Block */
             if (decodedBytes) {
                 if (!prefs->testMode)
-                    storedSkips = LZ4IO_fwriteSparse(prefs, dstFile, ress.dstBuffer, decodedBytes, storedSkips);
+                    storedSkips = LZ4IO_fwriteSparse(dstFile, ress.dstBuffer, decodedBytes, prefs->sparseFileSupport, storedSkips);
                 filesize += decodedBytes;
                 DISPLAYUPDATE(2, "\rDecompressed : %u MB  ", (unsigned)(filesize>>20));
             }
@@ -1049,22 +1105,30 @@
 }
 
 
+/* LZ4IO_passThrough:
+ * just output the same content as input, no decoding.
+ * This is a capability of zcat, and by extension lz4cat
+ * MNstore : contains the first MAGICNUMBER_SIZE bytes already read from finput
+ */
 #define PTSIZE  (64 KB)
 #define PTSIZET (PTSIZE / sizeof(size_t))
-static unsigned long long LZ4IO_passThrough(LZ4IO_prefs_t* const prefs, FILE* finput, FILE* foutput, unsigned char MNstore[MAGICNUMBER_SIZE])
+static unsigned long long
+LZ4IO_passThrough(FILE* finput, FILE* foutput,
+                  unsigned char MNstore[MAGICNUMBER_SIZE],
+                  int sparseFileSupport)
 {
 	size_t buffer[PTSIZET];
     size_t readBytes = 1;
     unsigned long long total = MAGICNUMBER_SIZE;
     unsigned storedSkips = 0;
 
-    size_t const sizeCheck = fwrite(MNstore, 1, MAGICNUMBER_SIZE, foutput);
-    if (sizeCheck != MAGICNUMBER_SIZE) EXM_THROW(50, "Pass-through write error");
-
+    if (fwrite(MNstore, 1, MAGICNUMBER_SIZE, foutput) != MAGICNUMBER_SIZE) {
+        EXM_THROW(50, "Pass-through write error");
+    }
     while (readBytes) {
-        readBytes = fread(buffer, 1, PTSIZE, finput);
+        readBytes = fread(buffer, 1, sizeof(buffer), finput);
         total += readBytes;
-        storedSkips = LZ4IO_fwriteSparse(prefs, foutput, buffer, readBytes, storedSkips);
+        storedSkips = LZ4IO_fwriteSparse(foutput, buffer, readBytes, sparseFileSupport, storedSkips);
     }
     if (ferror(finput)) EXM_THROW(51, "Read Error");
 
@@ -1091,7 +1155,10 @@
 }
 
 #define ENDOFSTREAM ((unsigned long long)-1)
-static unsigned long long selectDecoder(LZ4IO_prefs_t* const prefs, dRess_t ress, FILE* finput, FILE* foutput)
+static unsigned long long
+selectDecoder(dRess_t ress,
+              FILE* finput, FILE* foutput,
+              const LZ4IO_prefs_t* const prefs)
 {
     unsigned char MNstore[MAGICNUMBER_SIZE];
     unsigned magicNumber;
@@ -1117,10 +1184,10 @@
     switch(magicNumber)
     {
     case LZ4IO_MAGICNUMBER:
-        return LZ4IO_decompressLZ4F(prefs, ress, finput, foutput);
+        return LZ4IO_decompressLZ4F(ress, finput, foutput, prefs);
     case LEGACY_MAGICNUMBER:
         DISPLAYLEVEL(4, "Detected : Legacy format \n");
-        return LZ4IO_decodeLegacyStream(prefs, finput, foutput);
+        return LZ4IO_decodeLegacyStream(finput, foutput, prefs);
     case LZ4IO_SKIPPABLE0:
         DISPLAYLEVEL(4, "Skipping detected skippable area \n");
         {   size_t const nbReadBytes = fread(MNstore, 1, 4, finput);
@@ -1139,7 +1206,7 @@
             /* Wrong magic number at the beginning of 1st stream */
             if (!prefs->testMode && prefs->overwrite && prefs->passThrough) {
                 nbFrames = 0;
-                return LZ4IO_passThrough(prefs, finput, foutput, MNstore);
+                return LZ4IO_passThrough(finput, foutput, MNstore, prefs->sparseFileSupport);
             }
             EXM_THROW(44,"Unrecognized header : file cannot be decoded");
         }
@@ -1154,7 +1221,10 @@
 }
 
 
-static int LZ4IO_decompressSrcFile(LZ4IO_prefs_t* const prefs, dRess_t ress, const char* input_filename, const char* output_filename)
+static int
+LZ4IO_decompressSrcFile(dRess_t ress,
+                        const char* input_filename, const char* output_filename,
+                        const LZ4IO_prefs_t* const prefs)
 {
     FILE* const foutput = ress.dstFile;
     unsigned long long filesize = 0;
@@ -1162,11 +1232,12 @@
     /* Init */
     FILE* const finput = LZ4IO_openSrcFile(input_filename);
     if (finput==NULL) return 1;
+    assert(foutput != NULL);
 
     /* Loop over multiple streams */
     for ( ; ; ) {  /* endless loop, see break condition */
         unsigned long long const decodedSize =
-                        selectDecoder(prefs, ress, finput, foutput);
+                        selectDecoder(ress, finput, foutput, prefs);
         if (decodedSize == ENDOFSTREAM) break;
         filesize += decodedSize;
     }
@@ -1187,11 +1258,14 @@
 }
 
 
-static int LZ4IO_decompressDstFile(LZ4IO_prefs_t* const prefs, dRess_t ress, const char* input_filename, const char* output_filename)
+static int
+LZ4IO_decompressDstFile(dRess_t ress,
+                        const char* input_filename, const char* output_filename,
+                        const LZ4IO_prefs_t* const prefs)
 {
     stat_t statbuf;
     int stat_result = 0;
-    FILE* const foutput = LZ4IO_openDstFile(prefs, output_filename);
+    FILE* const foutput = LZ4IO_openDstFile(output_filename, prefs);
     if (foutput==NULL) return 1;   /* failure */
 
     if ( strcmp(input_filename, stdinmark)
@@ -1199,7 +1273,7 @@
         stat_result = 1;
 
     ress.dstFile = foutput;
-    LZ4IO_decompressSrcFile(prefs, ress, input_filename, output_filename);
+    LZ4IO_decompressSrcFile(ress, input_filename, output_filename, prefs);
 
     fclose(foutput);
 
@@ -1215,12 +1289,12 @@
 }
 
 
-int LZ4IO_decompressFilename(LZ4IO_prefs_t* const prefs, const char* input_filename, const char* output_filename)
+int LZ4IO_decompressFilename(const char* input_filename, const char* output_filename, const LZ4IO_prefs_t* prefs)
 {
     dRess_t const ress = LZ4IO_createDResources(prefs);
     clock_t const start = clock();
 
-    int const missingFiles = LZ4IO_decompressDstFile(prefs, ress, input_filename, output_filename);
+    int const missingFiles = LZ4IO_decompressDstFile(ress, input_filename, output_filename, prefs);
 
     clock_t const end = clock();
     double const seconds = (double)(end - start) / CLOCKS_PER_SEC;
@@ -1231,9 +1305,10 @@
 }
 
 
-int LZ4IO_decompressMultipleFilenames(LZ4IO_prefs_t* const prefs,
-                                const char** inFileNamesTable, int ifntSize,
-                                const char* suffix)
+int LZ4IO_decompressMultipleFilenames(
+                            const char** inFileNamesTable, int ifntSize,
+                            const char* suffix,
+                            const LZ4IO_prefs_t* prefs)
 {
     int i;
     int skippedFiles = 0;
@@ -1243,21 +1318,21 @@
     size_t const suffixSize = strlen(suffix);
     dRess_t ress = LZ4IO_createDResources(prefs);
 
-    if (outFileName==NULL) return ifntSize;   /* not enough memory */
-    ress.dstFile = LZ4IO_openDstFile(prefs, stdoutmark);
+    if (outFileName==NULL) EXM_THROW(70, "Memory allocation error");
+    ress.dstFile = LZ4IO_openDstFile(stdoutmark, prefs);
 
     for (i=0; i<ifntSize; i++) {
         size_t const ifnSize = strlen(inFileNamesTable[i]);
         const char* const suffixPtr = inFileNamesTable[i] + ifnSize - suffixSize;
         if (!strcmp(suffix, stdoutmark)) {
-            missingFiles += LZ4IO_decompressSrcFile(prefs, ress, inFileNamesTable[i], stdoutmark);
+            missingFiles += LZ4IO_decompressSrcFile(ress, inFileNamesTable[i], stdoutmark, prefs);
             continue;
         }
         if (ofnSize <= ifnSize-suffixSize+1) {
             free(outFileName);
             ofnSize = ifnSize + 20;
             outFileName = (char*)malloc(ofnSize);
-            if (outFileName==NULL) return ifntSize;
+            if (outFileName==NULL) EXM_THROW(71, "Memory allocation error");
         }
         if (ifnSize <= suffixSize  ||  strcmp(suffixPtr, suffix) != 0) {
             DISPLAYLEVEL(1, "File extension doesn't match expected LZ4_EXTENSION (%4s); will not process file: %s\n", suffix, inFileNamesTable[i]);
@@ -1266,7 +1341,7 @@
         }
         memcpy(outFileName, inFileNamesTable[i], ifnSize - suffixSize);
         outFileName[ifnSize-suffixSize] = '\0';
-        missingFiles += LZ4IO_decompressDstFile(prefs, ress, inFileNamesTable[i], outFileName);
+        missingFiles += LZ4IO_decompressDstFile(ress, inFileNamesTable[i], outFileName, prefs);
     }
 
     LZ4IO_freeDResources(ress);
@@ -1303,7 +1378,7 @@
     unsigned short allContentSize;
 } LZ4IO_cFileInfo_t;
 
-#define LZ4IO_INIT_CFILEINFO   { NULL, 0ULL, 0, LZ4IO_INIT_FRAMEINFO, 1, 1, 1 }
+#define LZ4IO_INIT_CFILEINFO  { NULL, 0ULL, 0, LZ4IO_INIT_FRAMEINFO, 1, 1, 1 }
 
 typedef enum { LZ4IO_LZ4F_OK, LZ4IO_format_not_known, LZ4IO_not_a_file } LZ4IO_infoResult;
 
@@ -1315,9 +1390,11 @@
    returns 0 in case it can't succesfully skip block data.
    Assumes SEEK_CUR after frame header.
  */
-static unsigned long long LZ4IO_skipBlocksData(FILE* finput,
-        const LZ4F_blockChecksum_t blockChecksumFlag,
-        const LZ4F_contentChecksum_t contentChecksumFlag) {
+static unsigned long long
+LZ4IO_skipBlocksData(FILE* finput,
+               const LZ4F_blockChecksum_t blockChecksumFlag,
+               const LZ4F_contentChecksum_t contentChecksumFlag)
+{
     unsigned char blockInfo[LZ4F_BLOCK_HEADER_SIZE];
     unsigned long long totalBlocksSize = 0;
     for (;;) {
@@ -1326,8 +1403,7 @@
             return 0;
         }
         totalBlocksSize += LZ4F_BLOCK_HEADER_SIZE;
-        {
-            const unsigned long nextCBlockSize = LZ4IO_readLE32(&blockInfo) & 0x7FFFFFFFU;
+        {   const unsigned long nextCBlockSize = LZ4IO_readLE32(&blockInfo) & 0x7FFFFFFFU;
             const unsigned long nextBlock = nextCBlockSize + (blockChecksumFlag * LZ4F_BLOCK_CHECKSUM_SIZE);
             if (nextCBlockSize == 0) {
                 /* Reached EndMark */
@@ -1342,11 +1418,9 @@
             }
             totalBlocksSize += nextBlock;
             /* skip to the next block */
-            if (UTIL_fseek(finput, nextBlock, SEEK_CUR) != 0) {
-                return 0;
-            }
-        }
-    }
+            assert(nextBlock < LONG_MAX);
+            if (UTIL_fseek(finput, (long)nextBlock, SEEK_CUR) != 0) return 0;
+    }   }
     return totalBlocksSize;
 }
 
@@ -1357,7 +1431,8 @@
    This works as long as legacy block header size = magic number size.
    Assumes SEEK_CUR after frame header.
  */
-static unsigned long long LZ4IO_skipLegacyBlocksData(FILE* finput) {
+static unsigned long long LZ4IO_skipLegacyBlocksData(FILE* finput)
+{
     unsigned char blockInfo[LZIO_LEGACY_BLOCK_HEADER_SIZE];
     unsigned long long totalBlocksSize = 0;
     LZ4IO_STATIC_ASSERT(LZIO_LEGACY_BLOCK_HEADER_SIZE == MAGICNUMBER_SIZE);
@@ -1380,14 +1455,15 @@
             /* skip to the next block */
             if (UTIL_fseek(finput, nextCBlockSize, SEEK_CUR) != 0) {
                 return 0;
-            }
-        }
-    }
+    }   }   }
     return totalBlocksSize;
 }
 
-/* buffer : must be a valid memory area of at least 4 bytes */
-const char* LZ4IO_blockTypeID(int sizeID, int blockMode, char* buffer) {
+/* LZ4IO_blockTypeID:
+ * return human-readable block type, following command line convention
+ * buffer : must be a valid memory area of at least 4 bytes */
+const char* LZ4IO_blockTypeID(LZ4F_blockSizeID_t sizeID, LZ4F_blockMode_t blockMode, char buffer[4])
+{
     buffer[0] = 'B';
     assert(sizeID >= 4); assert(sizeID <= 7);
     buffer[1] = (char)(sizeID + '0');
@@ -1397,7 +1473,8 @@
 }
 
 /* buffer : must be valid memory area of at least 10 bytes */
-static const char* LZ4IO_toHuman(long double size, char *buf) {
+static const char* LZ4IO_toHuman(long double size, char *buf)
+{
     const char units[] = {"\0KMGTPEZY"};
     size_t i = 0;
     for (; size >= 1024; i++) size /= 1024;
@@ -1406,14 +1483,15 @@
 }
 
 /* Get filename without path prefix */
-static const char* LZ4IO_baseName(const char* input_filename) {
+static const char* LZ4IO_baseName(const char* input_filename)
+{
     const char* b = strrchr(input_filename, '/');
     if (!b) b = strrchr(input_filename, '\\');
     if (!b) return input_filename;
-    return b ? b + 1 : b;
+    return b + 1;
 }
 
-/* Report frame/s information in verbose mode.
+/* Report frame/s information (--list) in verbose mode (-v).
  * Will populate file info with fileName and frameSummary where applicable.
  * - TODO :
  *  + report nb of blocks, hence max. possible decompressed size (when not reported in header)
@@ -1424,17 +1502,20 @@
     LZ4IO_infoResult result = LZ4IO_format_not_known;  /* default result (error) */
     unsigned char buffer[LZ4F_HEADER_SIZE_MAX];
     FILE* const finput = LZ4IO_openSrcFile(input_filename);
-    cfinfo->fileSize = UTIL_getFileSize(input_filename);
+
+    if (finput == NULL) return LZ4IO_not_a_file;
+    cfinfo->fileSize = UTIL_getOpenFileSize(finput);
 
     while (!feof(finput)) {
         LZ4IO_frameInfo_t frameInfo = LZ4IO_INIT_FRAMEINFO;
         unsigned magicNumber;
         /* Get MagicNumber */
-        size_t nbReadBytes = fread(buffer, 1, MAGICNUMBER_SIZE, finput);
-        if (nbReadBytes == 0) { break; } /* EOF */
-        result = LZ4IO_format_not_known;  /* default result (error) */
-        if (nbReadBytes != MAGICNUMBER_SIZE)
-            EXM_THROW(40, "Unrecognized header : Magic Number unreadable");
+        {   size_t const nbReadBytes = fread(buffer, 1, MAGICNUMBER_SIZE, finput);
+            if (nbReadBytes == 0) { break; } /* EOF */
+            result = LZ4IO_format_not_known;  /* default result (error) */
+            if (nbReadBytes != MAGICNUMBER_SIZE) {
+                EXM_THROW(40, "Unrecognized header : Magic Number unreadable");
+        }   }
         magicNumber = LZ4IO_readLE32(buffer);   /* Little Endian format */
         if (LZ4IO_isSkippableMagicNumber(magicNumber))
             magicNumber = LZ4IO_SKIPPABLE0;   /* fold skippable magic numbers */
@@ -1447,56 +1528,49 @@
                 if (!readBytes || ferror(finput)) EXM_THROW(71, "Error reading %s", input_filename);
             }
             {   size_t hSize = LZ4F_headerSize(&buffer, LZ4F_HEADER_SIZE_MIN);
-                if (!LZ4F_isError(hSize)) {
-                    if (hSize > (LZ4F_HEADER_SIZE_MIN + MAGICNUMBER_SIZE)) {
-                        /* We've already read LZ4F_HEADER_SIZE_MIN so read any extra until hSize*/
-                        const size_t readBytes = fread(buffer + LZ4F_HEADER_SIZE_MIN, 1, hSize - LZ4F_HEADER_SIZE_MIN, finput);
-                        if (!readBytes || ferror(finput)) EXM_THROW(72, "Error reading %s", input_filename);
-                    }
-                    /* Create decompression context */
-                    {   LZ4F_dctx* dctx;
-                        unsigned isError = LZ4F_isError(LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION));
-                        if (!isError) {
-                            isError = LZ4F_isError(LZ4F_getFrameInfo(dctx, &frameInfo.lz4FrameInfo, buffer, &hSize));
-                            LZ4F_freeDecompressionContext(dctx);
-                            if (!isError) {
-                                if ((cfinfo->frameSummary.lz4FrameInfo.blockSizeID != frameInfo.lz4FrameInfo.blockSizeID ||
-                                        cfinfo->frameSummary.lz4FrameInfo.blockMode != frameInfo.lz4FrameInfo.blockMode)
-                                        && cfinfo->frameCount != 0)
-                                    cfinfo->eqBlockTypes = 0;
-                                {   const unsigned long long totalBlocksSize = LZ4IO_skipBlocksData(finput,
-                                            frameInfo.lz4FrameInfo.blockChecksumFlag,
-                                            frameInfo.lz4FrameInfo.contentChecksumFlag);
-                                    if (totalBlocksSize) {
-                                        char bTypeBuffer[5];
-                                        LZ4IO_blockTypeID(frameInfo.lz4FrameInfo.blockSizeID, frameInfo.lz4FrameInfo.blockMode, bTypeBuffer);
-                                        DISPLAYLEVEL(3, "    %6llu %14s %5s %8s",
-                                                     cfinfo->frameCount + 1,
-                                                     LZ4IO_frameTypeNames[frameInfo.frameType],
-                                                     bTypeBuffer,
-                                                     frameInfo.lz4FrameInfo.contentChecksumFlag ? "XXH32" : "-");
-                                        if (frameInfo.lz4FrameInfo.contentSize) {
-                                            {   double const ratio = (double)(totalBlocksSize + hSize) / frameInfo.lz4FrameInfo.contentSize * 100;
-                                                DISPLAYLEVEL(3, " %20llu %20llu %9.2f%%\n",
-                                                             totalBlocksSize + hSize,
-                                                             frameInfo.lz4FrameInfo.contentSize,
-                                                             ratio);
-                                            }
-                                            /* Now we've consumed frameInfo we can use it to store the total contentSize */
-                                            frameInfo.lz4FrameInfo.contentSize += cfinfo->frameSummary.lz4FrameInfo.contentSize;
-                                        }
-                                        else {
-                                            DISPLAYLEVEL(3, " %20llu %20s %9s \n", totalBlocksSize + hSize, "-", "-");
-                                            cfinfo->allContentSize = 0;
-                                        }
-                                        result = LZ4IO_LZ4F_OK;
-                                    }
-                                }
-                            }
-                        }
-                    }
+                if (LZ4F_isError(hSize)) break;
+                if (hSize > (LZ4F_HEADER_SIZE_MIN + MAGICNUMBER_SIZE)) {
+                    /* We've already read LZ4F_HEADER_SIZE_MIN so read any extra until hSize*/
+                    const size_t readBytes = fread(buffer + LZ4F_HEADER_SIZE_MIN, 1, hSize - LZ4F_HEADER_SIZE_MIN, finput);
+                    if (!readBytes || ferror(finput)) EXM_THROW(72, "Error reading %s", input_filename);
                 }
-            }
+                /* Create decompression context */
+                {   LZ4F_dctx* dctx;
+                    if ( LZ4F_isError(LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION)) ) break;
+                    {   unsigned const frameInfoError = LZ4F_isError(LZ4F_getFrameInfo(dctx, &frameInfo.lz4FrameInfo, buffer, &hSize));
+                        LZ4F_freeDecompressionContext(dctx);
+                        if (frameInfoError) break;
+                        if ((cfinfo->frameSummary.lz4FrameInfo.blockSizeID != frameInfo.lz4FrameInfo.blockSizeID ||
+                                cfinfo->frameSummary.lz4FrameInfo.blockMode != frameInfo.lz4FrameInfo.blockMode)
+                                && cfinfo->frameCount != 0)
+                            cfinfo->eqBlockTypes = 0;
+                        {   const unsigned long long totalBlocksSize = LZ4IO_skipBlocksData(finput,
+                                    frameInfo.lz4FrameInfo.blockChecksumFlag,
+                                    frameInfo.lz4FrameInfo.contentChecksumFlag);
+                            if (totalBlocksSize) {
+                                char bTypeBuffer[5];
+                                LZ4IO_blockTypeID(frameInfo.lz4FrameInfo.blockSizeID, frameInfo.lz4FrameInfo.blockMode, bTypeBuffer);
+                                DISPLAYLEVEL(3, "    %6llu %14s %5s %8s",
+                                             cfinfo->frameCount + 1,
+                                             LZ4IO_frameTypeNames[frameInfo.frameType],
+                                             bTypeBuffer,
+                                             frameInfo.lz4FrameInfo.contentChecksumFlag ? "XXH32" : "-");
+                                if (frameInfo.lz4FrameInfo.contentSize) {
+                                    {   double const ratio = (double)(totalBlocksSize + hSize) / frameInfo.lz4FrameInfo.contentSize * 100;
+                                        DISPLAYLEVEL(3, " %20llu %20llu %9.2f%%\n",
+                                                     totalBlocksSize + hSize,
+                                                     frameInfo.lz4FrameInfo.contentSize,
+                                                     ratio);
+                                    }
+                                    /* Now we've consumed frameInfo we can use it to store the total contentSize */
+                                    frameInfo.lz4FrameInfo.contentSize += cfinfo->frameSummary.lz4FrameInfo.contentSize;
+                                }
+                                else {
+                                    DISPLAYLEVEL(3, " %20llu %20s %9s \n", totalBlocksSize + hSize, "-", "-");
+                                    cfinfo->allContentSize = 0;
+                                }
+                                result = LZ4IO_LZ4F_OK;
+            }   }   }   }   }
             break;
         case LEGACY_MAGICNUMBER:
             frameInfo.frameType = legacyFrame;
@@ -1512,15 +1586,14 @@
                                  totalBlocksSize + 4,
                                  "-", "-");
                     result = LZ4IO_LZ4F_OK;
-                }
-            }
+            }   }
             break;
         case LZ4IO_SKIPPABLE0:
             frameInfo.frameType = skippableFrame;
             if (cfinfo->frameSummary.frameType != skippableFrame && cfinfo->frameCount != 0) cfinfo->eqFrameTypes = 0;
             cfinfo->eqBlockTypes = 0;
             cfinfo->allContentSize = 0;
-            {   nbReadBytes = fread(buffer, 1, 4, finput);
+            {   size_t const nbReadBytes = fread(buffer, 1, 4, finput);
                 if (nbReadBytes != 4)
                     EXM_THROW(42, "Stream error : skippable size unreadable");
             }
@@ -1545,12 +1618,10 @@
             }
         break;
         }
-        if (result != LZ4IO_LZ4F_OK) {
-            break;
-        }
+        if (result != LZ4IO_LZ4F_OK) break;
         cfinfo->frameSummary = frameInfo;
         cfinfo->frameCount++;
-    }
+    }  /* while (!feof(finput)) */
     fclose(finput);
     return result;
 }
@@ -1561,7 +1632,7 @@
     int result = 0;
     size_t idx = 0;
     if (g_displayLevel < 3) {
-        DISPLAY("%10s %14s %5s %11s %13s %9s   %s\n",
+        DISPLAYOUT("%10s %14s %5s %11s %13s %9s   %s\n",
                 "Frames", "Type", "Block", "Compressed", "Uncompressed", "Ratio", "Filename");
     }
     for (; idx < ifnIdx; idx++) {
@@ -1580,13 +1651,12 @@
                 assert(op_result == LZ4IO_format_not_known);
                 DISPLAYLEVEL(1, "lz4: %s: File format not recognized \n", inFileNames[idx]);
                 return 0;
-            }
-        }
+        }   }
         DISPLAYLEVEL(3, "\n");
         if (g_displayLevel < 3) {
             /* Display Summary */
             {   char buffers[3][10];
-                DISPLAY("%10llu %14s %5s %11s %13s ",
+                DISPLAYOUT("%10llu %14s %5s %11s %13s ",
                         cfinfo.frameCount,
                         cfinfo.eqFrameTypes ? LZ4IO_frameTypeNames[cfinfo.frameSummary.frameType] : "-" ,
                         cfinfo.eqBlockTypes ? LZ4IO_blockTypeID(cfinfo.frameSummary.lz4FrameInfo.blockSizeID,
@@ -1595,15 +1665,13 @@
                         cfinfo.allContentSize ? LZ4IO_toHuman((long double)cfinfo.frameSummary.lz4FrameInfo.contentSize, buffers[2]) : "-");
                 if (cfinfo.allContentSize) {
                     double const ratio = (double)cfinfo.fileSize / cfinfo.frameSummary.lz4FrameInfo.contentSize * 100;
-                    DISPLAY("%9.2f%%  %s \n", ratio, cfinfo.fileName);
+                    DISPLAYOUT("%9.2f%%  %s \n", ratio, cfinfo.fileName);
                 } else {
-                    DISPLAY("%9s   %s\n",
+                    DISPLAYOUT("%9s   %s\n",
                             "-",
                             cfinfo.fileName);
-                }
-            }
-        }
-    }
+        }   }   }  /* if (g_displayLevel < 3) */
+    }  /* for (; idx < ifnIdx; idx++) */
 
     return result;
 }
diff --git a/programs/lz4io.h b/programs/lz4io.h
index b189e35..d6d7eee 100644
--- a/programs/lz4io.h
+++ b/programs/lz4io.h
@@ -39,13 +39,13 @@
 /* ************************************************** */
 /* Special input/output values                        */
 /* ************************************************** */
+#define stdinmark  "stdin"
+#define stdoutmark "stdout"
 #define NULL_OUTPUT "null"
-static const char stdinmark[]  = "stdin";
-static const char stdoutmark[] = "stdout";
 #ifdef _WIN32
-static const char nulmark[] = "nul";
+#define nulmark "nul"
 #else
-static const char nulmark[] = "/dev/null";
+#define nulmark "/dev/null"
 #endif
 
 /* ************************************************** */
@@ -55,7 +55,7 @@
 typedef struct LZ4IO_prefs_s LZ4IO_prefs_t;
 
 LZ4IO_prefs_t* LZ4IO_defaultPreferences(void);
-void LZ4IO_freePreferences(LZ4IO_prefs_t* const prefs);
+void LZ4IO_freePreferences(LZ4IO_prefs_t* prefs);
 
 /* Size in bytes of a legacy block header in little-endian format */
 #define LZIO_LEGACY_BLOCK_HEADER_SIZE 4
@@ -65,12 +65,12 @@
 /* ************************************************** */
 
 /* if output_filename == stdoutmark, writes to stdout */
-int LZ4IO_compressFilename(LZ4IO_prefs_t* const prefs, const char* input_filename, const char* output_filename, int compressionlevel);
-int LZ4IO_decompressFilename(LZ4IO_prefs_t* const prefs, const char* input_filename, const char* output_filename);
+int LZ4IO_compressFilename(const char* input_filename, const char* output_filename, int compressionlevel, const LZ4IO_prefs_t* prefs);
+int LZ4IO_decompressFilename(const char* input_filename, const char* output_filename, const LZ4IO_prefs_t* prefs);
 
 /* if suffix == stdoutmark, writes to stdout */
-int LZ4IO_compressMultipleFilenames(LZ4IO_prefs_t* const prefs, const char** inFileNamesTable, int ifntSize, const char* suffix, int compressionlevel);
-int LZ4IO_decompressMultipleFilenames(LZ4IO_prefs_t* const prefs, const char** inFileNamesTable, int ifntSize, const char* suffix);
+int LZ4IO_compressMultipleFilenames(const char** inFileNamesTable, int ifntSize, const char* suffix, int compressionlevel, const LZ4IO_prefs_t* prefs);
+int LZ4IO_decompressMultipleFilenames(const char** inFileNamesTable, int ifntSize, const char* suffix, const LZ4IO_prefs_t* prefs);
 
 
 /* ************************************************** */
diff --git a/programs/platform.h b/programs/platform.h
index c0b3840..ab8300d 100644
--- a/programs/platform.h
+++ b/programs/platform.h
@@ -77,16 +77,17 @@
 *  PLATFORM_POSIX_VERSION = -1 for non-Unix e.g. Windows
 *  PLATFORM_POSIX_VERSION = 0 for Unix-like non-POSIX
 *  PLATFORM_POSIX_VERSION >= 1 is equal to found _POSIX_VERSION
-***************************************************************/
+************************************************************** */
 #if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)) /* UNIX-like OS */ \
    || defined(__midipix__) || defined(__VMS))
 #  if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1–2001 (SUSv3) conformant */ \
-     || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)  || defined(__MidnightBSD__) /* BSD distros */
+     || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)  || defined(__MidnightBSD__) /* BSD distros */ \
+     || defined(__HAIKU__)
 #    define PLATFORM_POSIX_VERSION 200112L
 #  else
 #    if defined(__linux__) || defined(__linux)
 #      ifndef _POSIX_C_SOURCE
-#        define _POSIX_C_SOURCE 200112L  /* use feature test macro */
+#        define _POSIX_C_SOURCE 200809L  /* use feature test macro */
 #      endif
 #    endif
 #    include <unistd.h>  /* declares _POSIX_VERSION */
@@ -104,7 +105,7 @@
 
 /*-*********************************************
 *  Detect if isatty() and fileno() are available
-************************************************/
+*********************************************** */
 #if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 1)) || (PLATFORM_POSIX_VERSION >= 200112L) || defined(__DJGPP__)
 #  include <unistd.h>   /* isatty */
 #  define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
@@ -127,7 +128,7 @@
 
 /******************************
 *  OS-specific Includes
-******************************/
+***************************** */
 #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32)
 #  include <fcntl.h>   /* _O_BINARY */
 #  include <io.h>      /* _setmode, _fileno, _get_osfhandle */
diff --git a/programs/util.h b/programs/util.h
index 1dd515c..733c1ca 100644
--- a/programs/util.h
+++ b/programs/util.h
@@ -33,16 +33,21 @@
 #include <stddef.h>       /* size_t, ptrdiff_t */
 #include <stdlib.h>       /* malloc */
 #include <string.h>       /* strlen, strncpy */
-#include <stdio.h>        /* fprintf */
+#include <stdio.h>        /* fprintf, fileno */
 #include <assert.h>
 #include <sys/types.h>    /* stat, utime */
 #include <sys/stat.h>     /* stat */
-#if defined(_MSC_VER)
+#if defined(_WIN32)
 #  include <sys/utime.h>  /* utime */
 #  include <io.h>         /* _chmod */
 #else
 #  include <unistd.h>     /* chown, stat */
+# if PLATFORM_POSIX_VERSION < 200809L
 #  include <utime.h>      /* utime */
+# else
+#  include <fcntl.h>      /* AT_FDCWD */
+#  include <sys/stat.h>   /* for utimensat */
+# endif
 #endif
 #include <time.h>         /* time */
 #include <limits.h>       /* INT_MAX */
@@ -117,6 +122,36 @@
 #endif
 
 
+/*-****************************************
+*  stat() functions
+******************************************/
+#if defined(_MSC_VER)
+#  define UTIL_TYPE_stat __stat64
+#  define UTIL_stat _stat64
+#  define UTIL_fstat _fstat64
+#  define UTIL_STAT_MODE_ISREG(st_mode) ((st_mode) & S_IFREG)
+#elif   defined(__MINGW32__) && defined (__MSVCRT__)
+#  define UTIL_TYPE_stat _stati64
+#  define UTIL_stat _stati64
+#  define UTIL_fstat _fstati64
+#  define UTIL_STAT_MODE_ISREG(st_mode) ((st_mode) & S_IFREG)
+#else
+#  define UTIL_TYPE_stat stat
+#  define UTIL_stat stat
+#  define UTIL_fstat fstat
+#  define UTIL_STAT_MODE_ISREG(st_mode) (S_ISREG(st_mode))
+#endif
+
+
+/*-****************************************
+*  fileno() function
+******************************************/
+#if defined(_MSC_VER)
+#  define UTIL_fileno _fileno
+#else
+#  define UTIL_fileno fileno
+#endif
+
 /* *************************************
 *  Constants
 ***************************************/
@@ -287,14 +322,23 @@
 UTIL_STATIC int UTIL_setFileStat(const char *filename, stat_t *statbuf)
 {
     int res = 0;
-    struct utimbuf timebuf;
 
     if (!UTIL_isRegFile(filename))
         return -1;
 
-    timebuf.actime = time(NULL);
-    timebuf.modtime = statbuf->st_mtime;
-    res += utime(filename, &timebuf);  /* set access and modification times */
+    {   /* Restore timestamps : access time := now, modification time := value saved in statbuf */
+#if defined(_WIN32) || (PLATFORM_POSIX_VERSION < 200809L)
+        struct utimbuf timebuf;
+        timebuf.actime = time(NULL);
+        timebuf.modtime = statbuf->st_mtime;
+        res += utime(filename, &timebuf);  /* set access and modification times */
+#else
+        struct timespec timebuf[2] = { {0, 0}, {0, 0} };  /* ISO C zero-init ; empty braces are a GNU/C23 extension */
+        timebuf[0].tv_nsec = UTIME_NOW;   /* [0] = access time : ask the system to use the current time */
+        timebuf[1].tv_sec = statbuf->st_mtime;   /* [1] = modification time, whole seconds only */
+        res += utimensat(AT_FDCWD, filename, timebuf, 0);  /* set access and modification times */
+#endif
+    }
 
 #if !defined(_WIN32)
     res += chown(filename, statbuf->st_uid, statbuf->st_gid);  /* Copy ownership */
@@ -343,22 +387,30 @@
 }
 
 
+/* UTIL_getOpenFileSize() : size in bytes of the file behind an already-open stream.
+ * @return : 0 if fstat() fails or the stream is not a regular file; exits if no descriptor is available. */
+UTIL_STATIC U64 UTIL_getOpenFileSize(FILE* file)
+{
+    struct UTIL_TYPE_stat fileStat;
+    int const fd = UTIL_fileno(file);
+
+    if (fd < 0) {   /* no usable descriptor : report and terminate */
+        perror("fileno");
+        exit(1);
+    }
+    if (UTIL_fstat(fd, &fileStat) != 0) return 0;   /* No good... */
+    if (!UTIL_STAT_MODE_ISREG(fileStat.st_mode)) return 0;   /* not a regular file */
+    return (U64)fileStat.st_size;
+}
+
+
 UTIL_STATIC U64 UTIL_getFileSize(const char* infilename)
 {
     int r;
-#if defined(_MSC_VER)
-    struct __stat64 statbuf;
-    r = _stat64(infilename, &statbuf);
-    if (r || !(statbuf.st_mode & S_IFREG)) return 0;   /* No good... */
-#elif defined(__MINGW32__) && defined (__MSVCRT__)
-    struct _stati64 statbuf;
-    r = _stati64(infilename, &statbuf);
-    if (r || !(statbuf.st_mode & S_IFREG)) return 0;   /* No good... */
-#else
-    struct stat statbuf;
-    r = stat(infilename, &statbuf);
-    if (r || !S_ISREG(statbuf.st_mode)) return 0;   /* No good... */
-#endif
+    struct UTIL_TYPE_stat statbuf;
+
+    r = UTIL_stat(infilename, &statbuf);
+    if (r || !UTIL_STAT_MODE_ISREG(statbuf.st_mode)) return 0;   /* No good... */
     return (U64)statbuf.st_size;
 }
 
@@ -542,15 +594,15 @@
 
     for (i=0, pos=0, nbFiles=0; i<inputNamesNb; i++) {
         if (!UTIL_isDirectory(inputNames[i])) {
-            size_t const len = strlen(inputNames[i]);
+            size_t const len = strlen(inputNames[i]) + 1;  /* include nul char */
             if (pos + len >= bufSize) {
                 while (pos + len >= bufSize) bufSize += LIST_SIZE_INCREASE;
                 buf = (char*)UTIL_realloc(buf, bufSize);
                 if (!buf) return NULL;
             }
             assert(pos + len < bufSize);
-            strncpy(buf + pos, inputNames[i], bufSize - pos);
-            pos += len + 1;
+            memcpy(buf + pos, inputNames[i], len);
+            pos += len;
             nbFiles++;
         } else {
             char* bufend = buf + bufSize;
diff --git a/tests/.gitignore b/tests/.gitignore
index 0d13df8..99351af 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -11,6 +11,7 @@
 roundTripTest
 checkTag
 checkFrame
+decompress-partial
 
 # test artefacts
 tmp*
diff --git a/tests/Makefile b/tests/Makefile
index 422baba..6eee132 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -35,7 +35,7 @@
 
 DEBUGLEVEL?= 1
 DEBUGFLAGS = -g -DLZ4_DEBUG=$(DEBUGLEVEL)
-CFLAGS  ?= -O3 # can select custom optimization flags. For example : CFLAGS=-O2 make
+CFLAGS  ?= -O3 # can select custom optimization flags. Example : CFLAGS=-O2 make
 CFLAGS  += -Wall -Wextra -Wundef -Wcast-qual -Wcast-align -Wshadow \
            -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes \
            -Wpointer-arith -Wstrict-aliasing=1
@@ -53,9 +53,10 @@
 FUZZER_TIME  := -T90s
 NB_LOOPS     ?= -i1
 
+.PHONY: default
 default: all
 
-all: fullbench fuzzer frametest roundTripTest datagen checkFrame
+all: fullbench fuzzer frametest roundTripTest datagen checkFrame decompress-partial
 
 all32: CFLAGS+=-m32
 all32: all
@@ -89,6 +90,10 @@
 	$(MAKE) -C $(LZ4DIR) liblz4
 	$(CC) $(FLAGS) $^ -o $@$(EXT) -DLZ4_DLL_IMPORT=1 $(LZ4DIR)/dll/$(LIBLZ4).dll
 
+# test LZ4_USER_MEMORY_FUNCTIONS
+fullbench-wmalloc: CPPFLAGS += -DLZ4_USER_MEMORY_FUNCTIONS
+fullbench-wmalloc: fullbench
+
 fuzzer  : lz4.o lz4hc.o xxhash.o fuzzer.c
 	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
@@ -104,6 +109,10 @@
 checkFrame : lz4frame.o lz4.o lz4hc.o xxhash.o checkFrame.c
 	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
+decompress-partial: lz4.o decompress-partial.c
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
+
+.PHONY: clean
 clean:
 	@$(MAKE) -C $(LZ4DIR) $@ > $(VOID)
 	@$(MAKE) -C $(PRGDIR) $@ > $(VOID)
@@ -114,7 +123,8 @@
         frametest$(EXT) frametest32$(EXT) \
         fasttest$(EXT) roundTripTest$(EXT) \
         datagen$(EXT) checkTag$(EXT) \
-        frameTest$(EXT) lz4_all.c
+        frameTest$(EXT) decompress-partial$(EXT) \
+		lz4_all.c
 	@$(RM) -rf $(TESTDIR)
 	@echo Cleaning completed
 
@@ -146,14 +156,19 @@
 DIFF:=gdiff
 endif
 
+CAT:=cat
 DD:=dd
+DATAGEN:=./datagen
 
 .PHONY: list
 list:
 	@$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs
 
+.PHONY: check
+check: test-lz4-essentials
+
 .PHONY: test
-test: test-lz4 test-lz4c test-frametest test-fullbench test-fuzzer test-install test-amalgamation listTest
+test: test-lz4 test-lz4c test-frametest test-fullbench test-fuzzer test-install test-amalgamation listTest test-decompress-partial
 
 .PHONY: test32
 test32: CFLAGS+=-m32
@@ -161,18 +176,15 @@
 
 test-amalgamation: lz4_all.o
 
-lz4_all.o: lz4_all.c
-	$(CC) $(CFLAGS) $(CPPFLAGS) -c $^ -o $@
-
 lz4_all.c: $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c $(LZ4DIR)/lz4frame.c
-	cat $^ > $@
+	$(CAT) $^ > $@
 
 test-install: lz4 lib liblz4.pc
 	lz4_root=.. ./test_install.sh
 
 test-lz4-sparse: lz4 datagen
 	@echo "\n ---- test sparse file support ----"
-	./datagen -g5M  -P100 > tmplsdg5M
+	$(DATAGEN) -g5M  -P100 > tmplsdg5M
 	$(LZ4) -B4D tmplsdg5M -c | $(LZ4) -dv --sparse > tmplscB4
 	$(DIFF) -s tmplsdg5M tmplscB4
 	$(LZ4) -B5D tmplsdg5M -c | $(LZ4) -dv --sparse > tmplscB5
@@ -184,17 +196,17 @@
 	$(LZ4) tmplsdg5M -c | $(LZ4) -dv --no-sparse > tmplsnosparse
 	$(DIFF) -s tmplsdg5M tmplsnosparse
 	ls -ls tmpls*
-	./datagen -s1 -g1200007 -P100 | $(LZ4) | $(LZ4) -dv --sparse > tmplsodd   # Odd size file (to generate non-full last block)
-	./datagen -s1 -g1200007 -P100 | $(DIFF) -s - tmplsodd
+	$(DATAGEN) -s1 -g1200007 -P100 | $(LZ4) | $(LZ4) -dv --sparse > tmplsodd   # Odd size file (to generate non-full last block)
+	$(DATAGEN) -s1 -g1200007 -P100 | $(DIFF) -s - tmplsodd
 	ls -ls tmplsodd
 	@$(RM) tmpls*
 	@echo "\n Compatibility with Console :"
 	echo "Hello World 1 !" | $(LZ4) | $(LZ4) -d -c
-	echo "Hello World 2 !" | $(LZ4) | $(LZ4) -d | cat
+	echo "Hello World 2 !" | $(LZ4) | $(LZ4) -d | $(CAT)
 	echo "Hello World 3 !" | $(LZ4) --no-frame-crc | $(LZ4) -d -c
 	@echo "\n Compatibility with Append :"
-	./datagen -P100 -g1M > tmplsdg1M
-	cat tmplsdg1M tmplsdg1M > tmpls2M
+	$(DATAGEN) -P100 -g1M > tmplsdg1M
+	$(CAT) tmplsdg1M tmplsdg1M > tmpls2M
 	$(LZ4) -B5 -v tmplsdg1M tmplsc
 	$(LZ4) -d -v tmplsc tmplsr
 	$(LZ4) -d -v tmplsc -c >> tmplsr
@@ -204,20 +216,29 @@
 
 test-lz4-contentSize: lz4 datagen
 	@echo "\n ---- test original size support ----"
-	./datagen -g15M > tmplc1
+	$(DATAGEN) -g15M > tmplc1
 	$(LZ4) -v tmplc1 -c | $(LZ4) -t
 	$(LZ4) -v --content-size tmplc1 -c | $(LZ4) -d > tmplc2
-	$(DIFF) -s tmplc1 tmplc2
+	$(DIFF) tmplc1 tmplc2
+	$(LZ4) -f tmplc1 -c > tmplc1.lz4
+	$(LZ4) --content-size tmplc1 -c > tmplc2.lz4
+	! $(DIFF) tmplc1.lz4 tmplc2.lz4  # must differ, due to content size
+	$(LZ4) --content-size < tmplc1 > tmplc3.lz4
+	$(DIFF) tmplc2.lz4 tmplc3.lz4  # both must contain content size
+	$(CAT) tmplc1 | $(LZ4) > tmplc4.lz4
+	$(DIFF) tmplc1.lz4 tmplc4.lz4  # both don't have content size
+	$(CAT) tmplc1 | $(LZ4) --content-size > tmplc5.lz4 # can't determine content size
+	$(DIFF) tmplc1.lz4 tmplc5.lz4  # both don't have content size
 	@$(RM) tmplc*
 
 test-lz4-frame-concatenation: lz4 datagen
 	@echo "\n ---- test frame concatenation ----"
 	@echo -n > tmp-lfc-empty
 	@echo hi > tmp-lfc-nonempty
-	cat tmp-lfc-nonempty tmp-lfc-empty tmp-lfc-nonempty > tmp-lfc-src
+	$(CAT) tmp-lfc-nonempty tmp-lfc-empty tmp-lfc-nonempty > tmp-lfc-src
 	$(LZ4) -zq tmp-lfc-empty -c > tmp-lfc-empty.lz4
 	$(LZ4) -zq tmp-lfc-nonempty -c > tmp-lfc-nonempty.lz4
-	cat tmp-lfc-nonempty.lz4 tmp-lfc-empty.lz4 tmp-lfc-nonempty.lz4 > tmp-lfc-concat.lz4
+	$(CAT) tmp-lfc-nonempty.lz4 tmp-lfc-empty.lz4 tmp-lfc-nonempty.lz4 > tmp-lfc-concat.lz4
 	$(LZ4) -d tmp-lfc-concat.lz4 -c > tmp-lfc-result
 	$(CMP) tmp-lfc-src tmp-lfc-result
 	@$(RM) tmp-lfc-*
@@ -225,9 +246,9 @@
 
 test-lz4-multiple: lz4 datagen
 	@echo "\n ---- test multiple files ----"
-	@./datagen -s1        > tmp-tlm1 2> $(VOID)
-	@./datagen -s2 -g100K > tmp-tlm2 2> $(VOID)
-	@./datagen -s3 -g200K > tmp-tlm3 2> $(VOID)
+	@$(DATAGEN) -s1        > tmp-tlm1 2> $(VOID)
+	@$(DATAGEN) -s2 -g100K > tmp-tlm2 2> $(VOID)
+	@$(DATAGEN) -s3 -g200K > tmp-tlm3 2> $(VOID)
 	# compress multiple files : one .lz4 per source file
 	$(LZ4) -f -m tmp-tlm*
 	test -f tmp-tlm1.lz4
@@ -242,7 +263,7 @@
 	$(CMP) tmp-tlm2 tmp-tlm2-orig
 	$(CMP) tmp-tlm3 tmp-tlm3-orig
 	# compress multiple files into stdout
-	cat tmp-tlm1.lz4 tmp-tlm2.lz4 tmp-tlm3.lz4 > tmp-tlm-concat1
+	$(CAT) tmp-tlm1.lz4 tmp-tlm2.lz4 tmp-tlm3.lz4 > tmp-tlm-concat1
 	$(RM) *.lz4
 	$(LZ4) -m tmp-tlm1 tmp-tlm2 tmp-tlm3 -c > tmp-tlm-concat2
 	test ! -f tmp-tlm1.lz4  # must not create .lz4 artefact
@@ -250,7 +271,7 @@
 	# decompress multiple files into stdout
 	$(RM) tmp-tlm-concat1 tmp-tlm-concat2
 	$(LZ4) -f -m tmp-tlm1 tmp-tlm2 tmp-tlm3   # generate .lz4 to decompress
-	cat tmp-tlm1 tmp-tlm2 tmp-tlm3 > tmp-tlm-concat1   # create concatenated reference
+	$(CAT) tmp-tlm1 tmp-tlm2 tmp-tlm3 > tmp-tlm-concat1   # create concatenated reference
 	$(RM) tmp-tlm1 tmp-tlm2 tmp-tlm3
 	$(LZ4) -d -m tmp-tlm1.lz4 tmp-tlm2.lz4 tmp-tlm3.lz4 -c > tmp-tlm-concat2
 	test ! -f tmp-tlm1  # must not create file artefact
@@ -259,20 +280,58 @@
 	! $(LZ4) -f -m tmp-tlm-concat1 notHere tmp-tlm-concat2  # must fail : notHere not present
 	@$(RM) tmp-tlm*
 
+test-lz4-multiple-legacy: lz4 datagen
+	@echo "\n ---- test multiple files (Legacy format) ----"
+	@$(DATAGEN) -s1        > tmp-tlm1 2> $(VOID)
+	@$(DATAGEN) -s2 -g100K > tmp-tlm2 2> $(VOID)
+	@$(DATAGEN) -s3 -g200K > tmp-tlm3 2> $(VOID)
+	# compress multiple files using legacy format: one .lz4 per source file
+	$(LZ4) -f -l -m tmp-tlm*
+	test -f tmp-tlm1.lz4
+	test -f tmp-tlm2.lz4
+	test -f tmp-tlm3.lz4
+	# decompress multiple files compressed using legacy format: one output file per .lz4
+	mv tmp-tlm1 tmp-tlm1-orig
+	mv tmp-tlm2 tmp-tlm2-orig
+	mv tmp-tlm3 tmp-tlm3-orig
+	$(LZ4) -d -f -m tmp-tlm*.lz4
+	$(LZ4) -l -d -f -m tmp-tlm*.lz4 # -l mustn't impact -d option
+	$(CMP) tmp-tlm1 tmp-tlm1-orig   # must be identical
+	$(CMP) tmp-tlm2 tmp-tlm2-orig
+	$(CMP) tmp-tlm3 tmp-tlm3-orig
+	# compress multiple files into stdout using legacy format
+	$(CAT) tmp-tlm1.lz4 tmp-tlm2.lz4 tmp-tlm3.lz4 > tmp-tlm-concat1
+	$(RM) tmp-tlm*.lz4  # remove only this test's archives, never unrelated .lz4 files
+	$(LZ4) -l -m tmp-tlm1 tmp-tlm2 tmp-tlm3 -c > tmp-tlm-concat2
+	test ! -f tmp-tlm1.lz4  # must not create .lz4 artefact
+	$(CMP) tmp-tlm-concat1 tmp-tlm-concat2  # must be equivalent
+	# decompress multiple files into stdout using legacy format
+	$(RM) tmp-tlm-concat1 tmp-tlm-concat2
+	$(LZ4) -l -f -m tmp-tlm1 tmp-tlm2 tmp-tlm3   # generate .lz4 to decompress
+	$(CAT) tmp-tlm1 tmp-tlm2 tmp-tlm3 > tmp-tlm-concat1   # create concatenated reference
+	$(RM) tmp-tlm1 tmp-tlm2 tmp-tlm3
+	$(LZ4) -d -m tmp-tlm1.lz4 tmp-tlm2.lz4 tmp-tlm3.lz4 -c > tmp-tlm-concat2
+	$(LZ4) -d -l -m tmp-tlm1.lz4 tmp-tlm2.lz4 tmp-tlm3.lz4 -c > tmp-tlm-concat2 # -l mustn't impact option -d
+	test ! -f tmp-tlm1  # must not create file artefact
+	$(CMP) tmp-tlm-concat1 tmp-tlm-concat2  # must be equivalent
+	# compress multiple files, one of which is absent (must fail)
+	! $(LZ4) -f -l -m tmp-tlm-concat1 notHere-legacy tmp-tlm-concat2  # must fail : notHere-legacy not present
+	@$(RM) tmp-tlm*
+
 test-lz4-basic: lz4 datagen unlz4 lz4cat
 	@echo "\n ---- test lz4 basic compression/decompression ----"
-	./datagen -g0       | $(LZ4) -v     | $(LZ4) -t
-	./datagen -g16KB    | $(LZ4) -9     | $(LZ4) -t
-	./datagen -g20KB > tmp-tlb-dg20k
+	$(DATAGEN) -g0       | $(LZ4) -v     | $(LZ4) -t
+	$(DATAGEN) -g16KB    | $(LZ4) -9     | $(LZ4) -t
+	$(DATAGEN) -g20KB > tmp-tlb-dg20k
 	$(LZ4) < tmp-tlb-dg20k | $(LZ4) -d > tmp-tlb-dec
 	$(DIFF) -q tmp-tlb-dg20k tmp-tlb-dec
 	$(LZ4) --no-frame-crc < tmp-tlb-dg20k | $(LZ4) -d > tmp-tlb-dec
 	$(DIFF) -q tmp-tlb-dg20k tmp-tlb-dec
-	./datagen           | $(LZ4) -BI    | $(LZ4) -t
-	./datagen -g6M -P99 | $(LZ4) -9BD   | $(LZ4) -t
-	./datagen -g17M     | $(LZ4) -9v    | $(LZ4) -qt
-	./datagen -g33M     | $(LZ4) --no-frame-crc | $(LZ4) -t
-	./datagen -g256MB   | $(LZ4) -vqB4D | $(LZ4) -t
+	$(DATAGEN)           | $(LZ4) -BI    | $(LZ4) -t
+	$(DATAGEN) -g6M -P99 | $(LZ4) -9BD   | $(LZ4) -t
+	$(DATAGEN) -g17M     | $(LZ4) -9v    | $(LZ4) -qt
+	$(DATAGEN) -g33M     | $(LZ4) --no-frame-crc | $(LZ4) -t
+	$(DATAGEN) -g256MB   | $(LZ4) -vqB4D | $(LZ4) -t
 	@echo "hello world" > tmp-tlb-hw
 	$(LZ4) --rm -f tmp-tlb-hw tmp-tlb-hw.lz4
 	test ! -f tmp-tlb-hw                      # must fail (--rm)
@@ -301,16 +360,18 @@
 	$(DIFF) -q tmp-tlb-hw tmp-tlb4
 	$(LZ4) -f tmp-tlb-hw
 	$(LZ4) --list tmp-tlb-hw.lz4           # test --list on valid single-frame file
-	cat tmp-tlb-hw >> tmp-tlb-hw.lz4
+	$(CAT) tmp-tlb-hw >> tmp-tlb-hw.lz4
 	$(LZ4) -f tmp-tlb-hw.lz4               # uncompress valid frame followed by invalid data
 	$(LZ4) -BX tmp-tlb-hw -c -q | $(LZ4) -tv  # test block checksum
-	# ./datagen -g20KB generates the same file every single time
-	# cannot save output of ./datagen -g20KB as input file to lz4 because the following shell commands are run before ./datagen -g20KB
-	test "$(shell ./datagen -g20KB | $(LZ4) -c --fast | wc -c)" -lt "$(shell ./datagen -g20KB | $(LZ4) -c --fast=9 | wc -c)" # -1 vs -9
-	test "$(shell ./datagen -g20KB | $(LZ4) -c -1 | wc -c)" -lt "$(shell ./datagen -g20KB| $(LZ4) -c --fast=1 | wc -c)" # 1 vs -1
-	test "$(shell ./datagen -g20KB | $(LZ4) -c --fast=1 | wc -c)" -eq "$(shell ./datagen -g20KB| $(LZ4) -c --fast| wc -c)" # checks default fast compression is -1
+	# $(DATAGEN) -g20KB generates the same file every single time
+	# cannot save output of $(DATAGEN) -g20KB as input file to lz4 because the following shell commands are run before $(DATAGEN) -g20KB
+	test "$(shell $(DATAGEN) -g20KB | $(LZ4) -c --fast | wc -c)" -lt "$(shell $(DATAGEN) -g20KB | $(LZ4) -c --fast=9 | wc -c)" # -1 vs -9
+	test "$(shell $(DATAGEN) -g20KB | $(LZ4) -c -1 | wc -c)" -lt "$(shell $(DATAGEN) -g20KB| $(LZ4) -c --fast=1 | wc -c)" # 1 vs -1
+	test "$(shell $(DATAGEN) -g20KB | $(LZ4) -c --fast=1 | wc -c)" -eq "$(shell $(DATAGEN) -g20KB| $(LZ4) -c --fast| wc -c)" # checks default fast compression is -1
 	! $(LZ4) -c --fast=0 tmp-tlb-dg20K # lz4 should fail when fast=0
 	! $(LZ4) -c --fast=-1 tmp-tlb-dg20K # lz4 should fail when fast=-1
+	# High --fast values can result in out-of-bound dereferences #876
+	$(DATAGEN) -g1M | $(LZ4) -c --fast=999999999 > /dev/null
 	# Test for #596
 	@echo "TEST" > tmp-tlb-test
 	$(LZ4) -m tmp-tlb-test
@@ -322,10 +383,10 @@
 
 test-lz4-dict: lz4 datagen
 	@echo "\n ---- test lz4 compression/decompression with dictionary ----"
-	./datagen -g16KB > tmp-dict
-	./datagen -g32KB > tmp-dict-sample-32k
+	$(DATAGEN) -g16KB > tmp-dict
+	$(DATAGEN) -g32KB > tmp-dict-sample-32k
 	< tmp-dict-sample-32k $(LZ4) -D tmp-dict | $(LZ4) -dD tmp-dict | diff - tmp-dict-sample-32k
-	./datagen -g128MB > tmp-dict-sample-128m
+	$(DATAGEN) -g128MB > tmp-dict-sample-128m
 	< tmp-dict-sample-128m $(LZ4) -D tmp-dict | $(LZ4) -dD tmp-dict | diff - tmp-dict-sample-128m
 	touch tmp-dict-sample-0
 	< tmp-dict-sample-0 $(LZ4) -D tmp-dict | $(LZ4) -dD tmp-dict | diff - tmp-dict-sample-0
@@ -334,10 +395,10 @@
 	< tmp-dict-sample-0 $(LZ4) -D tmp-dict-sample-0 | $(LZ4) -dD tmp-dict-sample-0 | diff - tmp-dict-sample-0
 
 	@echo "\n ---- test lz4 dictionary loading ----"
-	./datagen -g128KB > tmp-dict-data-128KB
+	$(DATAGEN) -g128KB > tmp-dict-data-128KB
 	set -e; \
 	for l in 0 1 4 128 32767 32768 32769 65535 65536 65537 98303 98304 98305 131071 131072 131073; do \
-		./datagen -g$$l > tmp-dict-$$l; \
+		$(DATAGEN) -g$$l > tmp-dict-$$l; \
 		$(DD) if=tmp-dict-$$l of=tmp-dict-$$l-tail bs=1 count=65536 skip=$$((l > 65536 ? l - 65536 : 0)); \
 		< tmp-dict-$$l      $(LZ4) -D stdin tmp-dict-data-128KB -c | $(LZ4) -dD tmp-dict-$$l-tail | $(DIFF) - tmp-dict-data-128KB; \
 		< tmp-dict-$$l-tail $(LZ4) -D stdin tmp-dict-data-128KB -c | $(LZ4) -dD tmp-dict-$$l      | $(DIFF) - tmp-dict-data-128KB; \
@@ -347,28 +408,28 @@
 
 test-lz4-hugefile: lz4 datagen
 	@echo "\n ---- test huge files compression/decompression ----"
-	./datagen -g6GB   | $(LZ4) -vB5D  | $(LZ4) -qt
-	./datagen -g5GB   | $(LZ4) -v4BD  | $(LZ4) -qt
+	$(DATAGEN) -g6GB    | $(LZ4) -vB5D  | $(LZ4) -qt   # 6GB stream: compress, then verify with -t
+	$(DATAGEN) -g4500MB | $(LZ4) -v3BD | $(LZ4) -qt   # 4500MB stream, same compress-then-verify check
 	# test large file size [2-4] GB
-	@./datagen -g3G -P100 | $(LZ4) -vv | $(LZ4) --decompress --force --sparse - tmphf1
+	@$(DATAGEN) -g3G -P100 | $(LZ4) -vv | $(LZ4) --decompress --force --sparse - tmphf1
 	@ls -ls tmphf1
-	@./datagen -g3G -P100 | $(LZ4) --quiet --content-size | $(LZ4) --verbose --decompress --force --sparse - tmphf2
+	@$(DATAGEN) -g3G -P100 | $(LZ4) --quiet --content-size | $(LZ4) --verbose --decompress --force --sparse - tmphf2
 	@ls -ls tmphf2
 	$(DIFF) -s tmphf1 tmphf2
 	@$(RM) tmphf*
 
 test-lz4-testmode: lz4 datagen
 	@echo "\n ---- bench mode ----"
-	$(LZ4) -bi1
+	$(LZ4) -bi0
 	@echo "\n ---- test mode ----"
-	! ./datagen | $(LZ4) -t
-	! ./datagen | $(LZ4) -tf
+	! $(DATAGEN) | $(LZ4) -t
+	! $(DATAGEN) | $(LZ4) -tf
 	@echo "\n ---- pass-through mode ----"
 	@echo "Why hello there " > tmp-tlt2.lz4
 	! $(LZ4) -f tmp-tlt2.lz4 > $(VOID)
-	! ./datagen | $(LZ4) -dc  > $(VOID)
-	! ./datagen | $(LZ4) -df > $(VOID)
-	./datagen | $(LZ4) -dcf > $(VOID)
+	! $(DATAGEN) | $(LZ4) -dc  > $(VOID)
+	! $(DATAGEN) | $(LZ4) -df > $(VOID)
+	$(DATAGEN) | $(LZ4) -dcf > $(VOID)
 	@echo "Hello World !" > tmp-tlt1
 	$(LZ4) -dcf tmp-tlt1
 	@echo "from underground..." > tmp-tlt2
@@ -382,18 +443,18 @@
 
 test-lz4-opt-parser: lz4 datagen
 	@echo "\n ---- test opt-parser ----"
-	./datagen -g16KB      | $(LZ4) -12      | $(LZ4) -t
-	./datagen -P10        | $(LZ4) -12B4    | $(LZ4) -t
-	./datagen -g256K      | $(LZ4) -12B4D   | $(LZ4) -t
-	./datagen -g512K -P25 | $(LZ4) -12BD    | $(LZ4) -t
-	./datagen -g1M        | $(LZ4) -12B5    | $(LZ4) -t
-	./datagen -g2M -P99   | $(LZ4) -11B4D   | $(LZ4) -t
-	./datagen -g4M        | $(LZ4) -11vq    | $(LZ4) -qt
-	./datagen -g8M        | $(LZ4) -11B4    | $(LZ4) -t
-	./datagen -g16M -P90  | $(LZ4) -11B5    | $(LZ4) -t
-	./datagen -g32M -P10  | $(LZ4) -11B5D   | $(LZ4) -t
+	$(DATAGEN) -g16KB      | $(LZ4) -12      | $(LZ4) -t
+	$(DATAGEN) -P10        | $(LZ4) -12B4    | $(LZ4) -t
+	$(DATAGEN) -g256K      | $(LZ4) -12B4D   | $(LZ4) -t
+	$(DATAGEN) -g512K -P25 | $(LZ4) -12BD    | $(LZ4) -t
+	$(DATAGEN) -g1M        | $(LZ4) -12B5    | $(LZ4) -t
+	$(DATAGEN) -g2M -P99   | $(LZ4) -11B4D   | $(LZ4) -t
+	$(DATAGEN) -g4M        | $(LZ4) -11vq    | $(LZ4) -qt
+	$(DATAGEN) -g8M        | $(LZ4) -11B4    | $(LZ4) -t
+	$(DATAGEN) -g16M -P90  | $(LZ4) -11B5    | $(LZ4) -t
+	$(DATAGEN) -g32M -P10  | $(LZ4) -11B5D   | $(LZ4) -t
 
-test-lz4-essentials : lz4 datagen test-lz4-basic test-lz4-multiple \
+test-lz4-essentials : lz4 datagen test-lz4-basic test-lz4-multiple test-lz4-multiple-legacy \
                       test-lz4-frame-concatenation test-lz4-testmode \
                       test-lz4-contentSize test-lz4-dict
 	@$(RM) tmp*
@@ -404,35 +465,35 @@
 
 test-lz4c: lz4c datagen
 	@echo "\n ---- test lz4c variant ----"
-	./datagen -g256MB | $(LZ4)c -l -v    | $(LZ4)c   -t
+	$(DATAGEN) -g256MB | $(LZ4)c -l -v    | $(LZ4)c   -t
 
 test-lz4c32: CFLAGS+=-m32
 test-lz4c32: test-lz4
 
 test-interop-32-64: lz4 lz4c32 datagen
 	@echo "\n ---- test interoperability 32-bits -vs- 64 bits ----"
-	./datagen -g16KB  | $(LZ4)c32 -9     | $(LZ4)    -t
-	./datagen -P10    | $(LZ4)    -9B4   | $(LZ4)c32 -t
-	./datagen         | $(LZ4)c32        | $(LZ4)    -t
-	./datagen -g1M    | $(LZ4)    -3B5   | $(LZ4)c32 -t
-	./datagen -g256MB | $(LZ4)c32 -vqB4D | $(LZ4)    -qt
-	./datagen -g1G -P90 | $(LZ4)         | $(LZ4)c32 -t
-	./datagen -g6GB   | $(LZ4)c32 -vq9BD | $(LZ4)    -qt
+	$(DATAGEN) -g16KB  | $(LZ4)c32 -9     | $(LZ4)    -t
+	$(DATAGEN) -P10    | $(LZ4)    -9B4   | $(LZ4)c32 -t
+	$(DATAGEN)         | $(LZ4)c32        | $(LZ4)    -t
+	$(DATAGEN) -g1M    | $(LZ4)    -3B5   | $(LZ4)c32 -t
+	$(DATAGEN) -g256MB | $(LZ4)c32 -vqB4D | $(LZ4)    -qt
+	$(DATAGEN) -g1G -P90 | $(LZ4)         | $(LZ4)c32 -t
+	$(DATAGEN) -g6GB   | $(LZ4)c32 -vq9BD | $(LZ4)    -qt
 
 test-lz4c32-basic: lz4c32 datagen
 	@echo "\n ---- test lz4c32 32-bits version ----"
-	./datagen -g16KB  | $(LZ4)c32 -9     | $(LZ4)c32 -t
-	./datagen         | $(LZ4)c32        | $(LZ4)c32 -t
-	./datagen -g256MB | $(LZ4)c32 -vqB4D | $(LZ4)c32 -qt
-	./datagen -g6GB   | $(LZ4)c32 -vqB5D | $(LZ4)c32 -qt
+	$(DATAGEN) -g16KB  | $(LZ4)c32 -9     | $(LZ4)c32 -t
+	$(DATAGEN)         | $(LZ4)c32        | $(LZ4)c32 -t
+	$(DATAGEN) -g256MB | $(LZ4)c32 -vqB4D | $(LZ4)c32 -qt
+	$(DATAGEN) -g6GB   | $(LZ4)c32 -vqB5D | $(LZ4)c32 -qt
 
 test-platform:
 	@echo "\n ---- test lz4 $(QEMU_SYS) platform ----"
-	$(QEMU_SYS) ./datagen -g16KB  | $(QEMU_SYS) $(LZ4) -9     | $(QEMU_SYS) $(LZ4) -t
-	$(QEMU_SYS) ./datagen         | $(QEMU_SYS) $(LZ4)        | $(QEMU_SYS) $(LZ4) -t
-	$(QEMU_SYS) ./datagen -g256MB | $(QEMU_SYS) $(LZ4) -vqB4D | $(QEMU_SYS) $(LZ4) -qt
+	$(QEMU_SYS) $(DATAGEN) -g16KB  | $(QEMU_SYS) $(LZ4) -9     | $(QEMU_SYS) $(LZ4) -t
+	$(QEMU_SYS) $(DATAGEN)         | $(QEMU_SYS) $(LZ4)        | $(QEMU_SYS) $(LZ4) -t
+	$(QEMU_SYS) $(DATAGEN) -g256MB | $(QEMU_SYS) $(LZ4) -vqB4D | $(QEMU_SYS) $(LZ4) -qt
 ifneq ($(QEMU_SYS),qemu-arm-static)
-	$(QEMU_SYS) ./datagen -g3GB   | $(QEMU_SYS) $(LZ4) -vqB5D | $(QEMU_SYS) $(LZ4) -qt
+	$(QEMU_SYS) $(DATAGEN) -g3GB   | $(QEMU_SYS) $(LZ4) -vqB5D | $(QEMU_SYS) $(LZ4) -qt
 endif
 
 test-fullbench: fullbench
@@ -455,13 +516,13 @@
 
 test-mem: lz4 datagen fuzzer frametest fullbench
 	@echo "\n ---- valgrind tests : memory analyzer ----"
-	valgrind --leak-check=yes --error-exitcode=1 ./datagen -g50M > $(VOID)
-	./datagen -g16KB > ftmdg16K
+	valgrind --leak-check=yes --error-exitcode=1 $(DATAGEN) -g50M > $(VOID)
+	$(DATAGEN) -g16KB > ftmdg16K
 	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -9 -BD -f ftmdg16K $(VOID)
-	./datagen -g16KB -s2 > ftmdg16K2
-	./datagen -g16KB -s3 > ftmdg16K3
+	$(DATAGEN) -g16KB -s2 > ftmdg16K2
+	$(DATAGEN) -g16KB -s3 > ftmdg16K3
 	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) --force --multiple ftmdg16K ftmdg16K2 ftmdg16K3
-	./datagen -g7MB > ftmdg7M
+	$(DATAGEN) -g7MB > ftmdg7M
 	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -9 -B5D -f ftmdg7M ftmdg16K2
 	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -t ftmdg16K2
 	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -bi1 ftmdg7M
@@ -476,4 +537,8 @@
 test-mem32: lz4c32 datagen
 # unfortunately, valgrind doesn't seem to work with non-native binary...
 
+test-decompress-partial : decompress-partial
+	@echo "\n ---- test decompress-partial ----"
+	./decompress-partial$(EXT)
+
 endif
diff --git a/tests/checkFrame.c b/tests/checkFrame.c
index 139a599..f9a1c14 100644
--- a/tests/checkFrame.c
+++ b/tests/checkFrame.c
@@ -24,15 +24,6 @@
   */
 
   /*-************************************
-  *  Compiler specific
-  **************************************/
-  #ifdef _MSC_VER    /* Visual Studio */
-  #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
-  #  pragma warning(disable : 4146)        /* disable: C4146: minus unsigned expression */
-  #endif
-
-
-  /*-************************************
   *  Includes
   **************************************/
   #include "util.h"       /* U32 */
@@ -301,6 +292,7 @@
                 freeCResources(ress);
                 EXM_THROW(1, "%s: %s \n", argument, strerror(errno));
             }
+            assert (srcFile != NULL);
             err = frameCheck(ress, srcFile, bsid, blockSize);
             freeCResources(ress);
             fclose(srcFile);
diff --git a/tests/decompress-partial.c b/tests/decompress-partial.c
new file mode 100644
index 0000000..4e124b7
--- /dev/null
+++ b/tests/decompress-partial.c
@@ -0,0 +1,49 @@
+#include "stdio.h"
+#include "string.h"
+#include "lz4.h"
+
+const char source[] =
+  "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod\n"
+  "tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim\n"
+  "veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea\n"
+  "commodo consequat. Duis aute irure dolor in reprehenderit in voluptate\n"
+  "velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat\n"
+  "cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id\n"
+  "est laborum.\n"
+  "\n"
+  "Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium\n"
+  "doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore\n"
+  "veritatis et quasi architecto beatae vitae dicta sunt explicabo. Nemo enim\n"
+  "ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia\n"
+  "consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque\n"
+  "porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur,\n"
+  "adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore\n"
+  "et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis\n"
+  "nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid\n"
+  "ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea\n"
+  "voluptate velit esse quam nihil molestiae consequatur, vel illum qui\n"
+  "dolorem eum fugiat quo voluptas nulla pariatur?\n";
+
+#define BUFFER_SIZE 2048
+
+int main(void)
+{
+  int srcLen = (int)strlen(source);
+  char cmpBuffer[BUFFER_SIZE] = { 0 };  /* zeroed: bytes past cmpSize are announced as extra input below */
+  char outBuffer[BUFFER_SIZE];
+  int cmpSize;
+  int i;
+  /* Compress `source` once, then decode it with the input size overstated by 0..9 bytes. Returns 0 on success, -1 on any failure. */
+  cmpSize = LZ4_compress_default(source, cmpBuffer, srcLen, BUFFER_SIZE);
+  if ((cmpSize <= 0) || (cmpSize + 9 > BUFFER_SIZE)) { printf("test decompress-partial error \n"); return -1; }  /* compression failed, or no room for the 9 extra bytes */
+  for (i = cmpSize; i < cmpSize + 10; ++i) {  /* i = announced compressed size: exact first, then up to 9 bytes too large */
+    int result = LZ4_decompress_safe_partial(cmpBuffer, outBuffer, i, srcLen, BUFFER_SIZE);
+    if ((result < 0) || (result != srcLen) || memcmp(source, outBuffer, srcLen)) {  /* must regenerate exactly the original text */
+      printf("test decompress-partial error \n");
+      return -1;
+    }
+  }
+
+  printf("test decompress-partial OK \n");
+  return 0;
+}
diff --git a/tests/frametest.c b/tests/frametest.c
index 1b932e4..e613cbf 100644
--- a/tests/frametest.c
+++ b/tests/frametest.c
@@ -27,8 +27,7 @@
 *  Compiler specific
 **************************************/
 #ifdef _MSC_VER    /* Visual Studio */
-#  pragma warning(disable : 4127)     /* disable: C4127: conditional expression is constant */
-#  pragma warning(disable : 4146)     /* disable: C4146: minus unsigned expression */
+#  pragma warning(disable : 26451)     /* disable: Arithmetic overflow */
 #endif
 
 
@@ -200,6 +199,24 @@
         DISPLAYLEVEL(3, " %u \n", (U32)cBound);
     }
 
+    /* LZ4F_compressBound() : special case : automatic flushing enabled */
+    DISPLAYLEVEL(3, "LZ4F_compressBound(1 KB, autoFlush=1) = ");
+    {   size_t cBound;
+        LZ4F_preferences_t autoFlushPrefs;
+        memset(&autoFlushPrefs, 0, sizeof(autoFlushPrefs));
+        autoFlushPrefs.autoFlush = 1;
+        cBound = LZ4F_compressBound(1 KB, &autoFlushPrefs);
+        if (cBound > 64 KB) goto _output_error;
+        DISPLAYLEVEL(3, " %u \n", (U32)cBound);
+    }
+
+    /* LZ4F_compressBound() : special case : automatic flushing disabled */
+    DISPLAYLEVEL(3, "LZ4F_compressBound(1 KB, autoFlush=0) = ");
+    {   size_t const cBound = LZ4F_compressBound(1 KB, &prefs);
+        if (cBound < 64 KB) goto _output_error;
+        DISPLAYLEVEL(3, " %u \n", (U32)cBound);
+    }
+
     /* Special case : null-content frame */
     testSize = 0;
     DISPLAYLEVEL(3, "LZ4F_compressFrame, compress null content : ");
@@ -767,7 +784,7 @@
 
 static void locateBuffDiff(const void* buff1, const void* buff2, size_t size, o_scenario_e o_scenario)
 {
-    if (displayLevel >= 5) {
+    if (displayLevel >= 2) {
         size_t p=0;
         const BYTE* b1=(const BYTE*)buff1;
         const BYTE* b2=(const BYTE*)buff2;
@@ -797,7 +814,8 @@
                 U64 crcOrig,
                 U32* const randState,
                 LZ4F_dctx* const dCtx,
-                U32 seed, U32 testNb)
+                U32 seed, U32 testNb,
+                int findErrorPos)
 {
     const BYTE* ip = (const BYTE*)cSrc;
     const BYTE* const iend = ip + cSize;
@@ -820,32 +838,37 @@
         size_t iSize = iSizeMax;
         size_t const oSizeCand = (FUZ_rand(randState) & ((1<<nbBitsO)-1)) + 2;
         size_t const oSizeMax = MIN(oSizeCand, (size_t)(oend-op));
+        int const sentinelTest = (op + oSizeMax < oend);
         size_t oSize = oSizeMax;
         BYTE const mark = (BYTE)(FUZ_rand(randState) & 255);
         LZ4F_decompressOptions_t dOptions;
         memset(&dOptions, 0, sizeof(dOptions));
         dOptions.stableDst = FUZ_rand(randState) & 1;
         if (o_scenario == o_overwrite) dOptions.stableDst = 0;  /* overwrite mode */
-        if (op + oSizeMax < oend) op[oSizeMax] = mark;
+        if (sentinelTest) op[oSizeMax] = mark;
 
         DISPLAYLEVEL(7, "dstCapacity=%u,  presentedInput=%u \n", (unsigned)oSize, (unsigned)iSize);
 
         /* read data from byte-exact buffer to catch out-of-bound reads */
         {   void* const iBuffer = malloc(iSizeMax);
+            void* const tmpop = (FUZ_rand(randState) & (oSize == 0)) ? NULL : op;
+            const void* const tmpip = (FUZ_rand(randState) & (iSize == 0)) ? NULL : iBuffer;
             assert(iBuffer != NULL);
             memcpy(iBuffer, ip, iSizeMax);
-            moreToFlush = LZ4F_decompress(dCtx, op, &oSize, iBuffer, &iSize, &dOptions);
+            moreToFlush = LZ4F_decompress(dCtx, tmpop, &oSize, tmpip, &iSize, &dOptions);
             free(iBuffer);
         }
         DISPLAYLEVEL(7, "oSize=%u,  readSize=%u \n", (unsigned)oSize, (unsigned)iSize);
 
-        if (op + oSizeMax < oend) {
+        if (sentinelTest) {
             CHECK(op[oSizeMax] != mark, "op[oSizeMax] = %02X != %02X : "
                     "Decompression overwrites beyond assigned dst size",
                     op[oSizeMax], mark);
         }
-        if (LZ4F_getErrorCode(moreToFlush) == LZ4F_ERROR_contentChecksum_invalid)
-            locateBuffDiff(srcRef, dst, decompressedSize, o_scenario);
+        if (LZ4F_getErrorCode(moreToFlush) == LZ4F_ERROR_contentChecksum_invalid) {
+            if (findErrorPos) DISPLAYLEVEL(2, "checksum error detected \n");
+            if (findErrorPos) locateBuffDiff(srcRef, dst, decompressedSize, o_scenario);
+        }
         if (LZ4F_isError(moreToFlush)) return moreToFlush;
 
         XXH64_update(&xxh64, op, oSize);
@@ -865,7 +888,7 @@
     if (totalOut) {  /* otherwise, it's a skippable frame */
         U64 const crcDecoded = XXH64_digest(&xxh64);
         if (crcDecoded != crcOrig) {
-            locateBuffDiff(srcRef, dst, decompressedSize, o_scenario);
+            if (findErrorPos) locateBuffDiff(srcRef, dst, decompressedSize, o_scenario);
             return LZ4F_ERROR_contentChecksum_invalid;
     }   }
     return 0;
@@ -877,7 +900,8 @@
                                U64 crcOrig,
                                U32* const randState,
                                LZ4F_dctx* const dCtx,
-                               U32 seed, U32 testNb)
+                               U32 seed, U32 testNb,
+                               int findErrorPos)
 {
     o_scenario_e const o_scenario = (o_scenario_e)(FUZ_rand(randState) % 3);   /* 0 : contiguous; 1 : non-contiguous; 2 : dst overwritten */
     /* tighten dst buffer conditions */
@@ -894,7 +918,7 @@
                                      crcOrig,
                                      randState,
                                      dCtx,
-                                     seed, testNb);
+                                     seed, testNb, findErrorPos);
 
     free(dstBuffer);
     return result;
@@ -977,13 +1001,13 @@
             BYTE* op = (BYTE*)compressedBuffer;
             BYTE* const oend = op + (neverFlush ? LZ4F_compressFrameBound(srcSize, prefsPtr) : compressedBufferSize);  /* when flushes are possible, can't guarantee a max compressed size */
             unsigned const maxBits = FUZ_highbit((U32)srcSize);
-            size_t cSegmentSize;
             LZ4F_compressOptions_t cOptions;
             memset(&cOptions, 0, sizeof(cOptions));
-            cSegmentSize = LZ4F_compressBegin(cCtx, op, (size_t)(oend-op), prefsPtr);
-            CHECK(LZ4F_isError(cSegmentSize), "Compression header failed (error %i)",
-                                            (int)cSegmentSize);
-            op += cSegmentSize;
+            {   size_t const fhSize = LZ4F_compressBegin(cCtx, op, (size_t)(oend-op), prefsPtr);
+                CHECK(LZ4F_isError(fhSize), "Compression header failed (error %i)",
+                                            (int)fhSize);
+                op += fhSize;
+            }
             while (ip < iend) {
                 unsigned const nbBitsSeg = FUZ_rand(&randState) % maxBits;
                 size_t const sampleMax = (FUZ_rand(&randState) & ((1<<nbBitsSeg)-1)) + 1;
@@ -1006,8 +1030,20 @@
                         DISPLAYLEVEL(6,"flushing %u bytes \n", (unsigned)flushSize);
                         CHECK(LZ4F_isError(flushSize), "Compression failed (error %i)", (int)flushSize);
                         op += flushSize;
-                }   }
-            }
+                        if ((FUZ_rand(&randState) % 1024) == 3) {
+                            /* add an empty block (requires uncompressed flag) */
+                            op[0] = op[1] = op[2] = 0;
+                            op[3] = 0x80; /* 0x80000000U in little-endian format */
+                            op += 4;
+                            if ((prefsPtr!= NULL) && prefsPtr->frameInfo.blockChecksumFlag) {
+                                U32 const bc32 = XXH32(op, 0, 0);
+                                op[0] = (BYTE)bc32;  /* little endian format */
+                                op[1] = (BYTE)(bc32>>8);
+                                op[2] = (BYTE)(bc32>>16);
+                                op[3] = (BYTE)(bc32>>24);
+                                op += 4;
+                }   }   }   }
+            }  /* while (ip<iend) */
             CHECK(op>=oend, "LZ4F_compressFrameBound overflow");
             {   size_t const dstEndSafeSize = LZ4F_compressBound(0, prefsPtr);
                 int const tooSmallDstEnd = ((FUZ_rand(&randState) & 31) == 3);
@@ -1024,7 +1060,7 @@
                 CHECK(op[dstEndSize] != canaryByte, "LZ4F_compressEnd writes beyond dstCapacity !");
                 if (LZ4F_isError(flushedSize)) {
                     if (tooSmallDstEnd) /* failure is allowed */ continue;
-                    CHECK(1, "Compression completion failed (error %i : %s)",
+                    CHECK(!tooSmallDstEnd, "Compression completion failed (error %i : %s)",
                             (int)flushedSize, LZ4F_getErrorName(flushedSize));
                 }
                 op += flushedSize;
@@ -1036,7 +1072,7 @@
 
         /* multi-segments decompression */
         DISPLAYLEVEL(6, "normal decompression \n");
-        {   size_t result = test_lz4f_decompression(compressedBuffer, cSize, srcStart, srcSize, crcOrig, &randState, dCtx, seed, testNb);
+        {   size_t result = test_lz4f_decompression(compressedBuffer, cSize, srcStart, srcSize, crcOrig, &randState, dCtx, seed, testNb, 1 /*findError*/ );
             CHECK (LZ4F_isError(result), "multi-segment decompression failed (error %i => %s)",
                                         (int)result, LZ4F_getErrorName(result));
         }
@@ -1066,10 +1102,10 @@
 
         /* test decompression on noisy src */
         DISPLAYLEVEL(6, "noisy decompression \n");
-        test_lz4f_decompression(compressedBuffer, cSize, srcStart, srcSize, crcOrig, &randState, dCtxNoise, seed, testNb);
+        test_lz4f_decompression(compressedBuffer, cSize, srcStart, srcSize, crcOrig, &randState, dCtxNoise, seed, testNb, 0 /*don't search error Pos*/ );
         /* note : we don't analyze result here : it probably failed, which is expected.
-         * We just check for potential out-of-bound reads and writes. */
-         LZ4F_resetDecompressionContext(dCtxNoise);  /* context must be reset after an error */
+         * The sole purpose is to catch potential out-of-bound reads and writes. */
+        LZ4F_resetDecompressionContext(dCtxNoise);  /* context must be reset after an error */
 #endif
 
 }   /* for ( ; (testNb < nbTests) ; ) */
diff --git a/tests/fullbench.c b/tests/fullbench.c
index 7d74d3f..cb9b684 100644
--- a/tests/fullbench.c
+++ b/tests/fullbench.c
@@ -24,8 +24,8 @@
 */
 
 
-// S_ISREG & gettimeofday() are not supported by MSVC
 #if defined(_MSC_VER) || defined(_WIN32)
+   /* S_ISREG & gettimeofday() are not supported by MSVC */
 #  define BMK_LEGACY_TIMER 1
 #endif
 
@@ -134,7 +134,7 @@
 static size_t BMK_findMaxMem(U64 requiredMem)
 {
     size_t step = 64 MB;
-    BYTE* testmem=NULL;
+    BYTE* testmem = NULL;
 
     requiredMem = (((requiredMem >> 26) + 1) << 26);
     requiredMem += 2*step;
@@ -156,6 +156,14 @@
 
 
 /*********************************************************
+*  Memory management, to test LZ4_USER_MEMORY_FUNCTIONS
+*********************************************************/
+void* LZ4_malloc(size_t s) { return malloc(s); }
+void* LZ4_calloc(size_t n, size_t s) { return calloc(n,s); }
+void  LZ4_free(void* p) { free(p); }
+
+
+/*********************************************************
 *  Benchmark function
 *********************************************************/
 static LZ4_stream_t LZ4_stream;
@@ -292,9 +300,14 @@
     return outSize;
 }
 
+static int local_LZ4_decompress_safe_withPrefix64k(const char* in, char* out, int inSize, int outSize)
+{
+    LZ4_decompress_safe_withPrefix64k(in, out, inSize, outSize);   /* the decoder's return value is not checked here */
+    return outSize;   /* reports the requested output size, not the decoder's result */
+}
+
 static int local_LZ4_decompress_safe_usingDict(const char* in, char* out, int inSize, int outSize)
 {
-    (void)inSize;
     LZ4_decompress_safe_usingDict(in, out, inSize, outSize, out - 65536, 65536);
     return outSize;
 }
@@ -381,6 +394,39 @@
 
 }
 
+/* Decodes one LZ4 frame with g_dCtx, always offering input in blocks of at most 64 KB and ignoring the returned size hint; returns bytes written to dst, exits if the input is not fully consumed. */
+static int local_LZ4F_decompress_noHint(const char* src, char* dst, int srcSize, int dstSize)
+{
+    size_t totalInSize = (size_t)srcSize;
+    size_t maxOutSize = (size_t)dstSize;
+    /* the first chunk is clamped so that no more input is announced than src actually holds */
+    size_t inPos = 0;
+    size_t inSize = (64 KB <= totalInSize) ? 64 KB : totalInSize;
+    size_t outPos = 0;
+    size_t outRemaining = maxOutSize - outPos;
+
+    for (;;) {
+        size_t const sizeHint = LZ4F_decompress(g_dCtx, dst+outPos, &outRemaining, src+inPos, &inSize, NULL);
+        assert(!LZ4F_isError(sizeHint));
+        /* after the call, inSize / outRemaining hold the bytes consumed / produced (per the LZ4F_decompress API) */
+        inPos += inSize;
+        inSize = (inPos + 64 KB <= totalInSize) ? 64 KB : totalInSize - inPos;
+
+        outPos += outRemaining;
+        outRemaining = maxOutSize - outPos;
+
+        if (!sizeHint) break;   /* a zero hint ends the loop : frame fully decoded */
+    }
+
+    /* frame completed */
+    if (inPos != totalInSize) {
+        DISPLAY("Error decompressing frame : must read (%u) full frame (%u) \n",
+                (unsigned)inPos, (unsigned)totalInSize);
+        exit(10);
+    }
+    return (int)outPos;
+
+}
 
 #define NB_COMPRESSION_ALGORITHMS 100
 #define NB_DECOMPRESSION_ALGORITHMS 100
@@ -399,25 +445,25 @@
       char* compressed_buff=NULL;
       const char* const inFileName = fileNamesTable[fileIdx++];
       FILE* const inFile = fopen( inFileName, "rb" );
-      U64   inFileSize;
-      size_t benchedSize;
+      U64 const inFileSize = UTIL_getFileSize(inFileName);
+      size_t benchedSize = BMK_findMaxMem(inFileSize*2) / 2;   /* because 2 buffers */
       int nbChunks;
       int maxCompressedChunkSize;
       size_t readSize;
       int compressedBuffSize;
       U32 crcOriginal;
 
-      /* Check file existence */
-      if (inFile==NULL) { DISPLAY( "Pb opening %s\n", inFileName); return 11; }
+      /* Check infile pre-requisites */
+      if (inFile==NULL) { DISPLAY("Pb opening %s \n", inFileName); return 11; }
+      if (inFileSize==0) { DISPLAY("file is empty \n"); fclose(inFile); return 11; }
+      if (benchedSize==0) { DISPLAY("not enough memory \n"); fclose(inFile); return 11; }
 
       /* Memory size adjustments */
-      inFileSize = UTIL_getFileSize(inFileName);
-      if (inFileSize==0) { DISPLAY( "file is empty\n"); fclose(inFile); return 11; }
-      benchedSize = BMK_findMaxMem(inFileSize*2) / 2;   /* because 2 buffers */
-      if (benchedSize==0) { DISPLAY( "not enough memory\n"); fclose(inFile); return 11; }
       if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
-      if (benchedSize < inFileSize)
-          DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20));
+      if (benchedSize < inFileSize) {
+          DISPLAY("Not enough memory for '%s' full size; testing %i MB only... \n",
+                inFileName, (int)(benchedSize>>20));
+      }
 
       /* Allocation */
       chunkP = (struct chunkParameters*) malloc(((benchedSize / (size_t)g_chunkSize)+1) * sizeof(struct chunkParameters));
@@ -427,7 +473,7 @@
       compressedBuffSize = nbChunks * maxCompressedChunkSize;
       compressed_buff = (char*)malloc((size_t)compressedBuffSize);
       if(!chunkP || !orig_buff || !compressed_buff) {
-          DISPLAY("\nError: not enough memory!\n");
+          DISPLAY("\nError: not enough memory! \n");
           fclose(inFile);
           free(orig_buff);
           free(compressed_buff);
@@ -475,7 +521,7 @@
                 size_t remaining = benchedSize;
                 char* in = orig_buff;
                 char* out = compressed_buff;
-                nbChunks = (int) (((int)benchedSize + (g_chunkSize-1))/ g_chunkSize);
+                assert(nbChunks >= 1);
                 for (i=0; i<nbChunks; i++) {
                     chunkP[i].id = (U32)i;
                     chunkP[i].origBuffer = in; in += g_chunkSize;
@@ -608,6 +654,7 @@
             case 2: decompressionFunction = local_LZ4_decompress_fast_usingDict_prefix; dName = "LZ4_decompress_fast_usingDict(prefix)"; break;
             case 3: decompressionFunction = local_LZ4_decompress_fast_usingExtDict; dName = "LZ4_decompress_fast_using(Ext)Dict"; break;
             case 4: decompressionFunction = LZ4_decompress_safe; dName = "LZ4_decompress_safe"; break;
+            case 5: decompressionFunction = local_LZ4_decompress_safe_withPrefix64k; dName = "LZ4_decompress_safe_withPrefix64k"; break;
             case 6: decompressionFunction = local_LZ4_decompress_safe_usingDict; dName = "LZ4_decompress_safe_usingDict"; break;
             case 7: decompressionFunction = local_LZ4_decompress_safe_partial; dName = "LZ4_decompress_safe_partial"; checkResult = 0; break;
 #ifndef LZ4_DLL_IMPORT
@@ -615,8 +662,10 @@
 #endif
             case 10:
             case 11:
+            case 12:
                 if (dAlgNb == 10) { decompressionFunction = local_LZ4F_decompress; dName = "LZ4F_decompress"; }  /* can be skipped */
                 if (dAlgNb == 11) { decompressionFunction = local_LZ4F_decompress_followHint; dName = "LZ4F_decompress_followHint"; }  /* can be skipped */
+                if (dAlgNb == 12) { decompressionFunction = local_LZ4F_decompress_noHint; dName = "LZ4F_decompress_noHint"; }  /* can be skipped */
                 /* prepare compressed data using frame format */
                 {   size_t const fcsize = LZ4F_compressFrame(compressed_buff, (size_t)compressedBuffSize, orig_buff, benchedSize, NULL);
                     assert(!LZ4F_isError(fcsize));
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index 8a095c4..a824813 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -341,7 +341,7 @@
     exit(1);                                           \
 }
 
-#   define FUZ_CHECKTEST(cond, ...)  { if (cond) { EXIT_MSG(__VA_ARGS__) } }
+#   define FUZ_CHECKTEST(cond, ...)  if (cond) { EXIT_MSG(__VA_ARGS__) }
 
 #   define FUZ_DISPLAYTEST(...) {                 \
                 testNb++;                         \
@@ -446,10 +446,10 @@
                 FUZ_DISPLAYTEST();
                 decodedBuffer[srcSize] = canary;
                 {   int const dSize = LZ4_decompress_safe(compressedBuffer, decodedBuffer, cSize, srcSize);
-                    FUZ_CHECKTEST(dSize<0, "LZ4_decompress_safe() failed on data compressed by LZ4_compressHC_destSize");
-                    FUZ_CHECKTEST(dSize!=srcSize, "LZ4_decompress_safe() failed : did not fully decompressed data");
+                    FUZ_CHECKTEST(dSize<0, "LZ4_decompress_safe failed (%i) on data compressed by LZ4_compressHC_destSize", dSize);
+                    FUZ_CHECKTEST(dSize!=srcSize, "LZ4_decompress_safe failed : decompressed %i bytes, was supposed to decompress %i bytes", dSize, srcSize);
                 }
-                FUZ_CHECKTEST(decodedBuffer[srcSize] != canary, "LZ4_decompress_safe() overwrite dst buffer !");
+                FUZ_CHECKTEST(decodedBuffer[srcSize] != canary, "LZ4_decompress_safe overwrite dst buffer !");
                 {   U32 const crcDec = XXH32(decodedBuffer, (size_t)srcSize, 0);
                     FUZ_CHECKTEST(crcDec!=crcBase, "LZ4_decompress_safe() corrupted decoded data");
             }   }
@@ -618,13 +618,16 @@
 
         /* Test partial decoding => must work */
         FUZ_DISPLAYTEST("test LZ4_decompress_safe_partial");
-        {   size_t const missingBytes = FUZ_rand(&randState) % (unsigned)blockSize;
-            int const targetSize = (int)((size_t)blockSize - missingBytes);
+        {   size_t const missingOutBytes = FUZ_rand(&randState) % (unsigned)blockSize;
+            int const targetSize = (int)((size_t)blockSize - missingOutBytes);
+            size_t const extraneousInBytes = FUZ_rand(&randState) % 2;
+            int const inCSize = (int)((size_t)compressedSize + extraneousInBytes);
             char const sentinel = decodedBuffer[targetSize] = block[targetSize] ^ 0x5A;
-            int const decResult = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, compressedSize, targetSize, blockSize);
+            int const decResult = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, inCSize, targetSize, blockSize);
             FUZ_CHECKTEST(decResult<0, "LZ4_decompress_safe_partial failed despite valid input data (error:%i)", decResult);
             FUZ_CHECKTEST(decResult != targetSize, "LZ4_decompress_safe_partial did not regenerated required amount of data (%i < %i <= %i)", decResult, targetSize, blockSize);
             FUZ_CHECKTEST(decodedBuffer[targetSize] != sentinel, "LZ4_decompress_safe_partial overwrite beyond requested size (though %i <= %i <= %i)", decResult, targetSize, blockSize);
+            FUZ_CHECKTEST(memcmp(block, decodedBuffer, (size_t)targetSize), "LZ4_decompress_safe_partial: corruption detected in regenerated data");
         }
 
         /* Test Compression with limited output size */
@@ -787,7 +790,6 @@
             LZ4_attach_dictionary(&LZ4_stream, &LZ4dictBody);
             blockContinueCompressedSize = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, (int)compressedBufferSize, 1);
             FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_fast_continue using extDictCtx failed");
-            FUZ_CHECKTEST(LZ4_stream.internal_donotuse.dirty, "context should be good");
 
             /* In the future, it might be desirable to let extDictCtx mode's
              * output diverge from the output generated by regular extDict mode.
@@ -812,7 +814,6 @@
             FUZ_CHECKTEST(ret<=0, "LZ4_compress_fast_continue using extDictCtx should work : enough size available within output buffer");
             FUZ_CHECKTEST(ret != expectedSize, "LZ4_compress_fast_continue using extDictCtx produced different-sized output");
             FUZ_CHECKTEST(XXH32(compressedBuffer, (size_t)ret, 0) != expectedCrc, "LZ4_compress_fast_continue using extDictCtx produced different output");
-            FUZ_CHECKTEST(LZ4_stream.internal_donotuse.dirty, "context should be good");
 
             FUZ_DISPLAYTEST();
             LZ4_resetStream_fast(&LZ4_stream);
@@ -822,7 +823,6 @@
             FUZ_CHECKTEST(ret<=0, "LZ4_compress_fast_continue using extDictCtx with re-used context should work : enough size available within output buffer");
             FUZ_CHECKTEST(ret != expectedSize, "LZ4_compress_fast_continue using extDictCtx produced different-sized output");
             FUZ_CHECKTEST(XXH32(compressedBuffer, (size_t)ret, 0) != expectedCrc, "LZ4_compress_fast_continue using extDictCtx produced different output");
-            FUZ_CHECKTEST(LZ4_stream.internal_donotuse.dirty, "context should be good");
         }
 
         /* Decompress with dictionary as external */
@@ -859,12 +859,12 @@
         FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_safe_usingDict overrun specified output buffer size");
 
         FUZ_DISPLAYTEST("LZ4_decompress_safe_usingDict with a too small output buffer");
-        {   U32 const missingBytes = (FUZ_rand(&randState) & 0xF) + 2;
-            if ((U32)blockSize > missingBytes) {
-                decodedBuffer[(U32)blockSize-missingBytes] = 0;
+        {   int const missingBytes = (FUZ_rand(&randState) & 0xF) + 2;
+            if (blockSize > missingBytes) {
+                decodedBuffer[blockSize-missingBytes] = 0;
                 ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize-missingBytes, dict, dictSize);
-                FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe_usingDict should have failed : output buffer too small (-%u byte)", missingBytes);
-                FUZ_CHECKTEST(decodedBuffer[blockSize-missingBytes], "LZ4_decompress_safe_usingDict overrun specified output buffer size (-%u byte) (blockSize=%i)", missingBytes, blockSize);
+                FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe_usingDict should have failed : output buffer too small (-%i byte)", missingBytes);
+                FUZ_CHECKTEST(decodedBuffer[blockSize-missingBytes], "LZ4_decompress_safe_usingDict overrun specified output buffer size (-%i byte) (blockSize=%i)", missingBytes, blockSize);
         }   }
 
         /* Compress HC using External dictionary */
@@ -931,7 +931,8 @@
             LZ4_resetStreamHC_fast (LZ4_streamHC, compressionLevel);
             LZ4_attach_HC_dictionary(LZ4_streamHC, LZ4dictHC);
             ret = LZ4_compress_HC_continue(LZ4_streamHC, block, compressedBuffer, blockSize, blockContinueCompressedSize);
-            FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_HC_continue using ExtDictCtx and fast reset size is different (%i != %i)", ret, blockContinueCompressedSize);
+            FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_HC_continue using ExtDictCtx and fast reset size is different (%i != %i)",
+                        ret, blockContinueCompressedSize);
             FUZ_CHECKTEST(ret<=0, "LZ4_compress_HC_continue using ExtDictCtx and fast reset should work : enough size available within output buffer");
             FUZ_CHECKTEST(LZ4_streamHC->internal_donotuse.dirty, "Context should be clean");
 
@@ -951,13 +952,14 @@
 
         /* Compress HC continue destSize */
         FUZ_DISPLAYTEST();
-        {   int const availableSpace = (int)(FUZ_rand(&randState) % blockSize) + 5;
+        {   int const availableSpace = (int)(FUZ_rand(&randState) % (U32)blockSize) + 5;
             int consumedSize = blockSize;
             FUZ_DISPLAYTEST();
             LZ4_loadDictHC(LZ4dictHC, dict, dictSize);
             LZ4_setCompressionLevel(LZ4dictHC, compressionLevel);
             blockContinueCompressedSize = LZ4_compress_HC_continue_destSize(LZ4dictHC, block, compressedBuffer, &consumedSize, availableSpace);
-            DISPLAYLEVEL(5, " LZ4_compress_HC_continue_destSize : compressed %6i/%6i into %6i/%6i at cLevel=%i\n", consumedSize, blockSize, blockContinueCompressedSize, availableSpace, compressionLevel);
+            DISPLAYLEVEL(5, " LZ4_compress_HC_continue_destSize : compressed %6i/%6i into %6i/%6i at cLevel=%i \n",
+                        consumedSize, blockSize, blockContinueCompressedSize, availableSpace, compressionLevel);
             FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_HC_continue_destSize failed");
             FUZ_CHECKTEST(blockContinueCompressedSize > availableSpace, "LZ4_compress_HC_continue_destSize write overflow");
             FUZ_CHECKTEST(consumedSize > blockSize, "LZ4_compress_HC_continue_destSize read overflow");
@@ -965,7 +967,7 @@
             FUZ_DISPLAYTEST();
             decodedBuffer[consumedSize] = 0;
             ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, consumedSize, dict, dictSize);
-            FUZ_CHECKTEST(ret!=consumedSize, "LZ4_decompress_safe_usingDict did not regenerate original data");
+            FUZ_CHECKTEST(ret != consumedSize, "LZ4_decompress_safe_usingDict regenerated %i bytes (%i expected)", ret, consumedSize);
             FUZ_CHECKTEST(decodedBuffer[consumedSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size")
             {   U32 const crcSrc = XXH32(block, (size_t)consumedSize, 0);
                 U32 const crcDst = XXH32(decodedBuffer, (size_t)consumedSize, 0);
@@ -977,10 +979,14 @@
 
         /* ***** End of tests *** */
         /* Fill stats */
-        bytes += blockSize;
-        cbytes += compressedSize;
-        hcbytes += HCcompressedSize;
-        ccbytes += blockContinueCompressedSize;
+        assert(blockSize >= 0);
+        bytes += (unsigned)blockSize;
+        assert(compressedSize >= 0);
+        cbytes += (unsigned)compressedSize;
+        assert(HCcompressedSize >= 0);
+        hcbytes += (unsigned)HCcompressedSize;
+        assert(blockContinueCompressedSize >= 0);
+        ccbytes += (unsigned)blockContinueCompressedSize;
     }
 
     if (nbCycles<=1) nbCycles = cycleNb;   /* end by time */
@@ -1051,6 +1057,26 @@
             FUZ_CHECKTEST(r >= 0, "LZ4_decompress_safe() should fail");
     }   }
 
+
+    /* NULL-pointer cases : meant to be run under an undefined-behavior sanitizer */
+    DISPLAYLEVEL(3, "LZ4_compress_default() with NULL input:");
+    {   int const maxCSize = LZ4_compressBound(0);
+        int const cSize = LZ4_compress_default(NULL, testCompressed, 0, maxCSize);   /* NULL src is paired with srcSize==0 */
+        FUZ_CHECKTEST(!(cSize==1 && testCompressed[0]==0),
+                    "compressing empty should give byte 0"
+                    " (maxCSize == %i) (cSize == %i) (byte == 0x%02X)",
+                    maxCSize, cSize, (unsigned char)testCompressed[0]);   /* cast : avoids sign-extended hex output when char is signed */
+    }
+    DISPLAYLEVEL(3, " OK \n");
+
+    DISPLAYLEVEL(3, "LZ4_compress_default() with both NULL input and output:");
+    {   int const cSize = LZ4_compress_default(NULL, NULL, 0, 0);   /* dst capacity is 0 : the test expects a 0 return */
+        FUZ_CHECKTEST(cSize != 0,
+                    "compressing into NULL must fail"
+                    " (cSize == %i !=  0)", cSize);
+    }
+    DISPLAYLEVEL(3, " OK \n");
+
     /* in-place compression test */
     DISPLAYLEVEL(3, "in-place compression using LZ4_compress_default() :");
     {   int const sampleSize = 65 KB;
@@ -1071,7 +1097,7 @@
         {   int const dSize = LZ4_decompress_safe(testCompressed, testVerify, cSize, testInputSize);
             assert(dSize == sampleSize);   /* correct size */
             {   XXH32_hash_t const crcCheck = XXH32(testVerify, (size_t)dSize, 0);
-                assert(crcCheck == crcOrig);
+                FUZ_CHECKTEST(crcCheck != crcOrig, "LZ4_decompress_safe decompression corruption");
     }   }   }
     DISPLAYLEVEL(3, " OK \n");
 
@@ -1095,32 +1121,67 @@
                 {   int const dSize = LZ4_decompress_safe(startInput, testVerify, cSize, sampleSize);
                     assert(dSize == sampleSize);   /* correct size */
                     {   XXH64_hash_t const crcCheck = XXH64(testVerify, (size_t)dSize, 0);
-                        assert(crcCheck == crcOrig);
+                        FUZ_CHECKTEST(crcCheck != crcOrig, "LZ4_decompress_safe decompression corruption");
     }   }   }   }   }
     DISPLAYLEVEL(3, " OK \n");
 
-    /* LZ4 streaming tests */
-    {   LZ4_stream_t  streamingState;
-        U64 crcOrig;
-        int result;
+    DISPLAYLEVEL(3, "LZ4_initStream with multiple valid alignments : ");
+    {   typedef struct {   /* three states at distinct, compiler-aligned offsets */
+            LZ4_stream_t state1;
+            LZ4_stream_t state2;
+            char              c;   /* 1-byte member : pushes state3 to a different offset than state1/state2 */
+            LZ4_stream_t state3;
+        } shct;
+        shct* const shc = (shct*)malloc(sizeof(*shc));
+        assert(shc != NULL);
+        memset(shc, 0, sizeof(*shc));
+        DISPLAYLEVEL(4, "state1(%p) state2(%p) state3(%p) LZ4_stream_t size(0x%x): ",
+                    (void*)&(shc->state1), (void*)&(shc->state2), (void*)&(shc->state3), (unsigned)sizeof(LZ4_stream_t));
+        FUZ_CHECKTEST( LZ4_initStream(&(shc->state1), sizeof(shc->state1)) == NULL, "state1 (%p) failed init", (void*)&(shc->state1) );
+        FUZ_CHECKTEST( LZ4_initStream(&(shc->state2), sizeof(shc->state2)) == NULL, "state2 (%p) failed init", (void*)&(shc->state2)  );
+        FUZ_CHECKTEST( LZ4_initStream(&(shc->state3), sizeof(shc->state3)) == NULL, "state3 (%p) failed init", (void*)&(shc->state3)  );
+        FUZ_CHECKTEST( LZ4_initStream((char*)&(shc->state1) + 1, sizeof(shc->state1)) != NULL,   /* +1 byte : deliberately misaligned, must be rejected */
+                       "state1+1 (%p) init must fail, due to bad alignment", (void*)((char*)&(shc->state1) + 1) );
+        free(shc);
+    }
+    DISPLAYLEVEL(3, "all inits OK \n");
 
-        /* Allocation test */
-        {   LZ4_stream_t* const statePtr = LZ4_createStream();
-            FUZ_CHECKTEST(statePtr==NULL, "LZ4_createStream() allocation failed");
-            LZ4_freeStream(statePtr);
-        }
+    /* Allocation test */
+    {   LZ4_stream_t* const statePtr = LZ4_createStream();
+        FUZ_CHECKTEST(statePtr==NULL, "LZ4_createStream() allocation failed");
+        LZ4_freeStream(statePtr);
+    }
+
+    /* LZ4 streaming tests */
+    {   LZ4_stream_t streamingState;
 
         /* simple compression test */
-        crcOrig = XXH64(testInput, testCompressedSize, 0);
         LZ4_initStream(&streamingState, sizeof(streamingState));
-        result = LZ4_compress_fast_continue(&streamingState, testInput, testCompressed, testCompressedSize, testCompressedSize-1, 1);
-        FUZ_CHECKTEST(result==0, "LZ4_compress_fast_continue() compression failed!");
-        FUZ_CHECKTEST(streamingState.internal_donotuse.dirty, "context should be clean")
+        {   int const cs = LZ4_compress_fast_continue(&streamingState, testInput, testCompressed, testCompressedSize, testCompressedSize-1, 1);
+            FUZ_CHECKTEST(cs==0, "LZ4_compress_fast_continue() compression failed!");
+            {   int const r = LZ4_decompress_safe(testCompressed, testVerify, cs, testCompressedSize);
+                FUZ_CHECKTEST(r!=(int)testCompressedSize, "LZ4_decompress_safe() decompression failed");
+        }   }
+        {   U64 const crcOrig = XXH64(testInput, testCompressedSize, 0);
+            U64 const crcNew = XXH64(testVerify, testCompressedSize, 0);
+            FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() decompression corruption");
+        }
 
-        result = LZ4_decompress_safe(testCompressed, testVerify, result, testCompressedSize);
-        FUZ_CHECKTEST(result!=(int)testCompressedSize, "LZ4_decompress_safe() decompression failed");
-        { U64 const crcNew = XXH64(testVerify, testCompressedSize, 0);
-          FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() decompression corruption"); }
+        /* early saveDict */
+        DISPLAYLEVEL(3, "saveDict (right after init) : ");
+        {   LZ4_stream_t* const ctx = LZ4_initStream(&streamingState, sizeof(streamingState));
+            assert(ctx != NULL);  /* ensure init is successful */
+
+            /* Check access violation with asan */
+            FUZ_CHECKTEST( LZ4_saveDict(ctx, NULL, 0) != 0,
+            "LZ4_saveDict() can't save anything into (NULL,0)");
+
+            /* Check access violation with asan */
+            {   char tmp_buffer[240] = { 0 };
+                FUZ_CHECKTEST( LZ4_saveDict(ctx, tmp_buffer, sizeof(tmp_buffer)) != 0,
+                "LZ4_saveDict() can't save anything since compression hasn't started");
+        }   }
+        DISPLAYLEVEL(3, "OK \n");
 
         /* ring buffer test */
         {   XXH64_state_t xxhOrig;
@@ -1142,7 +1203,7 @@
             LZ4_setStreamDecode(&decodeStateFast, NULL, 0);
 
             while (iNext + messageSize < testCompressedSize) {
-                int compressedSize;
+                int compressedSize; U64 crcOrig;
                 XXH64_update(&xxhOrig, testInput + iNext, messageSize);
                 crcOrig = XXH64_digest(&xxhOrig);
 
@@ -1150,15 +1211,15 @@
                 compressedSize = LZ4_compress_fast_continue(&streamingState, ringBuffer + rNext, testCompressed, (int)messageSize, testCompressedSize-ringBufferSize, 1);
                 FUZ_CHECKTEST(compressedSize==0, "LZ4_compress_fast_continue() compression failed");
 
-                result = LZ4_decompress_safe_continue(&decodeStateSafe, testCompressed, testVerify + dNext, compressedSize, (int)messageSize);
-                FUZ_CHECKTEST(result!=(int)messageSize, "ringBuffer : LZ4_decompress_safe_continue() test failed");
+                { int const r = LZ4_decompress_safe_continue(&decodeStateSafe, testCompressed, testVerify + dNext, compressedSize, (int)messageSize);
+                  FUZ_CHECKTEST(r!=(int)messageSize, "ringBuffer : LZ4_decompress_safe_continue() test failed"); }
 
                 XXH64_update(&xxhNewSafe, testVerify + dNext, messageSize);
                 { U64 const crcNew = XXH64_digest(&xxhNewSafe);
                   FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_continue() decompression corruption"); }
 
-                result = LZ4_decompress_fast_continue(&decodeStateFast, testCompressed, testVerify + dNext, (int)messageSize);
-                FUZ_CHECKTEST(result!=compressedSize, "ringBuffer : LZ4_decompress_fast_continue() test failed");
+                { int const r = LZ4_decompress_fast_continue(&decodeStateFast, testCompressed, testVerify + dNext, (int)messageSize);
+                  FUZ_CHECKTEST(r!=compressedSize, "ringBuffer : LZ4_decompress_fast_continue() test failed"); }
 
                 XXH64_update(&xxhNewFast, testVerify + dNext, messageSize);
                 { U64 const crcNew = XXH64_digest(&xxhNewFast);
@@ -1171,26 +1232,45 @@
                 messageSize = (FUZ_rand(&randState) & maxMessageSizeMask) + 1;
                 if (rNext + messageSize > ringBufferSize) rNext = 0;
                 if (dNext + messageSize > dBufferSize) dNext = 0;
-            }
-        }
+        }   }
     }
 
+    DISPLAYLEVEL(3, "LZ4_initStreamHC with multiple valid alignments : ");
+    {   typedef struct {   /* three HC states at distinct, compiler-aligned offsets */
+            LZ4_streamHC_t hc1;
+            LZ4_streamHC_t hc2;
+            char             c;   /* 1-byte member : pushes hc3 to a different offset than hc1/hc2 */
+            LZ4_streamHC_t hc3;
+        } shct;
+        shct* const shc = (shct*)malloc(sizeof(*shc));
+        assert(shc != NULL);
+        memset(shc, 0, sizeof(*shc));
+        DISPLAYLEVEL(4, "hc1(%p) hc2(%p) hc3(%p) size(0x%x): ",
+                    (void*)&(shc->hc1), (void*)&(shc->hc2), (void*)&(shc->hc3), (unsigned)sizeof(LZ4_streamHC_t));
+        FUZ_CHECKTEST( LZ4_initStreamHC(&(shc->hc1), sizeof(shc->hc1)) == NULL, "hc1 (%p) failed init", (void*)&(shc->hc1) );
+        FUZ_CHECKTEST( LZ4_initStreamHC(&(shc->hc2), sizeof(shc->hc2)) == NULL, "hc2 (%p) failed init", (void*)&(shc->hc2)  );
+        FUZ_CHECKTEST( LZ4_initStreamHC(&(shc->hc3), sizeof(shc->hc3)) == NULL, "hc3 (%p) failed init", (void*)&(shc->hc3)  );
+        FUZ_CHECKTEST( LZ4_initStreamHC((char*)&(shc->hc1) + 1, sizeof(shc->hc1)) != NULL,   /* +1 byte : deliberately misaligned, must be rejected */
+                        "hc1+1 (%p) init must fail, due to bad alignment", (void*)((char*)&(shc->hc1) + 1) );
+        free(shc);
+    }
+    DISPLAYLEVEL(3, "all inits OK \n");
+
     /* LZ4 HC streaming tests */
     {   LZ4_streamHC_t sHC;   /* statically allocated */
-        U64 crcOrig;
         int result;
         LZ4_initStreamHC(&sHC, sizeof(sHC));
 
         /* Allocation test */
-        DISPLAYLEVEL(3, " Basic HC allocation : ");
+        DISPLAYLEVEL(3, "Basic HC allocation : ");
         {   LZ4_streamHC_t* const sp = LZ4_createStreamHC();
             FUZ_CHECKTEST(sp==NULL, "LZ4_createStreamHC() allocation failed");
             LZ4_freeStreamHC(sp);
         }
-        DISPLAYLEVEL(3, " OK \n");
+        DISPLAYLEVEL(3, "OK \n");
 
         /* simple HC compression test */
-        DISPLAYLEVEL(3, " Simple HC round-trip : ");
+        DISPLAYLEVEL(3, "Simple HC round-trip : ");
         {   U64 const crc64 = XXH64(testInput, testCompressedSize, 0);
             LZ4_setCompressionLevel(&sHC, compressionLevel);
             result = LZ4_compress_HC_continue(&sHC, testInput, testCompressed, testCompressedSize, testCompressedSize-1);
@@ -1202,10 +1282,26 @@
             {   U64 const crcNew = XXH64(testVerify, testCompressedSize, 0);
                 FUZ_CHECKTEST(crc64!=crcNew, "LZ4_decompress_safe() decompression corruption");
         }   }
-        DISPLAYLEVEL(3, " OK \n");
+        DISPLAYLEVEL(3, "OK \n");
+
+        /* saveDictHC test #926 */
+        DISPLAYLEVEL(3, "saveDictHC test #926 : ");
+        {   LZ4_streamHC_t* const ctx = LZ4_initStreamHC(&sHC, sizeof(sHC));
+            assert(ctx != NULL);  /* ensure init is successful */
+
+            /* Check access violation with asan */
+            FUZ_CHECKTEST( LZ4_saveDictHC(ctx, NULL, 0) != 0,
+            "LZ4_saveDictHC() can't save anything into (NULL,0)");
+
+            /* Check access violation with asan */
+            {   char tmp_buffer[240] = { 0 };
+                FUZ_CHECKTEST( LZ4_saveDictHC(ctx, tmp_buffer, sizeof(tmp_buffer)) != 0,
+                "LZ4_saveDictHC() can't save anything since compression hasn't started");
+        }   }
+        DISPLAYLEVEL(3, "OK \n");
 
         /* long sequence test */
-        DISPLAYLEVEL(3, " Long sequence HC test : ");
+        DISPLAYLEVEL(3, "Long sequence HC_destSize test : ");
         {   size_t const blockSize = 1 MB;
             size_t const targetSize = 4116;  /* size carefully selected to trigger an overflow */
             void*  const block = malloc(blockSize);
@@ -1223,8 +1319,13 @@
             assert(targetSize < INT_MAX);
             result = LZ4_compress_HC_destSize(&sHC, (const char*)block, (char*)dstBlock, &srcSize, (int)targetSize, 3);
             DISPLAYLEVEL(4, "cSize=%i; readSize=%i; ", result, srcSize);
-            FUZ_CHECKTEST(result!=4116, "LZ4_compress_HC_destSize() : compression must fill dstBuffer completely, but no more !");
-            FUZ_CHECKTEST(((char*)dstBlock)[targetSize] != sentinel, "LZ4_compress_HC_destSize()")
+            FUZ_CHECKTEST(result != 4116, "LZ4_compress_HC_destSize() : "
+                "compression (%i->%i) must fill dstBuffer (%i) exactly",
+                srcSize, result, (int)targetSize);
+            FUZ_CHECKTEST(((char*)dstBlock)[targetSize] != sentinel,
+                "LZ4_compress_HC_destSize() overwrites dst buffer");
+            FUZ_CHECKTEST(srcSize < 1045000, "LZ4_compress_HC_destSize() doesn't compress enough"
+                " (%i -> %i , expected > %i)", srcSize, result, 1045000);
 
             LZ4_resetStreamHC_fast(&sHC, 3);   /* make sure the context is clean after the test */
             free(block);
@@ -1233,7 +1334,7 @@
         DISPLAYLEVEL(3, " OK \n");
 
         /* simple dictionary HC compression test */
-        DISPLAYLEVEL(3, " HC dictionary compression test : ");
+        DISPLAYLEVEL(3, "HC dictionary compression test : ");
         {   U64 const crc64 = XXH64(testInput + 64 KB, testCompressedSize, 0);
             LZ4_resetStreamHC_fast(&sHC, compressionLevel);
             LZ4_loadDictHC(&sHC, testInput, 64 KB);
@@ -1302,6 +1403,7 @@
             XXH64_reset(&crcNewState, 0);
 
             while (segStart + segSize < testInputSize) {
+                XXH64_hash_t crcOrig;
                 XXH64_update(&crcOrigState, testInput + segStart, segSize);
                 crcOrig = XXH64_digest(&crcOrigState);
                 assert(segSize <= INT_MAX);
@@ -1348,6 +1450,7 @@
 
             while (iNext + messageSize < testCompressedSize) {
                 int compressedSize;
+                XXH64_hash_t crcOrig;
                 XXH64_update(&xxhOrig, testInput + iNext, messageSize);
                 crcOrig = XXH64_digest(&xxhOrig);
 
@@ -1362,7 +1465,7 @@
                 FUZ_CHECKTEST(result!=(int)messageSize, "ringBuffer : LZ4_decompress_safe_continue() test failed");
 
                 XXH64_update(&xxhNewSafe, testVerify + dNext, messageSize);
-                { U64 const crcNew = XXH64_digest(&xxhNewSafe);
+                { XXH64_hash_t const crcNew = XXH64_digest(&xxhNewSafe);
                   FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_continue() decompression corruption"); }
 
                 assert(messageSize < INT_MAX);
@@ -1370,7 +1473,7 @@
                 FUZ_CHECKTEST(result!=compressedSize, "ringBuffer : LZ4_decompress_fast_continue() test failed");
 
                 XXH64_update(&xxhNewFast, testVerify + dNext, messageSize);
-                { U64 const crcNew = XXH64_digest(&xxhNewFast);
+                { XXH64_hash_t const crcNew = XXH64_digest(&xxhNewFast);
                   FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_fast_continue() decompression corruption"); }
 
                 /* prepare next message */
@@ -1391,6 +1494,7 @@
          */
         {   XXH64_state_t xxhOrig;
             XXH64_state_t xxhNewSafe, xxhNewFast;
+            XXH64_hash_t crcOrig;
             LZ4_streamDecode_t decodeStateSafe, decodeStateFast;
             const int maxMessageSizeLog = 12;
             const int maxMessageSize = 1 << maxMessageSizeLog;
@@ -1487,9 +1591,88 @@
                 iNext = (FUZ_rand(&randState) & 65535);
                 if (dNext + maxMessageSize > dBufferSize) dNext = 0;
             }
-        }
+        }  /* Ring buffer test : Non synchronized decoder */
     }
 
+    DISPLAYLEVEL(3, "LZ4_compress_HC_destSize : ");
+    /* cross-check LZ4_compress_HC_destSize() against LZ4_compress_destSize() on constant-byte input */
+    {   LZ4_streamHC_t* const sHC = LZ4_createStreamHC();
+        int const src_buf_size = 3 MB;
+        int const dst_buf_size = 6 KB;
+        int const payload = 0;   /* byte value used to fill both source buffers */
+        int const dst_step = 43;
+        int const dst_min_len = 33 + (int)(FUZ_rand(&randState) % (unsigned)dst_step);   /* random start within one step */
+        int const dst_max_len = 5000;
+        int slen, dlen;
+        char* sbuf1 = (char*)malloc((size_t)src_buf_size + 1);
+        char* sbuf2 = (char*)malloc((size_t)src_buf_size + 1);
+        char* dbuf1 = (char*)malloc((size_t)dst_buf_size + 1);
+        char* dbuf2 = (char*)malloc((size_t)dst_buf_size + 1);
+
+        assert(sHC != NULL);
+        assert(dst_buf_size > dst_max_len);
+        if (!sbuf1 || !sbuf2 || !dbuf1 || !dbuf2) {
+            EXIT_MSG("not enough memory for FUZ_unitTests (destSize)");
+        }
+        for (dlen = dst_min_len; dlen <= dst_max_len; dlen += dst_step) {
+            int src_len = (dlen - 10)*255 + 24;
+            if (src_len + 10 >= src_buf_size) break;   /* END of check */
+            for (slen = src_len - 3; slen <= src_len + 3; slen++) {   /* probe a +/-3 window around src_len */
+                int srcsz1, srcsz2;
+                int dsz1, dsz2;
+                int res1, res2;
+                char const endchk = (char)0x88;   /* sentinel stored at dbuf[dlen] to detect writes past dlen */
+                DISPLAYLEVEL(5, "slen = %i, ", slen);
+
+                srcsz1 = slen;
+                memset(sbuf1, payload, (size_t)slen);
+                memset(dbuf1, 0, (size_t)dlen);
+                dbuf1[dlen] = endchk;
+                dsz1 = LZ4_compress_destSize(sbuf1, dbuf1, &srcsz1, dlen);
+                DISPLAYLEVEL(5, "LZ4_compress_destSize: %i bytes compressed into %i bytes, ", srcsz1, dsz1);
+                DISPLAYLEVEL(5, "last token : 0x%0X, ", (dsz1 >= 6) ? dbuf1[dsz1 - 6] : 0);   /* guard : no negative index when the result is too small */
+                DISPLAYLEVEL(5, "last ML extra lenbyte : 0x%0X, \n", (dsz1 >= 7) ? dbuf1[dsz1 - 7] : 0);   /* same guard */
+                FUZ_CHECKTEST(dbuf1[dlen] != endchk, "LZ4_compress_destSize() overwrite dst buffer !");
+                FUZ_CHECKTEST(dsz1 <= 0,             "LZ4_compress_destSize() compression failed");
+                FUZ_CHECKTEST(dsz1 > dlen,           "LZ4_compress_destSize() result larger than dst buffer !");
+                FUZ_CHECKTEST(srcsz1 > slen,         "LZ4_compress_destSize() read more than src buffer !");
+
+                res1 = LZ4_decompress_safe(dbuf1, sbuf1, dsz1, src_buf_size);
+                FUZ_CHECKTEST(res1 != srcsz1,        "LZ4_compress_destSize() decompression failed!");
+
+                srcsz2 = slen;
+                memset(sbuf2, payload, (size_t)slen);
+                memset(dbuf2, 0, (size_t)dlen);
+                dbuf2[dlen] = endchk;
+                LZ4_resetStreamHC(sHC, compressionLevel);
+                dsz2 = LZ4_compress_HC_destSize(sHC, sbuf2, dbuf2, &srcsz2, dlen, compressionLevel);
+                DISPLAYLEVEL(5, "LZ4_compress_HC_destSize: %i bytes compressed into %i bytes, ", srcsz2, dsz2);
+                DISPLAYLEVEL(5, "last token : 0x%0X, ", (dsz2 >= 6) ? dbuf2[dsz2 - 6] : 0);   /* guard : no negative index when the result is too small */
+                DISPLAYLEVEL(5, "last ML extra lenbyte : 0x%0X, \n", (dsz2 >= 7) ? dbuf2[dsz2 - 7] : 0);   /* same guard */
+                FUZ_CHECKTEST(dbuf2[dlen] != endchk,      "LZ4_compress_HC_destSize() overwrite dst buffer !");
+                FUZ_CHECKTEST(dsz2 <= 0,                  "LZ4_compress_HC_destSize() compression failed");
+                FUZ_CHECKTEST(dsz2 > dlen,                "LZ4_compress_HC_destSize() result larger than dst buffer !");
+                FUZ_CHECKTEST(srcsz2 > slen,              "LZ4_compress_HC_destSize() read more than src buffer !");
+                FUZ_CHECKTEST(dsz2 != dsz1,               "LZ4_compress_HC_destSize() return incorrect result !");
+                FUZ_CHECKTEST(srcsz2 != srcsz1,           "LZ4_compress_HC_destSize() return incorrect src buffer size "
+                                                          ": srcsz2(%i) != srcsz1(%i)", srcsz2, srcsz1);
+                FUZ_CHECKTEST(memcmp(dbuf2, dbuf1, (size_t)dsz2), "LZ4_compress_HC_destSize() return incorrect data into dst buffer !");
+
+                res2 = LZ4_decompress_safe(dbuf2, sbuf1, dsz2, src_buf_size);   /* decode the HC output into sbuf1, then compare with sbuf2 */
+                FUZ_CHECKTEST(res2 != srcsz1,             "LZ4_compress_HC_destSize() decompression failed!");
+
+                FUZ_CHECKTEST(memcmp(sbuf1, sbuf2, (size_t)res2), "LZ4_compress_HC_destSize() decompression corruption!");
+            }
+        }
+        LZ4_freeStreamHC(sHC);
+        free(sbuf1);
+        free(sbuf2);
+        free(dbuf1);
+        free(dbuf2);
+    }
+    DISPLAYLEVEL(3, " OK \n");
+
+
     /* clean up */
     free(testInput);
     free(testCompressed);
diff --git a/tests/test_install.sh b/tests/test_install.sh
index f9de402..122bac5 100755
--- a/tests/test_install.sh
+++ b/tests/test_install.sh
@@ -1,7 +1,15 @@
 #/usr/bin/env sh
 set -e
 
+
 make="make -C $lz4_root"
+unamestr=$(uname)
+# Pick the make command per OS: gmake on FreeBSD/OpenBSD, plain make everywhere else.
+case "$unamestr" in
+  FreeBSD|OpenBSD) make="gmake -C $lz4_root" ;;
+  *)               make="make -C $lz4_root" ;;
+esac
+
 for cmd in install uninstall; do
   for upper in DUMMY PREFIX EXEC_PREFIX LIBDIR INCLUDEDIR PKGCONFIGDIR BINDIR MANDIR MAN1DIR ; do
     lower=$(echo $upper | tr '[:upper:]' '[:lower:]')
diff --git a/tmp b/tmp
new file mode 100644
index 0000000..c97c12f
--- /dev/null
+++ b/tmp
Binary files differ
diff --git a/tmpsparse b/tmpsparse
new file mode 100644
index 0000000..c97c12f
--- /dev/null
+++ b/tmpsparse
Binary files differ
diff --git a/visual/.gitignore b/visual/.gitignore
deleted file mode 100644
index 276f8f5..0000000
--- a/visual/.gitignore
+++ /dev/null
@@ -1,10 +0,0 @@
-# Visual C++
-.vs/
-*Copy
-*.db
-*.opensdf
-*.sdf
-*.suo
-*.user
-ver*/
-VS2010/bin/