Upgrade zstd to v1.5.0 am: ddc8731991 am: 62c8893917

Original change: https://android-review.googlesource.com/c/platform/external/zstd/+/1823960

Change-Id: Ic010c19fbaf24c1e3d746669f99d14730a006eaf
diff --git a/.circleci/config.yml b/.circleci/config.yml
index bcf2e1d..c634737 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -31,38 +31,11 @@
           command: |
             make gnu90build; make clean
             make gnu99build; make clean
-            make ppc64build; make clean
-            make ppcbuild  ; make clean
-            make armbuild  ; make clean
+            make ppc64build V=1; make clean
+            make ppcbuild   V=1; make clean
+            make armbuild   V=1; make clean
             make -C tests test-legacy test-longmatch; make clean
             make -C lib libzstd-nomt; make clean
-  # This step is only run on release tags.
-  # It publishes the source tarball as artifacts and if the GITHUB_TOKEN
-  # environment variable is set it will publish the source tarball to the
-  # tagged release.
-  publish-github-release:
-    docker:
-      - image: fbopensource/zstd-circleci-primary:0.0.1
-    environment:
-      CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
-    steps:
-      - checkout
-      - run:
-          name: Publish
-          command: |
-            export VERSION=$(echo $CIRCLE_TAG | tail -c +2)
-            export ZSTD_VERSION=zstd-$VERSION
-            git archive $CIRCLE_TAG --prefix $ZSTD_VERSION/ --format tar \
-                        -o $ZSTD_VERSION.tar
-            sha256sum $ZSTD_VERSION.tar > $ZSTD_VERSION.tar.sha256
-            zstd -19 $ZSTD_VERSION.tar
-            sha256sum $ZSTD_VERSION.tar.zst > $ZSTD_VERSION.tar.zst.sha256
-            gzip -k -9 $ZSTD_VERSION.tar
-            sha256sum $ZSTD_VERSION.tar.gz > $ZSTD_VERSION.tar.gz.sha256
-            mkdir -p $CIRCLE_ARTIFACTS
-            cp $ZSTD_VERSION.tar* $CIRCLE_ARTIFACTS
-      - store_artifacts:
-          path: /tmp/circleci-artifacts
   # This step should only be run in a cron job
   regression-test:
     docker:
@@ -143,8 +116,9 @@
           filters:
             branches:
               only:
-                - master
+                - release
                 - dev
+                - master
     jobs:
       # Run daily long regression tests
       - regression-test
diff --git a/.cirrus.yml b/.cirrus.yml
index 8387ca1..fe17aac 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -2,7 +2,7 @@
   name: FreeBSD (shortest)
   freebsd_instance:
     matrix:
-      image_family: freebsd-12-1
+      image_family: freebsd-12-2
       # The stable 11.3 image causes "Agent is not responding" so use a snapshot
       image_family: freebsd-11-3-snap
   install_script: pkg install -y gmake coreutils
diff --git a/.github/workflows/generic-dev.yml b/.github/workflows/generic-dev.yml
index bb88de5..ae4ee7e 100644
--- a/.github/workflows/generic-dev.yml
+++ b/.github/workflows/generic-dev.yml
@@ -2,14 +2,13 @@
 
 on:
   pull_request:
-    branches: [ dev, master, actionsTest ]
+    branches: [ dev, release, actionsTest ]
 
 jobs:
 
 # Dev PR jobs that still have to be migrated from travis
 #
-# icc (need self-hosted)
-# versionTag
+# versionTag (only on release tags)
 # valgrindTest (keeps failing for some reason. need investigation)
 # staticAnalyze (need trusty so need self-hosted)
 # pcc-fuzz: (need trusty so need self-hosted)
@@ -19,7 +18,7 @@
 # I need admins permissions to the repo for that it looks like
 # So I'm tabling that for now
 #
-# The master branch exclusive jobs will be in a separate
+# The release branch exclusive jobs will be in a separate
 # workflow file (the osx tests and meson build that is)
 
   benchmarking:
@@ -31,27 +30,40 @@
 
   test:
     runs-on: ubuntu-latest
+    env:
+      DEVNULLRIGHTS: 1
+      READFROMBLOCKDEVICE: 1
     steps:
     - uses: actions/checkout@v2
     - name: make test
       run: make test
 
-  gcc-6-7-libzstd:
+  check-32bit: # designed to catch https://github.com/facebook/zstd/issues/2428
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v2
-    - name: gcc-6 + gcc-7 + libzstdmt compilation
+    - name: make check on 32-bit
       run: |
-        make gcc6install gcc7install
-        CC=gcc-6 CFLAGS=-Werror make -j all
-        make clean
+        sudo apt update
+        APT_PACKAGES="gcc-multilib" make apt-install
+        CFLAGS="-m32 -O1 -fstack-protector" make check V=1
+
+  gcc-7-libzstd:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - name: gcc-7 + libzstdmt compilation
+      run: |
+        make gcc7install
         CC=gcc-7 CFLAGS=-Werror make -j all
         make clean
         LDFLAGS=-Wl,--no-undefined make -C lib libzstd-mt
-        make -C tests zbufftest-dll
+
+    # candidate test (to check) : underlink test
+    # LDFLAGS=-Wl,--no-undefined : will make the linker fail if dll is underlinked
 
   gcc-8-asan-ubsan-testzstd:
-    runs-on: ubuntu-16.04 # fails on 18.04
+    runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v2
     - name: gcc-8 + ASan + UBSan + Test Zstd
@@ -59,30 +71,32 @@
         make gcc8install
         CC=gcc-8 CFLAGS="-Werror" make -j all
         make clean
-        CC=gcc-8 make -j uasan-test-zstd </dev/null
+        CC=gcc-8 make -j uasan-test-zstd </dev/null V=1
 
-  gcc-6-asan-ubsan-testzstd-32bit:
+  gcc-asan-ubsan-testzstd-32bit:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v2
-    - name: gcc-6 + ASan + UBSan + Test Zstd, 32bit mode
+    - name: ASan + UBSan + Test Zstd, 32bit mode
       run: |
-        make gcc6install libc6install
-        CC=gcc-6 CFLAGS="-Werror -m32" make -j all32
+        make libc6install
+        CFLAGS="-Werror -m32" make -j all32
         make clean
-        CC=gcc-6 make -j uasan-test-zstd32
+        make -j uasan-test-zstd32 V=1
 
-  clang-38-msan-testzstd:
-    runs-on: ubuntu-16.04 # fails on 18.04
+  clang-msan-testzstd:
+    runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v2
-    - name: clang-3.8 + MSan + Test Zstd
+    - name: clang + MSan + Test Zstd
       run: |
-        # make clang38install (doesn't work)
-        sudo apt-add-repository "deb http://llvm.org/apt/trusty/ llvm-toolchain-trusty-3.8 main"
         sudo apt-get update
-        sudo apt-get install clang-3.8
-        CC=clang-3.8 make clean msan-test-zstd HAVE_ZLIB=0 HAVE_LZ4=0 HAVE_LZMA=0
+        sudo apt-get install clang
+        CC=clang make msan-test-zstd HAVE_ZLIB=0 HAVE_LZ4=0 HAVE_LZMA=0 V=1
+
+    # Note : external libraries must be turned off when using MSAN tests,
+    # because they are not msan-instrumented,
+    # so any data coming from these libraries is always considered "uninitialized"
 
   cmake-build-and-test-check:
     runs-on: ubuntu-latest
@@ -104,26 +118,24 @@
         make gcc8install
         CC=gcc-8 FUZZER_FLAGS="--long-tests" make clean uasan-fuzztest
 
-  gcc-6-asan-ubsan-fuzz32:
+  gcc-asan-ubsan-fuzz32:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v2
-    - name: gcc-6 + ASan + UBSan + Fuzz Test 32bit
+    - name: ASan + UBSan + Fuzz Test 32bit
       run: |
-        make gcc6install libc6install
-        CC=gcc-6 CFLAGS="-O2 -m32" FUZZER_FLAGS="--long-tests" make uasan-fuzztest
+        make libc6install
+        CFLAGS="-O2 -m32" FUZZER_FLAGS="--long-tests" make uasan-fuzztest
 
-  clang-38-msan-fuzz:
-    runs-on: ubuntu-16.04 # fails on 18.04
+  clang-msan-fuzz:
+    runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v2
-    - name: clang-3.8 + MSan + Fuzz Test
+    - name: clang + MSan + Fuzz Test
       run: |
-        # make clang38install (doesn't work)
-        sudo apt-add-repository "deb http://llvm.org/apt/trusty/ llvm-toolchain-trusty-3.8 main"
         sudo apt-get update
-        sudo apt-get install clang-3.8
-        CC=clang-3.8 make clean msan-fuzztest
+        sudo apt-get install clang
+        CC=clang FUZZER_FLAGS="--long-tests" make clean msan-fuzztest
 
   asan-ubsan-msan-regression:
     runs-on: ubuntu-latest
@@ -147,7 +159,7 @@
         make clean
         make c99build
         make clean
-        make travis-install
+        make travis-install   # just ensures `make install` works
 
   mingw-cross-compilation:
     runs-on: ubuntu-latest
@@ -189,21 +201,25 @@
         tar -xf shellcheck-v0.7.1.linux.x86_64.tar.xz
         shellcheck-v0.7.1/shellcheck --shell=sh --severity=warning --exclude=SC2010 tests/playTests.sh
 
-  icc:
-    name: icc-check
-    runs-on: ubuntu-latest
-    steps:
-    - name: install icc
-      run: |
-        export DEBIAN_FRONTEND=noninteractive
-        sudo apt-get -qqq update
-        sudo apt-get install -y wget build-essential pkg-config cmake ca-certificates gnupg
-        sudo wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
-        sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
-        sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
-        sudo apt-get update
-        sudo apt-get install -y intel-basekit intel-hpckit
-    - uses: actions/checkout@v2
-    - name: make check
-      run: |
-        make CC=/opt/intel/oneapi/compiler/latest/linux/bin/intel64/icc check
+# For reference : icc tests
+# icc tests are currently failing on Github Actions, likely due to issues during the installation stage
+# To be fixed later
+#
+#  icc:
+#    name: icc-check
+#    runs-on: ubuntu-latest
+#    steps:
+#    - name: install icc
+#      run: |
+#        export DEBIAN_FRONTEND=noninteractive
+#        sudo apt-get -qqq update
+#        sudo apt-get install -y wget build-essential pkg-config cmake ca-certificates gnupg
+#        sudo wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
+#        sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
+#        sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
+#        sudo apt-get update
+#        sudo apt-get install -y intel-basekit intel-hpckit
+#    - uses: actions/checkout@v2
+#    - name: make check
+#      run: |
+#        make CC=/opt/intel/oneapi/compiler/latest/linux/bin/intel64/icc check
diff --git a/.github/workflows/generic-release.yml b/.github/workflows/generic-release.yml
index de4a1cb..cb91fb2 100644
--- a/.github/workflows/generic-release.yml
+++ b/.github/workflows/generic-release.yml
@@ -2,10 +2,10 @@
 
 on:
   pull_request:
-    # This will eventually only be for pushes to master
+    # This will eventually only be for pushes to release
     # but for dogfooding purposes, I'm running it even
     # on dev pushes
-    branches: [ dev, master, actionsTest ]
+    branches: [ dev, release, actionsTest ]
 
 jobs:
   # missing jobs
@@ -25,32 +25,22 @@
         make test
         # make -c lib all (need to fix. not working right now)
 
-  zbuff:
-    runs-on: ubuntu-16.04
-    steps:
-    - uses: actions/checkout@v2
-    - name: zbuff test
-      run: |
-        make -C tests test-zbuff
-
   tsan:
-    runs-on: ubuntu-16.04
+    runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v2
     - name: thread sanitizer
       run: |
-        sudo apt-add-repository "deb http://llvm.org/apt/trusty/ llvm-toolchain-trusty-3.8 main"
-        sudo apt-get update
-        sudo apt-get install clang-3.8
-        CC=clang-3.8 make tsan-test-zstream
-        CC=clang-3.8 make tsan-fuzztest
+        CC=clang make tsan-test-zstream
+        CC=clang make tsan-fuzztest
+
   zlib-wrapper:
     runs-on: ubuntu-16.04
     steps:
     - uses: actions/checkout@v2
     - name: zlib wrapper test
       run: |
-        make gpp6install valgrindinstall
+        make valgrindinstall
         make -C zlibWrapper test
         make -C zlibWrapper valgrindTest
 
diff --git a/.github/workflows/linux-kernel.yml b/.github/workflows/linux-kernel.yml
index 35871ff..124f777 100644
--- a/.github/workflows/linux-kernel.yml
+++ b/.github/workflows/linux-kernel.yml
@@ -2,7 +2,7 @@
 
 on:
   pull_request:
-    branches: [ dev, master, actionsTest ]
+    branches: [ dev, release, actionsTest ]
 
 jobs:
   test:
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 0e14345..5e5aae1 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -6,7 +6,7 @@
     strategy:
       fail-fast: false
       matrix:
-        sanitizer: [address, undefined, memory]
+        sanitizer: [address, undefined]
     steps:
     - name: Build Fuzzers (${{ matrix.sanitizer }})
       id: build
diff --git a/.github/workflows/publish-release-artifacts.yml b/.github/workflows/publish-release-artifacts.yml
new file mode 100644
index 0000000..952cb26
--- /dev/null
+++ b/.github/workflows/publish-release-artifacts.yml
@@ -0,0 +1,68 @@
+name: publish-release-artifacts
+
+on:
+  release:
+    types:
+      - created
+
+jobs:
+  publish-release-artifacts:
+    runs-on: ubuntu-latest
+    if: startsWith(github.ref, 'refs/tags/')
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Archive
+        env:
+          RELEASE_SIGNING_KEY: ${{ secrets.RELEASE_SIGNING_KEY }}
+          RELEASE_SIGNING_KEY_PASSPHRASE: ${{ secrets.RELEASE_SIGNING_KEY_PASSPHRASE }}
+        run: |
+          # compute file name
+          export TAG="$(echo "$GITHUB_REF" | sed -n 's_^refs/tags/__p')"
+          if [ -z "$TAG" ]; then
+            echo "action must be run on a tag. GITHUB_REF is not a tag: $GITHUB_REF"
+            exit 1
+          fi
+          # Attempt to extract "1.2.3" from "v1.2.3" to maintain artifact name backwards compat.
+          # Otherwise, degrade to using full tag.
+          export VERSION="$(echo "$TAG" | sed 's_^v\([0-9]\+\.[0-9]\+\.[0-9]\+\)$_\1_')"
+          export ZSTD_VERSION="zstd-$VERSION"
+
+          # archive
+          git archive $TAG \
+              --prefix $ZSTD_VERSION/ \
+              --format tar \
+              -o $ZSTD_VERSION.tar
+
+          # Do the rest of the work in a sub-dir so we can glob everything we want to publish.
+          mkdir artifacts/
+          mv $ZSTD_VERSION.tar artifacts/
+          cd artifacts/
+
+          # compress
+          zstd -k -19 $ZSTD_VERSION.tar
+          gzip -k  -9 $ZSTD_VERSION.tar
+
+          # we only publish the compressed tarballs
+          rm $ZSTD_VERSION.tar
+
+          # hash
+          sha256sum $ZSTD_VERSION.tar.zst > $ZSTD_VERSION.tar.zst.sha256
+          sha256sum $ZSTD_VERSION.tar.gz  > $ZSTD_VERSION.tar.gz.sha256
+
+          # sign
+          if [ -n "$RELEASE_SIGNING_KEY" ]; then
+            export GPG_BATCH_OPTS="--batch --no-use-agent --pinentry-mode loopback --no-tty --yes"
+            echo "$RELEASE_SIGNING_KEY" | gpg $GPG_BATCH_OPTS --import
+            gpg $GPG_BATCH_OPTS --armor --sign --sign-with signing@zstd.net --detach-sig --passphrase "$RELEASE_SIGNING_KEY_PASSPHRASE" --output $ZSTD_VERSION.tar.zst.sig $ZSTD_VERSION.tar.zst
+            gpg $GPG_BATCH_OPTS --armor --sign --sign-with signing@zstd.net --detach-sig --passphrase "$RELEASE_SIGNING_KEY_PASSPHRASE" --output $ZSTD_VERSION.tar.gz.sig  $ZSTD_VERSION.tar.gz
+          fi
+
+      - name: Publish
+        uses: skx/github-action-publish-binaries@release-1.3
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          args: artifacts/*
diff --git a/.gitignore b/.gitignore
index ae277e9..ea574d7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,3 +50,5 @@
 *.code-workspace
 compile_commands.json
 .clangd
+perf.data
+perf.data.old
diff --git a/.travis.yml b/.travis.yml
index 226d4c0..b0e7048 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -8,6 +8,7 @@
 branches:
   only:
   - dev
+  - release
   - master
   - travisTest
 
@@ -31,49 +32,6 @@
       script:
         - make check
 
-    - name: make benchmarking
-      script:
-        - make benchmarking
-
-    - name: make test (complete)
-      script:
-        # DEVNULLRIGHTS : will request sudo rights to test permissions on /dev/null
-        - DEVNULLRIGHTS=test make test
-
-    - name: gcc-6 + gcc-7 + libzstdmt compilation   # ~ 6mn
-      script:
-        - make gcc6install gcc7install
-        - CC=gcc-6 CFLAGS=-Werror make -j all
-        - make clean
-        - CC=gcc-7 CFLAGS=-Werror make -j all
-        - make clean
-        - LDFLAGS=-Wl,--no-undefined make -C lib libzstd-mt
-        - make -C tests zbufftest-dll
-        # LDFLAGS=-Wl,--no-undefined : will make the linker fail if dll is underlinked
-        # zbufftest-dll : test that a user program can link to multi-threaded libzstd without specifying -pthread
-
-    - name: gcc-8 + ASan + UBSan + Test Zstd   # ~6.5mn
-      script:
-        - make gcc8install
-        - CC=gcc-8 CFLAGS="-Werror" make -j all
-        - make clean
-        - CC=gcc-8 make -j uasan-test-zstd </dev/null   # test when stdin is not a tty
-
-    - name: gcc-6 + ASan + UBSan + Test Zstd, 32bit mode    # ~4mn
-      script:
-        - make gcc6install libc6install
-        - CC=gcc-6 CFLAGS="-Werror -m32" make -j all32
-        - make clean
-        - CC=gcc-6 make -j uasan-test-zstd32   # note : can complain about pointer overflow
-
-    - name: clang-3.8 + MSan + Test Zstd    # ~3.5mn
-      script:
-        - make clang38install
-        # External libraries must be turned off when using MSAN tests,
-        # because they are not msan-instrumented,
-        # so any data coming from these libraries is always considered "uninitialized"
-        - CC=clang-3.8 make clean msan-test-zstd HAVE_ZLIB=0 HAVE_LZ4=0 HAVE_LZMA=0
-
     - name: Minimal Decompressor Macros    # ~5mn
       script:
         - make clean && make -j all ZSTD_LIB_MINIFY=1 MOREFLAGS="-Werror"
@@ -85,51 +43,11 @@
         - make clean && make -j all MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS"
         - make clean && make check MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS"
 
-    - name: cmake build and test check    # ~6mn
-      script:
-        - make cmakebuild
-
     - name: static analyzer scanbuild    # ~26mn
       dist: trusty  # note : it's important to pin down a version of static analyzer, since different versions report different false positives
       script:
         - make staticAnalyze
 
-    - name: gcc-8 + ASan + UBSan + Fuzz Test    # ~19mn
-      script:
-        - make gcc8install
-        - CC=gcc-8 make clean uasan-fuzztest
-
-    - name: gcc-6 + ASan + UBSan + Fuzz Test 32bit    # ~15.5mn
-      script:
-        - make gcc6install libc6install
-        - CC=gcc-6 CFLAGS="-O2 -m32" make uasan-fuzztest   # can complain about pointer overflow
-
-    - name: clang-3.8 + MSan + Fuzz Test     # ~14.5mn
-      script:
-        - make clang38install
-        - CC=clang-3.8 make clean msan-fuzztest
-
-    - name: ASan + UBSan + MSan + Regression Test    # ~ 4.5mn
-      script:
-        - make -j uasanregressiontest
-        - make clean
-        - make -j msanregressiontest
-
-    - name: C++, gnu90 and c99 compatibility   # ~3mn
-      script:
-        - make cxxtest
-        - make clean
-        - make gnu90build
-        - make clean
-        - make c99build
-        - make clean
-        - make travis-install    # just ensures `make install` works
-
-    - name: mingw cross-compilation
-      script :
-        - sudo update-alternatives --set x86_64-w64-mingw32-g++ /usr/bin/x86_64-w64-mingw32-g++-posix;
-        - CC=x86_64-w64-mingw32-gcc CXX=x86_64-w64-mingw32-g++ CFLAGS="-Werror -O1" make zstd
-
     - name: Valgrind + Fuzz Test Stack Mode    # ~ 7mn
       script:
         - make valgrindinstall
@@ -162,33 +80,28 @@
         - make -C tests checkTag
         - tests/checkTag "$TRAVIS_BRANCH"
 
-    # tests for master branch and cron job only
+    # tests for release branch and cron job only
     - name: OS-X    # ~13mn
-      if: branch = master
+      if: branch = release
       os: osx
       script:
         - make test
         - make -C lib all
 
-    - name: zbuff test
-      if: branch = master
-      script:
-        - make -C tests test-zbuff
-
     - name: Versions Compatibility Test   # 11.5mn
-      if: branch = master
+      if: branch = release
       script:
         - make -C tests versionsTest
 
     - name: thread sanitizer   # ~29mn
-      if: branch = master
+      if: branch = release
       script:
         - make clang38install
         - CC=clang-3.8 make tsan-test-zstream
         - CC=clang-3.8 make tsan-fuzztest
 
     - name: PPC64LE + Fuzz test  # ~13mn
-      if: branch = master
+      if: branch = release
       arch: ppc64le
       script:
         - cat /proc/cpuinfo
@@ -196,41 +109,23 @@
 
     - name: Qemu PPC64 + Fuzz test  # ~13mn, presumed Big-Endian (?)
       dist: trusty  # note : PPC64 cross-compilation for Qemu tests seems broken on Xenial
-      if: branch = master
+      if: branch = release
       script:
         - make ppcinstall
         - make ppc64fuzz
 
     # note : we already have aarch64 tests on hardware
     - name: Qemu aarch64 + Fuzz Test (on Xenial)    # ~14mn
-      if: branch = master
+      if: branch = release
       dist: xenial
       script:
         - make arminstall
         - make aarch64fuzz
 
-    - name: zlib wrapper test    # ~7.5mn
-      if: branch = master
-      script:
-        - make gpp6install valgrindinstall
-        - make -C zlibWrapper test
-        - make -C zlibWrapper valgrindTest
-
-    - name: LZ4, thread pool, and partial libs tests    # ~4mn
-      if: branch = master
-      script:
-        - make lz4install
-        - make -C tests test-lz4
-        - make check < /dev/null | tee    # mess with lz4 console detection
-        - make clean
-        - make -C tests test-pool
-        - make clean
-        - bash tests/libzstd_partial_builds.sh
-
     # meson dedicated test
     - name: Xenial (Meson + clang)    # ~15mn
-      if: branch = master
-      dist: xenial
+      if: branch = release
+      dist: bionic
       language: cpp
       compiler: clang
       install:
diff --git a/CHANGELOG b/CHANGELOG
index 562211c..3b15165 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,4 +1,89 @@
-v1.4.7
+v1.5.0  (May 11, 2021)
+api: Various functions promoted from experimental to stable API: (#2579-2581, @senhuang42)
+  `ZSTD_defaultCLevel()`
+  `ZSTD_getDictID_fromCDict()`
+api: Several experimental functions have been deprecated and will emit a compiler warning (#2582, @senhuang42)
+  `ZSTD_compress_advanced()`
+  `ZSTD_compress_usingCDict_advanced()`
+  `ZSTD_compressBegin_advanced()`
+  `ZSTD_compressBegin_usingCDict_advanced()`
+  `ZSTD_initCStream_srcSize()`
+  `ZSTD_initCStream_usingDict()`
+  `ZSTD_initCStream_usingCDict()`
+  `ZSTD_initCStream_advanced()`
+  `ZSTD_initCStream_usingCDict_advanced()`
+  `ZSTD_resetCStream()`
+api: ZSTDMT_NBWORKERS_MAX reduced to 64 for 32-bit environments (@Cyan4973)
+perf: Significant speed improvements for middle compression levels (#2494, @senhuang42 @terrelln)
+perf: Block splitter to improve compression ratio, enabled by default for high compression levels (#2447, @senhuang42)
+perf: Decompression loop refactor, speed improvements on `clang` and for `--long` modes (#2614 #2630, @Cyan4973)
+perf: Reduced stack usage during compression and decompression entropy stage (#2522 #2524, @terrelln)
+bug: Improve setting permissions of created files (#2525, @felixhandte)
+bug: Fix large dictionary non-determinism (#2607, @terrelln)
+bug: Fix non-determinism test failures on Linux i686 (#2606, @terrelln)
+bug: Fix various dedicated dictionary search bugs (#2540 #2586, @senhuang42 @felixhandte)
+bug: Ensure `ZSTD_estimateCCtxSize*()` monotonically increases with compression level (#2538, @senhuang42)
+bug: Fix --patch-from mode parameter bound bug with small files (#2637, @occivink)
+bug: Fix UBSAN error in decompression (#2625, @terrelln)
+bug: Fix superblock compression divide by zero bug (#2592, @senhuang42)
+bug: Make the number of physical CPU cores detection more robust (#2517, @PaulBone)
+doc: Improve `zdict.h` dictionary training API documentation (#2622, @terrelln)
+doc: Note that public `ZSTD_free*()` functions accept NULL pointers (#2521, @animalize)
+doc: Add style guide docs for open source contributors (#2626, @Cyan4973)
+tests: Better regression test coverage for different dictionary modes (#2559, @senhuang42)
+tests: Better test coverage of index reduction (#2603, @terrelln)
+tests: OSS-Fuzz coverage for seekable format (#2617, @senhuang42)
+tests: Test coverage for ZSTD threadpool API (#2604, @senhuang42)
+build: Dynamic library built multithreaded by default (#2584, @senhuang42)
+build: Move  `zstd_errors.h`  and  `zdict.h`  to  `lib/`  root (#2597, @terrelln)
+build: Allow `ZSTDMT_JOBSIZE_MIN` to be configured at compile-time, reduce default to 512KB (#2611, @Cyan4973)
+build: Single file library build script moved to `build/` directory (#2618, @felixhandte)
+build: `ZBUFF_*()` is no longer built by default (#2583, @senhuang42)
+build: Fixed Meson build (#2548, @SupervisedThinking @kloczek)
+build: Fix excessive compiler warnings with clang-cl and CMake (#2600, @nickhutchinson)
+build: Detect presence of `md5` on Darwin (#2609, @felixhandte)
+build: Avoid SIGBUS on armv6 (#2633, @bmwiedmann)
+cli: `--progress` flag added to always display progress bar (#2595, @senhuang42)
+cli: Allow reading from block devices with `--force` (#2613, @felixhandte)
+cli: Fix CLI filesize display bug (#2550, @Cyan4973)
+cli: Fix windows CLI `--filelist` end-of-line bug (#2620, @Cyan4973)
+contrib: Various fixes for linux kernel patch (#2539, @terrelln)
+contrib: Seekable format - Decompression hanging edge case fix (#2516, @senhuang42)
+contrib: Seekable format - New seek table-only API  (#2113 #2518, @mdittmer @Cyan4973)
+contrib: Seekable format - Fix seek table descriptor check when loading (#2534, @foxeng)
+contrib: Seekable format - Decompression fix for large offsets (#2594, @azat)
+misc: Automatically published release tarballs available on Github (#2535, @felixhandte)
+
+v1.4.9  (Mar 1, 2021)
+bug: Use `umask()` to Constrain Created File Permissions (#2495, @felixhandte)
+bug: Make Simple Single-Pass Functions Ignore Advanced Parameters (#2498, @terrelln)
+api: Add (De)Compression Tracing Functionality (#2482, @terrelln)
+api: Support References to Multiple DDicts (#2446, @senhuang42)
+api: Add Function to Generate Skippable Frame (#2439, @senhuang42)
+perf: New Algorithms for the Long Distance Matcher (#2483, @mpu)
+perf: Performance Improvements for Long Distance Matcher (#2464, @mpu)
+perf: Don't Shrink Window Log when Streaming with a Dictionary (#2451, @terrelln)
+cli: Fix `--output-dir-mirror`'s Rejection of `..`-Containing Paths (#2512, @felixhandte)
+cli: Allow Input From Console When `-f`/`--force` is Passed (#2466, @felixhandte)
+cli: Improve Help Message (#2500, @senhuang42)
+tests: Remove Flaky Tests (#2455, #2486, #2445, @Cyan4973)
+tests: Correctly Invoke md5 Utility on NetBSD (#2492, @niacat)
+tests: Avoid Using `stat -c` on NetBSD (#2513, @felixhandte)
+build: Zstd CLI Can Now be Linked to Dynamic `libzstd` (#2457, #2454 @Cyan4973)
+build: Hide and Avoid Using Static-Only Symbols (#2501, #2504, @skitt)
+build: CMake: Enable Only C for lib/ and programs/ Projects (#2498, @concatime)
+build: CMake: Use `configure_file()` to Create the `.pc` File (#2462, @lazka)
+build: Fix Fuzzer Compiler Detection & Update UBSAN Flags (#2503, @terrelln)
+build: Add Guards for `_LARGEFILE_SOURCE` and `_LARGEFILE64_SOURCE` (#2444, @indygreg)
+build: Improve `zlibwrapper` Makefile (#2437, @Cyan4973)
+contrib: Add `recover_directory` Program (#2473, @terrelln)
+doc: Change License Year to 2021 (#2452 & #2465, @terrelln & @senhuang42)
+doc: Fix Typos (#2459, @ThomasWaldmann)
+
+v1.4.8  (Dec 18, 2020)
+hotfix: wrong alignment of an internal buffer
+
+v1.4.7  (Dec 16, 2020)
 perf: stronger --long mode at high compression levels, by @senhuang42
 perf: stronger --patch-from at high compression levels, thanks to --long improvements
 perf: faster dictionary compression at medium compression levels, by @felixhandte
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index bb85d58..5effa26 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -5,7 +5,7 @@
 ## Our Development Process
 New versions are being developed in the "dev" branch,
 or in their own feature branch.
-When they are deemed ready for a release, they are merged into "master".
+When they are deemed ready for a release, they are merged into "release".
 
 As a consequences, all contributions must stage first through "dev"
 or their own feature branch.
@@ -383,7 +383,7 @@
 that get run will depend on the destination branch you specify. Some tests take
 longer to run than others. Currently, our CI is set up to run a short
 series of tests when creating a PR to the dev branch and a longer series of tests
-when creating a PR to the master branch. You can look in the configuration files
+when creating a PR to the release branch. You can look in the configuration files
 of the respective CI platform for more information on what gets run when.
 
 Most people will just want to create a PR with the destination set to their local dev
@@ -399,7 +399,105 @@
 outlined on that page and do not file a public issue.
 
 ## Coding Style
+It's a pretty long topic, which is difficult to summarize in a single paragraph.
+As a rule of thumb, try to imitate the coding style of
+similar lines of code around your contribution.
+The following is a non-exhaustive list of rules employed in the zstd code base:
+
+### C90
+This code base follows the strict C90 standard,
+with 2 extensions : 64-bit `long long` types, and variadic macros.
+This rule is applied strictly to code within `lib/` and `programs/`.
+Sub-projects in `contrib/` are allowed to use other conventions.
+
+### C++ direct compatibility : symbol mangling
+All public symbol declarations must be wrapped in `extern "C" { … }`,
+so that this project can be compiled as C++98 code,
+and linked into C++ applications.
+
+### Minimal Frugal
+This design requirement is fundamental to preserve the portability of the code base.
+#### Dependencies
+- Reduce dependencies to the minimum possible level.
+  Any dependency should be considered “bad” by default,
+  and only tolerated because it provides a service in a better way than can be achieved locally.
+  The only external dependencies this repository tolerates are
+  standard C libraries, and in rare cases, system level headers.
+- Within `lib/`, this policy is even more drastic.
+  The only external dependencies allowed are `<assert.h>`, `<stdlib.h>`, `<string.h>`,
+  and even then, not directly.
+  In particular, no function shall ever allocate on heap directly,
+  and must use instead `ZSTD_malloc()` and equivalent.
+  Other accepted non-symbol headers are `<stddef.h>` and `<limits.h>`.
+- Within the project, there is a strict hierarchy of dependencies that must be respected.
+  `programs/` is allowed to depend on `lib/`, but only its public API.
+  Within `lib/`, `lib/common` doesn't depend on any other directory.
+  `lib/compress` and `lib/decompress` shall not depend on each other.
+  `lib/dictBuilder` can depend on `lib/common` and `lib/compress`, but not `lib/decompress`.
+#### Resources
+- Functions in `lib/` must use very little stack space,
+  several dozen bytes max.
+  Everything larger must use the heap allocator,
+  or require a scratch buffer to be emplaced manually.
+
+### Naming
+* All public symbols are prefixed with `ZSTD_`
+  + private symbols, with a scope limited to their own unit, are free of this restriction.
+    However, since `libzstd` source code can be amalgamated,
+    each symbol name must attempt to be (and remain) unique.
+    Avoid too generic names that could become ground for future collisions.
+    This generally implies usage of some form of prefix.
+* For symbols (functions and variables), naming convention is `PREFIX_camelCase`.
+  + In some advanced cases, one can also find :
+    - `PREFIX_prefix2_camelCase`
+    - `PREFIX_camelCase_extendedQualifier`
+* Multi-word names generally consist of an action followed by an object:
+  - for example : `ZSTD_createCCtx()`
+* Prefer positive actions
+  - `goBackward` rather than `notGoForward`
+* Type names (`struct`, etc.) follow similar convention,
+  except that they are allowed and even invited to start with an uppercase letter.
+  Example : `ZSTD_CCtx`, `ZSTD_CDict`
+* Macro names are all capital letters.
+  The same composition rules (`PREFIX_NAME_QUALIFIER`) apply.
+* File names are all lowercase letters.
+  The convention is `snake_case`.
+  File names **must** be unique across the entire code base,
+  even when they stand in clearly separated directories.
+
+### Qualifiers
+* This code base is `const` friendly, if not `const` fanatical.
+  Any variable that can be `const` (aka. read-only) **must** be `const`.
+  Any pointer whose content will not be modified must be `const`.
+  This property is then controlled at compiler level.
+  `const` variables are an important signal to readers that this variable isn’t modified.
+  Conversely, non-const variables are a signal to readers to watch out for modifications later on in the function.
+* If a function must be inlined, mention it explicitly,
+  using project's own portable macros, such as `FORCE_INLINE_ATTR`,
+  defined in `lib/common/compiler.h`.
+
+### Debugging
+* **Assertions** are welcome, and should be used very liberally,
+  to control any condition the code expects for its correct execution.
+  These assertion checks will be run in debug builds, and disabled in production.
+* For traces, this project provides its own debug macros,
+  in particular `DEBUGLOG(level, ...)`, defined in `lib/common/debug.h`.
+
+### Code documentation
+* Avoid code documentation that merely repeats what the code is already stating.
+  Whenever applicable, prefer employing the code as the primary way to convey explanations.
+  Example 1 : `int nbTokens = n;` instead of `int i = n; /* i is a nb of tokens */`.
+  Example 2 : `assert(size > 0);` instead of `/* here, size should be positive */`.
+* At declaration level, the documentation explains how to use the function or variable
+  and when applicable why it's needed, or the scenarios where it can be useful.
+* At implementation level, the documentation explains the general outline of the algorithm employed,
+  and when applicable why this specific choice was preferred.
+
+### General layout
 * 4 spaces for indentation rather than tabs
+* Code documentation shall directly precede function declaration or implementation
+* Each function implementation and its code documentation should be preceded and followed by an empty line
+
 
 ## License
 By contributing to Zstandard, you agree that your contributions will be licensed
diff --git a/METADATA b/METADATA
index ec7a6bb..e6845ab 100644
--- a/METADATA
+++ b/METADATA
@@ -9,11 +9,11 @@
     type: GIT
     value: "https://github.com/facebook/zstd"
   }
-  version: "v1.4.7"
+  version: "v1.5.0"
   license_type: RESTRICTED
   last_upgrade_date {
-    year: 2020
-    month: 12
-    day: 17
+    year: 2021
+    month: 9
+    day: 10
   }
 }
diff --git a/Makefile b/Makefile
index 2832fb4..c1908f0 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2015-2020, Yann Collet, Facebook, Inc.
+# Copyright (c) 2015-2021, Yann Collet, Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
@@ -48,7 +48,7 @@
 
 # skip zwrapper, can't build that on alternate architectures without the proper zlib installed
 .PHONY: allzstd
-allzstd: lib-all
+allzstd: lib
 	$(Q)$(MAKE) -C $(PRGDIR) all
 	$(Q)$(MAKE) -C $(TESTDIR) all
 
@@ -57,9 +57,8 @@
 	$(MAKE) -C $(PRGDIR) zstd32
 	$(MAKE) -C $(TESTDIR) all32
 
-.PHONY: lib lib-release libzstd.a
-lib-all : lib
-lib lib-release lib-all :
+.PHONY: lib lib-release lib-mt lib-nomt
+lib lib-release lib-mt lib-nomt:
 	$(Q)$(MAKE) -C $(ZSTDDIR) $@
 
 .PHONY: zstd zstd-release
@@ -123,8 +122,8 @@
 	$(MAKE) -C contrib/seekable_format/examples all
 	$(MAKE) -C contrib/seekable_format/tests test
 	$(MAKE) -C contrib/largeNbDicts all
-	cd contrib/single_file_libs/ ; ./build_decoder_test.sh
-	cd contrib/single_file_libs/ ; ./build_library_test.sh
+	cd build/single_file_libs/ ; ./build_decoder_test.sh
+	cd build/single_file_libs/ ; ./build_library_test.sh
 
 .PHONY: cleanTabs
 cleanTabs:
@@ -152,7 +151,6 @@
 ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD DragonFly NetBSD MSYS_NT Haiku))
 
 HOST_OS = POSIX
-CMAKE_PARAMS = -DZSTD_BUILD_CONTRIB:BOOL=ON -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_ZLIB_SUPPORT:BOOL=ON -DZSTD_LZMA_SUPPORT:BOOL=ON -DCMAKE_BUILD_TYPE=Release
 
 HAVE_COLORNEVER = $(shell echo a | egrep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0)
 EGREP_OPTIONS ?=
@@ -180,7 +178,7 @@
 	    done \
 	} | column -t -s $$'\t'
 
-.PHONY: install armtest usan asan uasan
+.PHONY: install armtest usan asan uasan msan asan32
 install:
 	$(Q)$(MAKE) -C $(ZSTDDIR) $@
 	$(Q)$(MAKE) -C $(PRGDIR) $@
@@ -194,22 +192,19 @@
 travis-install:
 	$(MAKE) install PREFIX=~/install_test_dir
 
-.PHONY: gcc5build
+.PHONY: gcc5build gcc6build gcc7build clangbuild m32build armbuild aarch64build ppcbuild ppc64build
 gcc5build: clean
 	gcc-5 -v
 	CC=gcc-5 $(MAKE) all MOREFLAGS="-Werror"
 
-.PHONY: gcc6build
 gcc6build: clean
 	gcc-6 -v
 	CC=gcc-6 $(MAKE) all MOREFLAGS="-Werror"
 
-.PHONY: gcc7build
 gcc7build: clean
 	gcc-7 -v
 	CC=gcc-7 $(MAKE) all MOREFLAGS="-Werror"
 
-.PHONY: clangbuild
 clangbuild: clean
 	clang -v
 	CXX=clang++ CC=clang CFLAGS="-Werror -Wconversion -Wno-sign-conversion -Wdocumentation" $(MAKE) all
@@ -225,11 +220,12 @@
 	CC=aarch64-linux-gnu-gcc CFLAGS="-Werror" $(MAKE) allzstd
 
 ppcbuild: clean
-	CC=powerpc-linux-gnu-gcc CFLAGS="-m32 -Wno-attributes -Werror" $(MAKE) allzstd
+	CC=powerpc-linux-gnu-gcc CFLAGS="-m32 -Wno-attributes -Werror" $(MAKE) -j allzstd
 
 ppc64build: clean
-	CC=powerpc-linux-gnu-gcc CFLAGS="-m64 -Werror" $(MAKE) allzstd
+	CC=powerpc-linux-gnu-gcc CFLAGS="-m64 -Werror" $(MAKE) -j allzstd
 
+.PHONY: armfuzz aarch64fuzz ppcfuzz ppc64fuzz
 armfuzz: clean
 	CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static MOREFLAGS="-static" FUZZER_FLAGS=--no-big-tests $(MAKE) -C $(TESTDIR) fuzztest
 
@@ -243,7 +239,7 @@
 ppc64fuzz: clean
 	CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static MOREFLAGS="-m64 -static" FUZZER_FLAGS=--no-big-tests $(MAKE) -C $(TESTDIR) fuzztest
 
-.PHONY: cxxtest
+.PHONY: cxxtest gcc5test gcc6test armtest aarch64test ppctest ppc64test
 cxxtest: CXXFLAGS += -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror
 cxxtest: clean
 	$(MAKE) -C $(PRGDIR) all CC="$(CXX) -Wno-deprecated" CFLAGS="$(CXXFLAGS)"   # adding -Wno-deprecated to avoid clang++ warning on dealing with C files directly
@@ -272,6 +268,7 @@
 	$(MAKE) -C $(TESTDIR) datagen   # use native, faster
 	$(MAKE) -C $(TESTDIR) test CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static ZSTDRTTEST= MOREFLAGS="-m64 -static" FUZZER_FLAGS=--no-big-tests
 
+.PHONY: arm-ppc-compilation
 arm-ppc-compilation:
 	$(MAKE) -C $(PRGDIR) clean zstd CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static ZSTDRTTEST= MOREFLAGS="-Werror -static"
 	$(MAKE) -C $(PRGDIR) clean zstd CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static ZSTDRTTEST= MOREFLAGS="-Werror -static"
@@ -287,12 +284,10 @@
 msanregressiontest:
 	$(MAKE) -C $(FUZZDIR) regressiontest CC=clang CXX=clang++ CFLAGS="-O3 -fsanitize=memory" CXXFLAGS="-O3 -fsanitize=memory"
 
-# run UBsan with -fsanitize-recover=signed-integer-overflow
-# due to a bug in UBsan when doing pointer subtraction
-# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63303
-
+# run UBsan with -fsanitize-recover=pointer-overflow
+# this only works with recent compilers such as gcc 8+
 usan: clean
-	$(MAKE) test CC=clang MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize-recover=signed-integer-overflow -fsanitize=undefined -Werror"
+	$(MAKE) test CC=clang MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize-recover=pointer-overflow -fsanitize=undefined -Werror"
 
 asan: clean
 	$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=address -Werror"
@@ -310,21 +305,24 @@
 	$(MAKE) -C $(TESTDIR) test32 CC=clang MOREFLAGS="-g -fsanitize=address"
 
 uasan: clean
-	$(MAKE) test CC=clang MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize-recover=signed-integer-overflow -fsanitize=address,undefined -Werror"
+	$(MAKE) test CC=clang MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize-recover=pointer-overflow -fsanitize=address,undefined -Werror"
 
 uasan-%: clean
-	LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize-recover=signed-integer-overflow -fsanitize=address,undefined -Werror" $(MAKE) -C $(TESTDIR) $*
+	LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize-recover=pointer-overflow -fsanitize=address,undefined -Werror" $(MAKE) -C $(TESTDIR) $*
 
 tsan-%: clean
 	LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=thread -Werror" $(MAKE) -C $(TESTDIR) $* FUZZER_FLAGS=--no-big-tests
 
+.PHONY: apt-install
 apt-install:
 	sudo apt-get -yq --no-install-suggests --no-install-recommends --force-yes install $(APT_PACKAGES)
 
+.PHONY: apt-add-repo
 apt-add-repo:
 	sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
 	sudo apt-get update -y -qq
 
+.PHONY: ppcinstall arminstall valgrindinstall libc6install gcc6install gcc7install gcc8install gpp6install clang38install lz4install
 ppcinstall:
 	APT_PACKAGES="qemu-system-ppc qemu-user-static gcc-powerpc-linux-gnu" $(MAKE) apt-install
 
@@ -359,16 +357,18 @@
 endif
 
 
+CMAKE_PARAMS = -DZSTD_BUILD_CONTRIB:BOOL=ON -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_ZLIB_SUPPORT:BOOL=ON -DZSTD_LZMA_SUPPORT:BOOL=ON -DCMAKE_BUILD_TYPE=Release
+
 ifneq (,$(filter MSYS%,$(shell uname)))
 HOST_OS = MSYS
 CMAKE_PARAMS = -G"MSYS Makefiles" -DCMAKE_BUILD_TYPE=Debug -DZSTD_MULTITHREAD_SUPPORT:BOOL=OFF -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON
 endif
 
-
 #------------------------------------------------------------------------
 # target specific tests
 #------------------------------------------------------------------------
 ifneq (,$(filter $(HOST_OS),MSYS POSIX))
+.PHONY: cmakebuild c89build gnu90build c99build gnu99build c11build bmix64build bmix32build bmi32build staticAnalyze
 cmakebuild:
 	cmake --version
 	$(RM) -r $(BUILDIR)/cmake/build
diff --git a/README.md b/README.md
index 0f36a5f..dcca766 100644
--- a/README.md
+++ b/README.md
@@ -193,7 +193,7 @@
 
 ## Contributing
 
-The "dev" branch is the one where all contributions are merged before reaching "master".
-If you plan to propose a patch, please commit into the "dev" branch, or its own feature branch.
-Direct commit to "master" are not permitted.
+The `dev` branch is the one where all contributions are merged before reaching `release`.
+If you plan to propose a patch, please commit into the `dev` branch, or its own feature branch.
+Direct commits to `release` are not permitted.
 For more information, please read [CONTRIBUTING](CONTRIBUTING.md).
diff --git a/TESTING.md b/TESTING.md
index 7e53051..32b133b 100644
--- a/TESTING.md
+++ b/TESTING.md
@@ -27,7 +27,7 @@
 
 Long Tests
 ----------
-Long tests run on all commits to `master` branch,
+Long tests run on all commits to `release` branch,
 and once a day on the current version of `dev` branch,
 on TravisCI.
 They consist of the following tests:
@@ -40,5 +40,4 @@
 - Versions test (ensuring `zstd` can decode files from all previous versions)
 - `pzstd` with asan and tsan, as well as in 32-bits mode
 - Testing `zstd` with legacy mode off
-- Testing `zbuff` (old streaming API)
 - Entire test suite and make install on macOS
diff --git a/appveyor.yml b/appveyor.yml
index 169c66b..c6ab786 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,20 +1,20 @@
-# Following tests are run _only_ on master branch
-# To reproduce these tests, it's possible to push into a branch `appveyorTest`
-# or a branch `visual*`, they will intentionnally trigger `master` tests
+# Following tests are run _only_ on `release` branch
+# and on selected feature branches matching `appveyor*` or `visual*`
 
 -
   version: 1.0.{build}
   branches:
     only:
+    - release
     - master
-    - appveyorTest
+    - /appveyor*/
     - /visual*/
   environment:
     matrix:
     - COMPILER: "gcc"
       HOST:     "mingw"
       PLATFORM: "x64"
-      SCRIPT:   "make allzstd MOREFLAGS=-static && make -C tests fullbench-lib"
+      SCRIPT:   "make allzstd MOREFLAGS=-static"
       ARTIFACT: "true"
       BUILD:    "true"
     - COMPILER: "gcc"
@@ -26,7 +26,7 @@
     - COMPILER: "clang"
       HOST:     "mingw"
       PLATFORM: "x64"
-      SCRIPT:   "MOREFLAGS='--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion' make allzstd"
+      SCRIPT:   "MOREFLAGS='--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion' make -j allzstd V=1"
       BUILD:    "true"
 
     - COMPILER: "gcc"
@@ -52,6 +52,15 @@
       PLATFORM: "Win32"
       CONFIGURATION: "Release"
 
+    - COMPILER: "clang-cl"
+      HOST:     "cmake-visual"
+      PLATFORM: "x64"
+      CONFIGURATION: "Release"
+      CMAKE_GENERATOR: "Visual Studio 15 2017"
+      CMAKE_GENERATOR_PLATFORM: "x64"
+      CMAKE_GENERATOR_TOOLSET: "LLVM"
+      APPVEYOR_BUILD_WORKER_IMAGE: "Visual Studio 2017"
+
   install:
   - ECHO Installing %COMPILER% %PLATFORM% %CONFIGURATION%
   - SET PATH_ORIGINAL=%PATH%
@@ -92,9 +101,9 @@
           cd programs\ && 7z a -tzip -mx9 zstd-win-binary-%PLATFORM%.zip zstd.exe &&
           appveyor PushArtifact zstd-win-binary-%PLATFORM%.zip &&
           cp zstd.exe ..\bin\zstd.exe &&
-          git clone --depth 1 --branch master https://github.com/facebook/zstd &&
+          git clone --depth 1 --branch release https://github.com/facebook/zstd &&
           cd zstd &&
-          git archive --format=tar master -o zstd-src.tar &&
+          git archive --format=tar release -o zstd-src.tar &&
           ..\zstd -19 zstd-src.tar &&
           appveyor PushArtifact zstd-src.tar.zst &&
           certUtil -hashfile zstd-src.tar.zst SHA256 > zstd-src.tar.zst.sha256.sig &&
@@ -154,6 +163,15 @@
       COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\fuzzer.exe tests\fuzzer_VS2015_%PLATFORM%_%CONFIGURATION%.exe &&
       COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe tests\
     )
+  - if [%HOST%]==[cmake-visual] (
+      ECHO *** &&
+      ECHO *** Building %CMAKE_GENERATOR% ^(%CMAKE_GENERATOR_TOOLSET%^) %PLATFORM%\%CONFIGURATION% &&
+      PUSHD build\cmake &&
+      cmake -DBUILD_TESTING=ON . &&
+      cmake --build . --config %CONFIGURATION% -j4 &&
+      POPD &&
+      ECHO ***
+    )
 
   test_script:
   - ECHO Testing %COMPILER% %PLATFORM% %CONFIGURATION%
@@ -204,7 +222,7 @@
     - COMPILER: "clang"
       HOST:     "mingw"
       PLATFORM: "x64"
-      SCRIPT:   "CFLAGS='--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion' make -j allzstd"
+      SCRIPT:   "CFLAGS='--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion' make -j allzstd V=1"
 
     - COMPILER: "visual"
       HOST:     "visual"
@@ -223,13 +241,21 @@
       PLATFORM: "Win32"
       CONFIGURATION: "Release"
 
+    - COMPILER: "clang-cl"
+      HOST:     "cmake-visual"
+      PLATFORM: "x64"
+      CONFIGURATION: "Release"
+      CMAKE_GENERATOR: "Visual Studio 15 2017"
+      CMAKE_GENERATOR_PLATFORM: "x64"
+      CMAKE_GENERATOR_TOOLSET: "LLVM"
+      APPVEYOR_BUILD_WORKER_IMAGE: "Visual Studio 2017"
+
   install:
   - ECHO Installing %COMPILER% %PLATFORM% %CONFIGURATION%
   - SET PATH_ORIGINAL=%PATH%
   - if [%HOST%]==[cygwin] (
       ECHO Installing Cygwin Packages &&
       C:\cygwin64\setup-x86_64.exe -qnNdO -R "C:\cygwin64" -g -P ^
-        gcc-g++,^
         gcc,^
         cmake,^
         make
@@ -252,8 +278,8 @@
       C:\cygwin64\bin\bash --login -c "
         set -e;
         cd build/cmake;
-        CFLAGS='-Werror' cmake -G 'Unix Makefiles' -DCMAKE_BUILD_TYPE=Debug -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_FUZZER_FLAGS=-T30s -DZSTD_ZSTREAM_FLAGS=-T30s .;
-        make -j4;
+        CFLAGS='-Werror' cmake -G 'Unix Makefiles' -DCMAKE_BUILD_TYPE=Debug -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_FUZZER_FLAGS=-T20s -DZSTD_ZSTREAM_FLAGS=-T20s -DZSTD_FULLBENCH_FLAGS=-i0 .;
+        make VERBOSE=1 -j;
         ctest -V -L Medium;
       "
     )
@@ -281,6 +307,15 @@
       COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\fuzzer.exe tests\fuzzer_VS2015_%PLATFORM%_%CONFIGURATION%.exe &&
       COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe tests\
     )
+  - if [%HOST%]==[cmake-visual] (
+      ECHO *** &&
+      ECHO *** Building %CMAKE_GENERATOR% ^(%CMAKE_GENERATOR_TOOLSET%^) %PLATFORM%\%CONFIGURATION% &&
+      PUSHD build\cmake &&
+      cmake -DBUILD_TESTING=ON . &&
+      cmake --build . --config %CONFIGURATION% -j4 &&
+      POPD &&
+      ECHO ***
+    )
 
 
   test_script:
diff --git a/build/VS2008/fullbench/fullbench.vcproj b/build/VS2008/fullbench/fullbench.vcproj
index 5752643..5e349dc 100644
--- a/build/VS2008/fullbench/fullbench.vcproj
+++ b/build/VS2008/fullbench/fullbench.vcproj
@@ -463,7 +463,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd_errors.h"
+				RelativePath="..\..\..\lib\zstd_errors.h"
 				>
 			</File>
 			<File
diff --git a/build/VS2008/fuzzer/fuzzer.vcproj b/build/VS2008/fuzzer/fuzzer.vcproj
index d48bc0f..32f2846 100644
--- a/build/VS2008/fuzzer/fuzzer.vcproj
+++ b/build/VS2008/fuzzer/fuzzer.vcproj
@@ -483,7 +483,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd_errors.h"
+				RelativePath="..\..\..\lib\zstd_errors.h"
 				>
 			</File>
 			<File
@@ -511,7 +511,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\dictBuilder\zdict.h"
+				RelativePath="..\..\..\lib\zdict.h"
 				>
 			</File>
 			<File
diff --git a/build/VS2008/zstd/zstd.vcproj b/build/VS2008/zstd/zstd.vcproj
index ab02e61..c7eec57 100644
--- a/build/VS2008/zstd/zstd.vcproj
+++ b/build/VS2008/zstd/zstd.vcproj
@@ -481,6 +481,10 @@
 				>
 			</File>
 			<File
+				RelativePath="..\..\..\programs\zstdcli_trace.c"
+				>
+			</File>
+			<File
 				RelativePath="..\..\..\lib\compress\zstdmt_compress.c"
 				>
 			</File>
@@ -555,7 +559,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\dictBuilder\zdict.h"
+				RelativePath="..\..\..\lib\zdict.h"
 				>
 			</File>
 			<File
@@ -571,7 +575,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd_errors.h"
+				RelativePath="..\..\..\lib\zstd_errors.h"
 				>
 			</File>
 			<File
diff --git a/build/VS2008/zstdlib/zstdlib.vcproj b/build/VS2008/zstdlib/zstdlib.vcproj
index 5eb49f9..88c1aee 100644
--- a/build/VS2008/zstdlib/zstdlib.vcproj
+++ b/build/VS2008/zstdlib/zstdlib.vcproj
@@ -495,7 +495,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd_errors.h"
+				RelativePath="..\..\..\lib\zstd_errors.h"
 				>
 			</File>
 			<File
@@ -523,7 +523,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\dictBuilder\zdict.h"
+				RelativePath="..\..\..\lib\zdict.h"
 				>
 			</File>
 			<File
diff --git a/build/VS2010/fullbench/fullbench.vcxproj b/build/VS2010/fullbench/fullbench.vcxproj
index 20932fa..2e0a042 100644
--- a/build/VS2010/fullbench/fullbench.vcxproj
+++ b/build/VS2010/fullbench/fullbench.vcxproj
@@ -190,7 +190,7 @@
     <ClInclude Include="..\..\..\lib\zstd.h" />
     <ClInclude Include="..\..\..\lib\common\fse.h" />
     <ClInclude Include="..\..\..\lib\common\huf.h" />
-    <ClInclude Include="..\..\..\lib\common\zstd_errors.h" />
+    <ClInclude Include="..\..\..\lib\zstd_errors.h" />
     <ClInclude Include="..\..\..\lib\common\zstd_internal.h" />
     <ClInclude Include="..\..\..\lib\common\pool.h" />
     <ClInclude Include="..\..\..\lib\common\threading.h" />
diff --git a/build/VS2010/fuzzer/fuzzer.vcxproj b/build/VS2010/fuzzer/fuzzer.vcxproj
index 8427572..91974ec 100644
--- a/build/VS2010/fuzzer/fuzzer.vcxproj
+++ b/build/VS2010/fuzzer/fuzzer.vcxproj
@@ -196,7 +196,7 @@
     <ClInclude Include="..\..\..\lib\common\huf.h" />
     <ClInclude Include="..\..\..\lib\common\xxhash.h" />
     <ClInclude Include="..\..\..\lib\common\zstd_internal.h" />
-    <ClInclude Include="..\..\..\lib\common\zstd_errors.h" />
+    <ClInclude Include="..\..\..\lib\zstd_errors.h" />
     <ClInclude Include="..\..\..\lib\zstd.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_compress.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_compress_literals.h" />
@@ -211,7 +211,7 @@
     <ClInclude Include="..\..\..\lib\compress\zstdmt_compress.h" />
     <ClInclude Include="..\..\..\lib\decompress\zstd_ddict.h" />
     <ClInclude Include="..\..\..\lib\dictBuilder\divsufsort.h" />
-    <ClInclude Include="..\..\..\lib\dictBuilder\zdict.h" />
+    <ClInclude Include="..\..\..\lib\zdict.h" />
     <ClInclude Include="..\..\..\lib\dictBuilder\cover.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h" />
     <ClInclude Include="..\..\..\programs\datagen.h" />
diff --git a/build/VS2010/libzstd-dll/libzstd-dll.vcxproj b/build/VS2010/libzstd-dll/libzstd-dll.vcxproj
index 0957d41..a0aa897 100644
--- a/build/VS2010/libzstd-dll/libzstd-dll.vcxproj
+++ b/build/VS2010/libzstd-dll/libzstd-dll.vcxproj
@@ -44,9 +44,6 @@
     <ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c" />
     <ClCompile Include="..\..\..\lib\decompress\zstd_decompress_block.c" />
     <ClCompile Include="..\..\..\lib\decompress\zstd_ddict.c" />
-    <ClCompile Include="..\..\..\lib\deprecated\zbuff_common.c" />
-    <ClCompile Include="..\..\..\lib\deprecated\zbuff_compress.c" />
-    <ClCompile Include="..\..\..\lib\deprecated\zbuff_decompress.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\fastcover.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
@@ -64,12 +61,11 @@
     <ClInclude Include="..\..\..\lib\common\threading.h" />
     <ClInclude Include="..\..\..\lib\common\bitstream.h" />
     <ClInclude Include="..\..\..\lib\common\error_private.h" />
-    <ClInclude Include="..\..\..\lib\common\zstd_errors.h" />
+    <ClInclude Include="..\..\..\lib\zstd_errors.h" />
     <ClInclude Include="..\..\..\lib\common\mem.h" />
     <ClInclude Include="..\..\..\lib\common\fse.h" />
     <ClInclude Include="..\..\..\lib\common\huf.h" />
     <ClInclude Include="..\..\..\lib\common\xxhash.h" />
-    <ClInclude Include="..\..\..\lib\deprecated\zbuff.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_v01.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_v02.h" />
diff --git a/build/VS2010/libzstd/libzstd.vcxproj b/build/VS2010/libzstd/libzstd.vcxproj
index 2034293..17c08d7 100644
--- a/build/VS2010/libzstd/libzstd.vcxproj
+++ b/build/VS2010/libzstd/libzstd.vcxproj
@@ -44,9 +44,6 @@
     <ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c" />
     <ClCompile Include="..\..\..\lib\decompress\zstd_decompress_block.c" />
     <ClCompile Include="..\..\..\lib\decompress\zstd_ddict.c" />
-    <ClCompile Include="..\..\..\lib\deprecated\zbuff_common.c" />
-    <ClCompile Include="..\..\..\lib\deprecated\zbuff_compress.c" />
-    <ClCompile Include="..\..\..\lib\deprecated\zbuff_decompress.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\fastcover.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
@@ -64,12 +61,11 @@
     <ClInclude Include="..\..\..\lib\common\threading.h" />
     <ClInclude Include="..\..\..\lib\common\bitstream.h" />
     <ClInclude Include="..\..\..\lib\common\error_private.h" />
-    <ClInclude Include="..\..\..\lib\common\zstd_errors.h" />
+    <ClInclude Include="..\..\..\lib\zstd_errors.h" />
     <ClInclude Include="..\..\..\lib\common\mem.h" />
     <ClInclude Include="..\..\..\lib\common\fse.h" />
     <ClInclude Include="..\..\..\lib\common\huf.h" />
     <ClInclude Include="..\..\..\lib\common\xxhash.h" />
-    <ClInclude Include="..\..\..\lib\deprecated\zbuff.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_v01.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_v02.h" />
diff --git a/build/VS2010/zstd/zstd.vcxproj b/build/VS2010/zstd/zstd.vcxproj
index e320d88..46e22f4 100644
--- a/build/VS2010/zstd/zstd.vcxproj
+++ b/build/VS2010/zstd/zstd.vcxproj
@@ -63,20 +63,21 @@
     <ClCompile Include="..\..\..\programs\dibio.c" />
     <ClCompile Include="..\..\..\programs\fileio.c" />
     <ClCompile Include="..\..\..\programs\zstdcli.c" />
+    <ClCompile Include="..\..\..\programs\zstdcli_trace.c" />
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="..\..\..\lib\common\pool.h" />
     <ClInclude Include="..\..\..\lib\common\threading.h" />
     <ClInclude Include="..\..\..\lib\common\xxhash.h" />
     <ClInclude Include="..\..\..\lib\compress\zstdmt_compress.h" />
-    <ClInclude Include="..\..\..\lib\dictBuilder\zdict.h" />
+    <ClInclude Include="..\..\..\lib\zdict.h" />
     <ClInclude Include="..\..\..\lib\dictBuilder\cover.h" />
     <ClInclude Include="..\..\..\lib\dictBuilder\divsufsort.h" />
     <ClInclude Include="..\..\..\lib\common\fse.h" />
     <ClInclude Include="..\..\..\lib\common\huf.h" />
     <ClInclude Include="..\..\..\lib\zstd.h" />
     <ClInclude Include="..\..\..\lib\common\zstd_internal.h" />
-    <ClInclude Include="..\..\..\lib\common\zstd_errors.h" />
+    <ClInclude Include="..\..\..\lib\zstd_errors.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_compress.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_compress_literals.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_compress_sequences.h" />
diff --git a/build/cmake/CMakeModules/AddZstdCompilationFlags.cmake b/build/cmake/CMakeModules/AddZstdCompilationFlags.cmake
index 6238971..e23b9d6 100644
--- a/build/cmake/CMakeModules/AddZstdCompilationFlags.cmake
+++ b/build/cmake/CMakeModules/AddZstdCompilationFlags.cmake
@@ -26,7 +26,12 @@
         EnableCompilerFlag("-std=c++11" false true)
         #Set c99 by default
         EnableCompilerFlag("-std=c99" true false)
-        EnableCompilerFlag("-Wall" true true)
+        if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND MSVC)
+            # clang-cl normally maps -Wall to -Weverything.
+            EnableCompilerFlag("/clang:-Wall" true true)
+        else ()
+            EnableCompilerFlag("-Wall" true true)
+        endif ()
         EnableCompilerFlag("-Wextra" true true)
         EnableCompilerFlag("-Wundef" true true)
         EnableCompilerFlag("-Wshadow" true true)
diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt
index 088c876..5f75665 100644
--- a/build/cmake/lib/CMakeLists.txt
+++ b/build/cmake/lib/CMakeLists.txt
@@ -7,7 +7,7 @@
 # in the COPYING file in the root directory of this source tree).
 # ################################################################
 
-project(libzstd)
+project(libzstd C)
 
 set(CMAKE_INCLUDE_CURRENT_DIR TRUE)
 option(ZSTD_BUILD_STATIC "BUILD STATIC LIBRARIES" ON)
@@ -24,28 +24,24 @@
 file(GLOB CompressSources ${LIBRARY_DIR}/compress/*.c)
 file(GLOB DecompressSources ${LIBRARY_DIR}/decompress/*.c)
 file(GLOB DictBuilderSources ${LIBRARY_DIR}/dictBuilder/*.c)
-file(GLOB DeprecatedSources ${LIBRARY_DIR}/deprecated/*.c)
 
 set(Sources
         ${CommonSources}
         ${CompressSources}
         ${DecompressSources}
-        ${DictBuilderSources}
-        ${DeprecatedSources})
+        ${DictBuilderSources})
 
 file(GLOB CommonHeaders ${LIBRARY_DIR}/common/*.h)
 file(GLOB CompressHeaders ${LIBRARY_DIR}/compress/*.h)
 file(GLOB DecompressHeaders ${LIBRARY_DIR}/decompress/*.h)
 file(GLOB DictBuilderHeaders ${LIBRARY_DIR}/dictBuilder/*.h)
-file(GLOB DeprecatedHeaders ${LIBRARY_DIR}/deprecated/*.h)
 
 set(Headers
         ${LIBRARY_DIR}/zstd.h
         ${CommonHeaders}
         ${CompressHeaders}
         ${DecompressHeaders}
-        ${DictBuilderHeaders}
-        ${DeprecatedHeaders})
+        ${DictBuilderHeaders})
 
 if (ZSTD_LEGACY_SUPPORT)
     set(LIBRARY_LEGACY_DIR ${LIBRARY_DIR}/legacy)
@@ -137,7 +133,7 @@
 if (UNIX OR MINGW)
     # pkg-config
     set(PREFIX "${CMAKE_INSTALL_PREFIX}")
-    set(EXEC_PREFIX "\\$$\{prefix}")
+    set(EXEC_PREFIX "\${prefix}")
     set(LIBDIR "${CMAKE_INSTALL_FULL_LIBDIR}")
     set(INCLUDEDIR "${CMAKE_INSTALL_FULL_INCLUDEDIR}")
     set(VERSION "${zstd_VERSION}")
@@ -149,32 +145,21 @@
     string(SUBSTRING "${INCLUDEDIR}" ${PREFIX_LENGTH} -1 INCLUDEDIR_SUFFIX)
 
     if ("${INCLUDEDIR_PREFIX}" STREQUAL "${PREFIX}")
-        set(INCLUDEDIR_PREFIX "\\$$\{prefix}")
+        set(INCLUDEDIR "\${prefix}${INCLUDEDIR_SUFFIX}")
     endif()
     if ("${LIBDIR_PREFIX}" STREQUAL "${PREFIX}")
-        set(LIBDIR_PREFIX "\\$$\{exec_prefix}")
+        set(LIBDIR "\${exec_prefix}${LIBDIR_SUFFIX}")
     endif()
 
-    add_custom_target(libzstd.pc ALL
-            ${CMAKE_COMMAND}
-            -DIN=${LIBRARY_DIR}/libzstd.pc.in
-            -DOUT="libzstd.pc"
-            -DPREFIX="${PREFIX}"
-            -DEXEC_PREFIX="${EXEC_PREFIX}"
-            -DINCLUDEDIR="${INCLUDEDIR_PREFIX}${INCLUDEDIR_SUFFIX}"
-            -DLIBDIR="${LIBDIR_PREFIX}${LIBDIR_SUFFIX}"
-            -DVERSION="${VERSION}"
-            -P ${CMAKE_CURRENT_SOURCE_DIR}/pkgconfig.cmake
-            COMMENT "Creating pkg-config file")
-
+    configure_file("${LIBRARY_DIR}/libzstd.pc.in" "${CMAKE_CURRENT_BINARY_DIR}/libzstd.pc" @ONLY)
     install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libzstd.pc" DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
 endif ()
 
 # install target
 install(FILES
     "${LIBRARY_DIR}/zstd.h"
-    "${LIBRARY_DIR}/dictBuilder/zdict.h"
-    "${LIBRARY_DIR}/common/zstd_errors.h"
+    "${LIBRARY_DIR}/zdict.h"
+    "${LIBRARY_DIR}/zstd_errors.h"
     DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
 
 install(TARGETS ${library_targets}
diff --git a/build/cmake/lib/pkgconfig.cmake b/build/cmake/lib/pkgconfig.cmake
deleted file mode 100644
index 8f805a1..0000000
--- a/build/cmake/lib/pkgconfig.cmake
+++ /dev/null
@@ -1 +0,0 @@
-configure_file("${IN}" "${OUT}" @ONLY)
diff --git a/build/cmake/programs/CMakeLists.txt b/build/cmake/programs/CMakeLists.txt
index b26e97d..f1d1277 100644
--- a/build/cmake/programs/CMakeLists.txt
+++ b/build/cmake/programs/CMakeLists.txt
@@ -7,7 +7,7 @@
 # in the COPYING file in the root directory of this source tree).
 # ################################################################
 
-project(programs)
+project(programs C)
 
 set(CMAKE_INCLUDE_CURRENT_DIR TRUE)
 
@@ -32,7 +32,7 @@
     set(PlatformDependResources ${MSVC_RESOURCE_DIR}/zstd.rc)
 endif ()
 
-add_executable(zstd ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/dibio.c ${PlatformDependResources})
+add_executable(zstd ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/dibio.c ${PROGRAMS_DIR}/zstdcli_trace.c ${PlatformDependResources})
 target_link_libraries(zstd ${PROGRAMS_ZSTD_LINK_TARGET})
 if (CMAKE_SYSTEM_NAME MATCHES "(Solaris|SunOS)")
     target_link_libraries(zstd rt)
@@ -75,7 +75,7 @@
 
     add_executable(zstd-frugal ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/fileio.c)
     target_link_libraries(zstd-frugal ${PROGRAMS_ZSTD_LINK_TARGET})
-    set_property(TARGET zstd-frugal APPEND PROPERTY COMPILE_DEFINITIONS "ZSTD_NOBENCH;ZSTD_NODICT")
+    set_property(TARGET zstd-frugal APPEND PROPERTY COMPILE_DEFINITIONS "ZSTD_NOBENCH;ZSTD_NODICT;ZSTD_NOTRACE")
 endif ()
 
 # Add multi-threading support definitions
diff --git a/build/cmake/tests/.gitignore b/build/cmake/tests/.gitignore
index 2ab62a3..ca2947f 100644
--- a/build/cmake/tests/.gitignore
+++ b/build/cmake/tests/.gitignore
@@ -3,5 +3,4 @@
 fullbench
 fuzzer
 paramgrill
-zbufftest
 
diff --git a/build/cmake/tests/CMakeLists.txt b/build/cmake/tests/CMakeLists.txt
index 34eca91..8bba6ea 100644
--- a/build/cmake/tests/CMakeLists.txt
+++ b/build/cmake/tests/CMakeLists.txt
@@ -57,13 +57,15 @@
 # fullbench
 #
 add_executable(fullbench ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${TESTS_DIR}/fullbench.c)
+set_property(TARGET fullbench APPEND PROPERTY COMPILE_OPTIONS "-Wno-deprecated-declarations")
 target_link_libraries(fullbench libzstd_static)
-add_test(NAME fullbench COMMAND fullbench)
+add_test(NAME fullbench COMMAND fullbench ${ZSTD_FULLBENCH_FLAGS})
 
 #
 # fuzzer
 #
 add_executable(fuzzer ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/fuzzer.c)
+set_property(TARGET fuzzer APPEND PROPERTY COMPILE_OPTIONS "-Wno-deprecated-declarations")
 target_link_libraries(fuzzer libzstd_static)
 AddTestFlagsOption(ZSTD_FUZZER_FLAGS "$ENV{FUZZERTEST} $ENV{FUZZER_FLAGS}"
     "Semicolon-separated list of flags to pass to the fuzzer test (see `fuzzer -h` for usage)")
@@ -76,6 +78,7 @@
 # zstreamtest
 #
 add_executable(zstreamtest ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/seqgen.c ${TESTS_DIR}/zstreamtest.c)
+set_property(TARGET zstreamtest APPEND PROPERTY COMPILE_OPTIONS "-Wno-deprecated-declarations")
 target_link_libraries(zstreamtest libzstd_static)
 AddTestFlagsOption(ZSTD_ZSTREAM_FLAGS "$ENV{ZSTREAM_TESTTIME} $ENV{FUZZER_FLAGS}"
     "Semicolon-separated list of flags to pass to the zstreamtest test (see `zstreamtest -h` for usage)")
diff --git a/build/meson/contrib/pzstd/meson.build b/build/meson/contrib/pzstd/meson.build
index 8f3822f..dcf2136 100644
--- a/build/meson/contrib/pzstd/meson.build
+++ b/build/meson/contrib/pzstd/meson.build
@@ -18,7 +18,7 @@
   join_paths(zstd_rootdir, 'contrib/pzstd/SkippableFrame.cpp')]
 pzstd = executable('pzstd',
   pzstd_sources,
-  cpp_args: [ '-DNDEBUG', '-Wno-shadow', '-pedantic' ],
+  cpp_args: [ '-DNDEBUG', '-Wno-shadow', '-pedantic', '-Wno-deprecated-declarations' ],
   include_directories: pzstd_includes,
   dependencies: [ libzstd_dep, thread_dep ],
   install: true)
diff --git a/build/meson/lib/meson.build b/build/meson/lib/meson.build
index 17806c8..5cc9fee 100644
--- a/build/meson/lib/meson.build
+++ b/build/meson/lib/meson.build
@@ -14,8 +14,7 @@
   join_paths(zstd_rootdir, 'lib/common'),
   join_paths(zstd_rootdir, 'lib/compress'),
   join_paths(zstd_rootdir, 'lib/decompress'),
-  join_paths(zstd_rootdir, 'lib/dictBuilder'),
-  join_paths(zstd_rootdir, 'lib/deprecated'))]
+  join_paths(zstd_rootdir, 'lib/dictBuilder'))]
 
 libzstd_sources = [join_paths(zstd_rootdir, 'lib/common/entropy_common.c'),
   join_paths(zstd_rootdir, 'lib/common/fse_decompress.c'),
@@ -44,10 +43,7 @@
   join_paths(zstd_rootdir, 'lib/dictBuilder/cover.c'),
   join_paths(zstd_rootdir, 'lib/dictBuilder/fastcover.c'),
   join_paths(zstd_rootdir, 'lib/dictBuilder/divsufsort.c'),
-  join_paths(zstd_rootdir, 'lib/dictBuilder/zdict.c'),
-  join_paths(zstd_rootdir, 'lib/deprecated/zbuff_common.c'),
-  join_paths(zstd_rootdir, 'lib/deprecated/zbuff_compress.c'),
-  join_paths(zstd_rootdir, 'lib/deprecated/zbuff_decompress.c')]
+  join_paths(zstd_rootdir, 'lib/dictBuilder/zdict.c')]
 
 # Explicit define legacy support
 add_project_arguments('-DZSTD_LEGACY_SUPPORT=@0@'.format(legacy_level),
@@ -127,5 +123,5 @@
   url: 'http://www.zstd.net/')
 
 install_headers(join_paths(zstd_rootdir, 'lib/zstd.h'),
-  join_paths(zstd_rootdir, 'lib/dictBuilder/zdict.h'),
-  join_paths(zstd_rootdir, 'lib/common/zstd_errors.h'))
+  join_paths(zstd_rootdir, 'lib/zdict.h'),
+  join_paths(zstd_rootdir, 'lib/zstd_errors.h'))
diff --git a/build/meson/programs/meson.build b/build/meson/programs/meson.build
index 363818f..d255627 100644
--- a/build/meson/programs/meson.build
+++ b/build/meson/programs/meson.build
@@ -17,7 +17,8 @@
   join_paths(zstd_rootdir, 'programs/benchfn.c'),
   join_paths(zstd_rootdir, 'programs/benchzstd.c'),
   join_paths(zstd_rootdir, 'programs/datagen.c'),
-  join_paths(zstd_rootdir, 'programs/dibio.c')]
+  join_paths(zstd_rootdir, 'programs/dibio.c'),
+  join_paths(zstd_rootdir, 'programs/zstdcli_trace.c')]
 
 zstd_c_args = libzstd_debug_cflags
 if use_multi_thread
@@ -73,7 +74,7 @@
 executable('zstd-frugal',
   zstd_frugal_sources,
   dependencies: libzstd_dep,
-  c_args: [ '-DZSTD_NOBENCH', '-DZSTD_NODICT' ],
+  c_args: [ '-DZSTD_NOBENCH', '-DZSTD_NODICT', '-DZSTD_NOTRACE' ],
   install: true)
 
 install_data(join_paths(zstd_rootdir, 'programs/zstdgrep'),
diff --git a/build/meson/tests/meson.build b/build/meson/tests/meson.build
index 0587f9a..1b23363 100644
--- a/build/meson/tests/meson.build
+++ b/build/meson/tests/meson.build
@@ -57,7 +57,7 @@
 fuzzer = executable('fuzzer',
   fuzzer_sources,
   include_directories: test_includes,
-  dependencies: libzstd_dep,
+  dependencies: [ libzstd_dep, thread_dep ],
   install: false)
 
 zstreamtest_sources = [join_paths(zstd_rootdir, 'programs/datagen.c'),
diff --git a/contrib/single_file_libs/.gitignore b/build/single_file_libs/.gitignore
similarity index 100%
rename from contrib/single_file_libs/.gitignore
rename to build/single_file_libs/.gitignore
diff --git a/contrib/single_file_libs/README.md b/build/single_file_libs/README.md
similarity index 96%
rename from contrib/single_file_libs/README.md
rename to build/single_file_libs/README.md
index d88e8fc..1705b76 100644
--- a/contrib/single_file_libs/README.md
+++ b/build/single_file_libs/README.md
@@ -11,7 +11,7 @@
 
 Create `zstddeclib.c` from the Zstd source using:
 ```
-cd zstd/contrib/single_file_libs
+cd zstd/build/single_file_libs
 ./combine.sh -r ../../lib -o zstddeclib.c zstddeclib-in.c
 ```
 Then add the resulting file to your project (see the [example files](examples)).
@@ -25,7 +25,7 @@
 
 Create `zstd.c` from the Zstd source using:
 ```
-cd zstd/contrib/single_file_libs
+cd zstd/build/single_file_libs
 ./combine.sh -r ../../lib -o zstd.c zstd-in.c
 ```
 It's possible to create a compressor-only library but since the decompressor is so small in comparison this doesn't bring much of a gain (but for the curious, simply remove the files in the _decompress_ section at the end of `zstd-in.c`).
diff --git a/contrib/single_file_libs/build_decoder_test.sh b/build/single_file_libs/build_decoder_test.sh
similarity index 100%
rename from contrib/single_file_libs/build_decoder_test.sh
rename to build/single_file_libs/build_decoder_test.sh
diff --git a/contrib/single_file_libs/build_library_test.sh b/build/single_file_libs/build_library_test.sh
similarity index 100%
rename from contrib/single_file_libs/build_library_test.sh
rename to build/single_file_libs/build_library_test.sh
diff --git a/contrib/single_file_libs/combine.sh b/build/single_file_libs/combine.sh
similarity index 100%
rename from contrib/single_file_libs/combine.sh
rename to build/single_file_libs/combine.sh
diff --git a/contrib/single_file_libs/create_single_file_decoder.sh b/build/single_file_libs/create_single_file_decoder.sh
similarity index 100%
rename from contrib/single_file_libs/create_single_file_decoder.sh
rename to build/single_file_libs/create_single_file_decoder.sh
diff --git a/contrib/single_file_libs/create_single_file_library.sh b/build/single_file_libs/create_single_file_library.sh
similarity index 100%
rename from contrib/single_file_libs/create_single_file_library.sh
rename to build/single_file_libs/create_single_file_library.sh
diff --git a/contrib/single_file_libs/examples/README.md b/build/single_file_libs/examples/README.md
similarity index 100%
rename from contrib/single_file_libs/examples/README.md
rename to build/single_file_libs/examples/README.md
diff --git a/contrib/single_file_libs/examples/emscripten.c b/build/single_file_libs/examples/emscripten.c
similarity index 100%
rename from contrib/single_file_libs/examples/emscripten.c
rename to build/single_file_libs/examples/emscripten.c
diff --git a/contrib/single_file_libs/examples/roundtrip.c b/build/single_file_libs/examples/roundtrip.c
similarity index 100%
rename from contrib/single_file_libs/examples/roundtrip.c
rename to build/single_file_libs/examples/roundtrip.c
diff --git a/contrib/single_file_libs/examples/shell.html b/build/single_file_libs/examples/shell.html
similarity index 100%
rename from contrib/single_file_libs/examples/shell.html
rename to build/single_file_libs/examples/shell.html
diff --git a/contrib/single_file_libs/examples/simple.c b/build/single_file_libs/examples/simple.c
similarity index 100%
rename from contrib/single_file_libs/examples/simple.c
rename to build/single_file_libs/examples/simple.c
diff --git a/contrib/single_file_libs/examples/testcard-dxt1.inl b/build/single_file_libs/examples/testcard-dxt1.inl
similarity index 100%
rename from contrib/single_file_libs/examples/testcard-dxt1.inl
rename to build/single_file_libs/examples/testcard-dxt1.inl
diff --git a/contrib/single_file_libs/examples/testcard-zstd.inl b/build/single_file_libs/examples/testcard-zstd.inl
similarity index 100%
rename from contrib/single_file_libs/examples/testcard-zstd.inl
rename to build/single_file_libs/examples/testcard-zstd.inl
diff --git a/contrib/single_file_libs/examples/testcard.png b/build/single_file_libs/examples/testcard.png
similarity index 100%
rename from contrib/single_file_libs/examples/testcard.png
rename to build/single_file_libs/examples/testcard.png
Binary files differ
diff --git a/contrib/single_file_libs/zstd-in.c b/build/single_file_libs/zstd-in.c
similarity index 96%
rename from contrib/single_file_libs/zstd-in.c
rename to build/single_file_libs/zstd-in.c
index 4f0fb56..1b27953 100644
--- a/contrib/single_file_libs/zstd-in.c
+++ b/build/single_file_libs/zstd-in.c
@@ -8,7 +8,7 @@
  * \endcode
  */
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -42,6 +42,7 @@
 #ifndef __EMSCRIPTEN__
 #define ZSTD_MULTITHREAD
 #endif
+#define ZSTD_TRACE 0
 
 /* Include zstd_deps.h first with all the options we need enabled. */
 #define ZSTD_DEPS_NEED_MALLOC
diff --git a/contrib/single_file_libs/zstddeclib-in.c b/build/single_file_libs/zstddeclib-in.c
similarity index 95%
rename from contrib/single_file_libs/zstddeclib-in.c
rename to build/single_file_libs/zstddeclib-in.c
index f461b55..019d9c2 100644
--- a/contrib/single_file_libs/zstddeclib-in.c
+++ b/build/single_file_libs/zstddeclib-in.c
@@ -8,7 +8,7 @@
  * \endcode
  */
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -38,6 +38,7 @@
 #define XXH_INLINE_ALL
 #define ZSTD_LEGACY_SUPPORT 0
 #define ZSTD_STRIP_ERROR_STRINGS
+#define ZSTD_TRACE 0
 
 /* Include zstd_deps.h first with all the options we need enabled. */
 #define ZSTD_DEPS_NEED_MALLOC
diff --git a/contrib/VS2005/fullbench/fullbench.vcproj b/contrib/VS2005/fullbench/fullbench.vcproj
index c67490c..98f8593 100644
--- a/contrib/VS2005/fullbench/fullbench.vcproj
+++ b/contrib/VS2005/fullbench/fullbench.vcproj
@@ -390,7 +390,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd_errors.h"
+				RelativePath="..\..\..\lib\zstd_errors.h"
 				>
 			</File>
 			<File
diff --git a/contrib/VS2005/fuzzer/fuzzer.vcproj b/contrib/VS2005/fuzzer/fuzzer.vcproj
index c64c503..d182535 100644
--- a/contrib/VS2005/fuzzer/fuzzer.vcproj
+++ b/contrib/VS2005/fuzzer/fuzzer.vcproj
@@ -426,7 +426,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd_errors.h"
+				RelativePath="..\..\..\lib\zstd_errors.h"
 				>
 			</File>
 			<File
@@ -454,7 +454,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\dictBuilder\zdict.h"
+				RelativePath="..\..\..\lib\zdict.h"
 				>
 			</File>
 			<File
diff --git a/contrib/VS2005/zstd/zstd.vcproj b/contrib/VS2005/zstd/zstd.vcproj
index 46cabbf..78645d1 100644
--- a/contrib/VS2005/zstd/zstd.vcproj
+++ b/contrib/VS2005/zstd/zstd.vcproj
@@ -454,7 +454,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd_errors.h"
+				RelativePath="..\..\..\lib\zstd_errors.h"
 				>
 			</File>
 			<File
@@ -482,7 +482,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\dictBuilder\zdict.h"
+				RelativePath="..\..\..\lib\zdict.h"
 				>
 			</File>
 			<File
diff --git a/contrib/VS2005/zstdlib/zstdlib.vcproj b/contrib/VS2005/zstdlib/zstdlib.vcproj
index f77df78..67ddd2d 100644
--- a/contrib/VS2005/zstdlib/zstdlib.vcproj
+++ b/contrib/VS2005/zstdlib/zstdlib.vcproj
@@ -372,18 +372,6 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\deprecated\zbuff_common.c"
-				>
-			</File>
-			<File
-				RelativePath="..\..\..\lib\deprecated\zbuff_compress.c"
-				>
-			</File>
-			<File
-				RelativePath="..\..\..\lib\deprecated\zbuff_decompress.c"
-				>
-			</File>
-			<File
 				RelativePath="..\..\..\lib\dictBuilder\zdict.c"
 				>
 			</File>
@@ -458,7 +446,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd_errors.h"
+				RelativePath="..\..\..\lib\zstd_errors.h"
 				>
 			</File>
 			<File
@@ -486,11 +474,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zbuff.h"
-				>
-			</File>
-			<File
-				RelativePath="..\..\..\lib\dictBuilder\zdict.h"
+				RelativePath="..\..\..\lib\zdict.h"
 				>
 			</File>
 			<File
diff --git a/contrib/diagnose_corruption/Makefile b/contrib/diagnose_corruption/Makefile
index dc6fd29..a21a002 100644
--- a/contrib/diagnose_corruption/Makefile
+++ b/contrib/diagnose_corruption/Makefile
@@ -32,4 +32,4 @@
 
 .PHONY: clean
 clean:
-	rm -f check_flipped_bits
\ No newline at end of file
+	rm -f check_flipped_bits
diff --git a/contrib/freestanding_lib/freestanding.py b/contrib/freestanding_lib/freestanding.py
index 9e91a48..1971687 100755
--- a/contrib/freestanding_lib/freestanding.py
+++ b/contrib/freestanding_lib/freestanding.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # ################################################################
-# Copyright (c) 2020-2020, Facebook, Inc.
+# Copyright (c) 2021-2021, Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
@@ -27,6 +27,8 @@
     "common/pool.h",
     "common/threading.c",
     "common/threading.h",
+    "common/zstd_trace.c",
+    "common/zstd_trace.h",
     "compress/zstdmt_compress.h",
     "compress/zstdmt_compress.c",
 ]
@@ -430,7 +432,7 @@
             external_xxhash: bool, xxh64_state: Optional[str],
             xxh64_prefix: Optional[str], rewritten_includes: [(str, str)],
             defs: [(str, Optional[str])], replaces: [(str, str)],
-            undefs: [str], excludes: [str]
+            undefs: [str], excludes: [str], seds: [str],
     ):
         self._zstd_deps = zstd_deps
         self._mem = mem
@@ -444,6 +446,7 @@
         self._replaces = replaces
         self._undefs = undefs
         self._excludes = excludes
+        self._seds = seds
 
     def _dst_lib_file_paths(self):
         """
@@ -471,24 +474,25 @@
         dst_path = os.path.join(self._dst_lib, lib_path)
         self._log(f"\tCopying: {src_path} -> {dst_path}")
         shutil.copyfile(src_path, dst_path)
-    
+
     def _copy_source_lib(self):
         self._log("Copying source library into output library")
 
         assert os.path.exists(self._src_lib)
         os.makedirs(self._dst_lib, exist_ok=True)
         self._copy_file("zstd.h")
+        self._copy_file("zstd_errors.h")
         for subdir in INCLUDED_SUBDIRS:
             src_dir = os.path.join(self._src_lib, subdir)
             dst_dir = os.path.join(self._dst_lib, subdir)
-            
+
             assert os.path.exists(src_dir)
             os.makedirs(dst_dir, exist_ok=True)
 
             for filename in os.listdir(src_dir):
                 lib_path = os.path.join(subdir, filename)
                 self._copy_file(lib_path)
-    
+
     def _copy_zstd_deps(self):
         dst_zstd_deps = os.path.join(self._dst_lib, "common", "zstd_deps.h")
         self._log(f"Copying zstd_deps: {self._zstd_deps} -> {dst_zstd_deps}")
@@ -508,7 +512,7 @@
         assert not (undef and value is not None)
         for filepath in self._dst_lib_file_paths():
             file = FileLines(filepath)
-    
+
     def _hardwire_defines(self):
         self._log("Hardwiring macros")
         partial_preprocessor = PartialPreprocessor(self._defs, self._replaces, self._undefs)
@@ -536,7 +540,7 @@
                         skipped.append(line)
                         if end_re.search(line) is not None:
                             assert begin_re.search(line) is None
-                            self._log(f"\t\tRemoving excluded section: {exclude}") 
+                            self._log(f"\t\tRemoving excluded section: {exclude}")
                             for s in skipped:
                                 self._log(f"\t\t\t- {s}")
                             emit = True
@@ -559,12 +563,12 @@
                 e = match.end('include')
                 file.lines[i] = line[:s] + rewritten + line[e:]
             file.write()
-    
+
     def _rewrite_includes(self):
         self._log("Rewriting includes")
         for original, rewritten in self._rewritten_includes:
             self._rewrite_include(original, rewritten)
-    
+
     def _replace_xxh64_prefix(self):
         if self._xxh64_prefix is None:
             return
@@ -576,7 +580,7 @@
             )
         if self._xxh64_prefix is not None:
             replacements.append(
-                (re.compile(r"([^\w]|^)(?P<orig>XXH64)_"), self._xxh64_prefix)
+                (re.compile(r"([^\w]|^)(?P<orig>XXH64)[\(_]"), self._xxh64_prefix)
             )
         for filepath in self._dst_lib_file_paths():
             file = FileLines(filepath)
@@ -596,6 +600,48 @@
                 file.lines[i] = line
             file.write()
 
+    def _parse_sed(self, sed):
+        assert sed[0] == 's'
+        delim = sed[1]
+        match = re.fullmatch(f's{delim}(.+){delim}(.*){delim}(.*)', sed)
+        assert match is not None
+        regex = re.compile(match.group(1))
+        format_str = match.group(2)
+        is_global = match.group(3) == 'g'
+        return regex, format_str, is_global
+
+    def _process_sed(self, sed):
+        self._log(f"Processing sed: {sed}")
+        regex, format_str, is_global = self._parse_sed(sed)
+
+        for filepath in self._dst_lib_file_paths():
+            file = FileLines(filepath)
+            for i, line in enumerate(file.lines):
+                modified = False
+                while True:
+                    match = regex.search(line)
+                    if match is None:
+                        break
+                    replacement = format_str.format(match.groups(''), match.groupdict(''))
+                    b = match.start()
+                    e = match.end()
+                    line = line[:b] + replacement + line[e:]
+                    modified = True
+                    if not is_global:
+                        break
+                if modified:
+                    self._log(f"\t- {file.lines[i][:-1]}")
+                    self._log(f"\t+ {line[:-1]}")
+                file.lines[i] = line
+            file.write()
+
+    def _process_seds(self):
+        self._log("Processing seds")
+        for sed in self._seds:
+            self._process_sed(sed)
+
+
+
     def go(self):
         self._copy_source_lib()
         self._copy_zstd_deps()
@@ -604,6 +650,7 @@
         self._remove_excludes()
         self._rewrite_includes()
         self._replace_xxh64_prefix()
+        self._process_seds()
 
 
 def parse_optional_pair(defines: [str]) -> [(str, Optional[str])]:
@@ -641,6 +688,7 @@
     parser.add_argument("--xxh64-state", default=None, help="Alternate XXH64 state type (excluding _) e.g. --xxh64-state='struct xxh64_state'")
     parser.add_argument("--xxh64-prefix", default=None, help="Alternate XXH64 function prefix (excluding _) e.g. --xxh64-prefix=xxh64")
     parser.add_argument("--rewrite-include", default=[], dest="rewritten_includes", action="append", help="Rewrite an include REGEX=NEW (e.g. '<stddef\\.h>=<linux/types.h>')")
+    parser.add_argument("--sed", default=[], dest="seds", action="append", help="Apply a sed replacement. Format: `s/REGEX/FORMAT/[g]`. REGEX is a Python regex. FORMAT is a Python format string formatted by the regex dict.")
     parser.add_argument("-D", "--define", default=[], dest="defs", action="append", help="Pre-define this macro (can be passed multiple times)")
     parser.add_argument("-U", "--undefine", default=[], dest="undefs", action="append", help="Pre-undefine this macro (can be passed mutliple times)")
     parser.add_argument("-R", "--replace", default=[], dest="replaces", action="append", help="Pre-define this macro and replace the first ifndef block with its definition")
@@ -656,6 +704,11 @@
         if name in args.undefs:
             raise RuntimeError(f"{name} is both defined and undefined!")
 
+    # Always set tracing to 0
+    if "ZSTD_NO_TRACE" not in (arg[0] for arg in args.defs):
+        args.defs.append(("ZSTD_NO_TRACE", None))
+        args.defs.append(("ZSTD_TRACE", "0"))
+
     args.replaces = parse_pair(args.replaces)
     for name, _ in args.replaces:
         if name in args.undefs or name in args.defs:
@@ -688,7 +741,8 @@
         args.defs,
         args.replaces,
         args.undefs,
-        args.excludes
+        args.excludes,
+        args.seds,
     ).go()
 
 if __name__ == "__main__":
diff --git a/contrib/linux-kernel/Makefile b/contrib/linux-kernel/Makefile
index b8a65e9..c391df7 100644
--- a/contrib/linux-kernel/Makefile
+++ b/contrib/linux-kernel/Makefile
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2015-2020, Facebook, Inc.
+# Copyright (c) Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
@@ -22,6 +22,10 @@
 		--xxh64-prefix 'xxh64' \
 		--rewrite-include '<limits\.h>=<linux/limits.h>' \
 		--rewrite-include '<stddef\.h>=<linux/types.h>' \
+		--rewrite-include '"\.\./zstd.h"=<linux/zstd.h>' \
+		--rewrite-include '"(\.\./)?zstd_errors.h"=<linux/zstd_errors.h>' \
+		--sed 's,/\*\*\*,/* *,g' \
+		--sed 's,/\*\*,/*,g' \
 		-DZSTD_NO_INTRINSICS \
 		-DZSTD_NO_UNUSED_FUNCTIONS \
 		-DZSTD_LEGACY_SUPPORT=0 \
@@ -45,7 +49,12 @@
 		-U_MSC_VER \
 		-U_WIN32 \
 		-RZSTDLIB_VISIBILITY= \
-		-RZSTDERRORLIB_VISIBILITY=
+		-RZSTDERRORLIB_VISIBILITY= \
+		-DZSTD_HAVE_WEAK_SYMBOLS=0 \
+		-DZSTD_TRACE=0 \
+		-DZSTD_NO_TRACE
+	mv linux/lib/zstd/zstd.h linux/include/linux/zstd_lib.h
+	mv linux/lib/zstd/zstd_errors.h linux/include/linux/
 	cp linux_zstd.h linux/include/linux/zstd.h
 	cp zstd_compress_module.c linux/lib/zstd
 	cp zstd_decompress_module.c linux/lib/zstd
@@ -60,15 +69,18 @@
 	rm -f $(LINUX)/include/linux/zstd_errors.h
 	rm -rf $(LINUX)/lib/zstd
 	cp linux/include/linux/zstd.h $(LINUX)/include/linux
+	cp linux/include/linux/zstd_lib.h $(LINUX)/include/linux
+	cp linux/include/linux/zstd_errors.h $(LINUX)/include/linux
 	cp -r linux/lib/zstd $(LINUX)/lib
 
 import-upstream:
 	rm -rf $(LINUX)/lib/zstd
 	mkdir $(LINUX)/lib/zstd
-	cp ../../lib/zstd.h $(LINUX)/lib/zstd
+	cp ../../lib/zstd.h $(LINUX)/include/linux/zstd_lib.h
 	cp -r ../../lib/common $(LINUX)/lib/zstd
 	cp -r ../../lib/compress $(LINUX)/lib/zstd
 	cp -r ../../lib/decompress $(LINUX)/lib/zstd
+	mv $(LINUX)/lib/zstd/zstd_errors.h $(LINUX)/include/linux
 	rm $(LINUX)/lib/zstd/common/threading.*
 	rm $(LINUX)/lib/zstd/common/pool.*
 	rm $(LINUX)/lib/zstd/common/xxhash.*
diff --git a/contrib/linux-kernel/decompress_sources.h b/contrib/linux-kernel/decompress_sources.h
index 907753e..f35bef0 100644
--- a/contrib/linux-kernel/decompress_sources.h
+++ b/contrib/linux-kernel/decompress_sources.h
@@ -1,4 +1,13 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
 
 /*
  * This file includes every .c file needed for decompression.
diff --git a/contrib/linux-kernel/linux.mk b/contrib/linux-kernel/linux.mk
index 06bf079..19485e3 100644
--- a/contrib/linux-kernel/linux.mk
+++ b/contrib/linux-kernel/linux.mk
@@ -1,4 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0-only
+# ################################################################
+# Copyright (c) Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# You may select, at your option, one of the above-listed licenses.
+# ################################################################
 obj-$(CONFIG_ZSTD_COMPRESS) += zstd_compress.o
 obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd_decompress.o
 
diff --git a/contrib/linux-kernel/linux_zstd.h b/contrib/linux-kernel/linux_zstd.h
index dcd1ec1..446ecab 100644
--- a/contrib/linux-kernel/linux_zstd.h
+++ b/contrib/linux-kernel/linux_zstd.h
@@ -1,18 +1,13 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of https://github.com/facebook/zstd) and
+ * the GPLv2 (found in the COPYING file in the root directory of
+ * https://github.com/facebook/zstd). You may select, at your option, one of the
+ * above-listed licenses.
  */
 
 #ifndef LINUX_ZSTD_H
@@ -27,6 +22,8 @@
 
 /* ======   Dependency   ====== */
 #include <linux/types.h>
+#include <linux/zstd_errors.h>
+#include <linux/zstd_lib.h>
 
 /* ======   Helper Functions   ====== */
 /**
@@ -46,12 +43,17 @@
 unsigned int zstd_is_error(size_t code);
 
 /**
+ * enum zstd_error_code - zstd error codes
+ */
+typedef ZSTD_ErrorCode zstd_error_code;
+
+/**
  * zstd_get_error_code() - translates an error function result to an error code
  * @code:  The function result for which zstd_is_error(code) is true.
  *
  * Return: A unique error code for this error.
  */
-int zstd_get_error_code(size_t code);
+zstd_error_code zstd_get_error_code(size_t code);
 
 /**
  * zstd_get_error_name() - translates an error function result to a string
@@ -61,76 +63,67 @@
  */
 const char *zstd_get_error_name(size_t code);
 
+/**
+ * zstd_min_clevel() - minimum allowed compression level
+ *
+ * Return: The minimum allowed compression level.
+ */
+int zstd_min_clevel(void);
+
+/**
+ * zstd_max_clevel() - maximum allowed compression level
+ *
+ * Return: The maximum allowed compression level.
+ */
+int zstd_max_clevel(void);
+
 /* ======   Parameter Selection   ====== */
 
 /**
  * enum zstd_strategy - zstd compression search strategy
  *
- * From faster to stronger.
+ * From faster to stronger. See zstd_lib.h.
  */
-enum zstd_strategy {
-	zstd_fast = 1,
-	zstd_dfast = 2,
-	zstd_greedy = 3,
-	zstd_lazy = 4,
-	zstd_lazy2 = 5,
-	zstd_btlazy2 = 6,
-	zstd_btopt = 7,
-	zstd_btultra = 8,
-	zstd_btultra2 = 9
-};
+typedef ZSTD_strategy zstd_strategy;
 
 /**
  * struct zstd_compression_parameters - zstd compression parameters
- * @window_log:    Log of the largest match distance. Larger means more
- *                 compression, and more memory needed during decompression.
- * @chain_log:     Fully searched segment. Larger means more compression,
- *                 slower, and more memory (useless for fast).
- * @hash_log:      Dispatch table. Larger means more compression,
- *                 slower, and more memory.
- * @search_log:    Number of searches. Larger means more compression and slower.
- * @search_length: Match length searched. Larger means faster decompression,
- *                 sometimes less compression.
- * @target_length: Acceptable match size for optimal parser (only). Larger means
- *                 more compression, and slower.
- * @strategy:      The zstd compression strategy.
+ * @windowLog:    Log of the largest match distance. Larger means more
+ *                compression, and more memory needed during decompression.
+ * @chainLog:     Fully searched segment. Larger means more compression,
+ *                slower, and more memory (useless for fast).
+ * @hashLog:      Dispatch table. Larger means more compression,
+ *                slower, and more memory.
+ * @searchLog:    Number of searches. Larger means more compression and slower.
+ * @minMatch:     Match length searched. Larger means faster decompression,
+ *                sometimes less compression.
+ * @targetLength: Acceptable match size for optimal parser (only). Larger means
+ *                more compression, and slower.
+ * @strategy:     The zstd compression strategy.
+ *
+ * See zstd_lib.h.
  */
-struct zstd_compression_parameters {
-	unsigned int window_log;
-	unsigned int chain_log;
-	unsigned int hash_log;
-	unsigned int search_log;
-	unsigned int search_length;
-	unsigned int target_length;
-	enum zstd_strategy strategy;
-};
+typedef ZSTD_compressionParameters zstd_compression_parameters;
 
 /**
  * struct zstd_frame_parameters - zstd frame parameters
- * @content_size_flag: Controls whether content size will be present in the
- *                     frame header (when known).
- * @checksum_flag:     Controls whether a 32-bit checksum is generated at the
- *                     end of the frame for error detection.
- * @no_dict_id_flag:   Controls whether dictID will be saved into the frame
- *                     header when using dictionary compression.
+ * @contentSizeFlag: Controls whether content size will be present in the
+ *                   frame header (when known).
+ * @checksumFlag:    Controls whether a 32-bit checksum is generated at the
+ *                   end of the frame for error detection.
+ * @noDictIDFlag:    Controls whether dictID will be saved into the frame
+ *                   header when using dictionary compression.
  *
- * The default value is all fields set to 0.
+ * The default value is all fields set to 0. See zstd_lib.h.
  */
-struct zstd_frame_parameters {
-	unsigned int content_size_flag;
-	unsigned int checksum_flag;
-	unsigned int no_dict_id_flag;
-};
+typedef ZSTD_frameParameters zstd_frame_parameters;
 
 /**
  * struct zstd_parameters - zstd parameters
- * @cparams: The compression parameters.
- * @fparams: The frame parameters.
+ * @cParams: The compression parameters.
+ * @fParams: The frame parameters.
  */
-struct zstd_parameters {
-	struct zstd_compression_parameters cparams;
-	struct zstd_frame_parameters fparams;
-};
+typedef ZSTD_parameters zstd_parameters;
 
 /**
  * zstd_get_params() - returns zstd_parameters for selected level
@@ -140,12 +133,12 @@
  *
  * Return:              The selected zstd_parameters.
  */
-struct zstd_parameters zstd_get_params(int level,
+zstd_parameters zstd_get_params(int level,
 	unsigned long long estimated_src_size);
 
 /* ======   Single-pass Compression   ====== */
 
-typedef struct ZSTD_CCtx_s zstd_cctx;
+typedef ZSTD_CCtx zstd_cctx;
 
 /**
  * zstd_cctx_workspace_bound() - max memory needed to initialize a zstd_cctx
@@ -158,8 +151,7 @@
  * Return:      A lower bound on the size of the workspace that is passed to
  *              zstd_init_cctx().
  */
-size_t zstd_cctx_workspace_bound(
-	const struct zstd_compression_parameters *parameters);
+size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *parameters);
 
 /**
  * zstd_init_cctx() - initialize a zstd compression context
@@ -186,11 +178,11 @@
  *                zstd_is_error().
  */
 size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
-	const void *src, size_t src_size, const struct zstd_parameters *parameters);
+	const void *src, size_t src_size, const zstd_parameters *parameters);
 
 /* ======   Single-pass Decompression   ====== */
 
-typedef struct ZSTD_DCtx_s zstd_dctx;
+typedef ZSTD_DCtx zstd_dctx;
 
 /**
  * zstd_dctx_workspace_bound() - max memory needed to initialize a zstd_dctx
@@ -236,12 +228,10 @@
  * @size: Size of the input buffer.
  * @pos:  Position where reading stopped. Will be updated.
  *        Necessarily 0 <= pos <= size.
+ *
+ * See zstd_lib.h.
  */
-struct zstd_in_buffer {
-	const void *src;
-	size_t size;
-	size_t pos;
-};
+typedef ZSTD_inBuffer zstd_in_buffer;
 
 /**
  * struct zstd_out_buffer - output buffer for streaming
@@ -249,16 +239,14 @@
  * @size: Size of the output buffer.
  * @pos:  Position where writing stopped. Will be updated.
  *        Necessarily 0 <= pos <= size.
+ *
+ * See zstd_lib.h.
  */
-struct zstd_out_buffer {
-	void *dst;
-	size_t size;
-	size_t pos;
-};
+typedef ZSTD_outBuffer zstd_out_buffer;
 
 /* ======   Streaming Compression   ====== */
 
-typedef struct ZSTD_CCtx_s zstd_cstream;
+typedef ZSTD_CStream zstd_cstream;
 
 /**
  * zstd_cstream_workspace_bound() - memory needed to initialize a zstd_cstream
@@ -267,8 +255,7 @@
  * Return:   A lower bound on the size of the workspace that is passed to
  *           zstd_init_cstream().
  */
-size_t zstd_cstream_workspace_bound(
-	const struct zstd_compression_parameters *cparams);
+size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams);
 
 /**
  * zstd_init_cstream() - initialize a zstd streaming compression context
@@ -285,7 +272,7 @@
  *
  * Return:            The zstd streaming compression context or NULL on error.
  */
-zstd_cstream *zstd_init_cstream(const struct zstd_parameters *parameters,
+zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
 	unsigned long long pledged_src_size, void *workspace, size_t workspace_size);
 
 /**
@@ -320,8 +307,8 @@
  *           function call or an error, which can be checked using
  *           zstd_is_error().
  */
-size_t zstd_compress_stream(zstd_cstream *cstream,
-	struct zstd_out_buffer *output, struct zstd_in_buffer *input);
+size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
+	zstd_in_buffer *input);
 
 /**
  * zstd_flush_stream() - flush internal buffers into output
@@ -336,7 +323,7 @@
  * Return:   The number of bytes still present within internal buffers or an
  *           error, which can be checked using zstd_is_error().
  */
-size_t zstd_flush_stream(zstd_cstream *cstream, struct zstd_out_buffer *output);
+size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output);
 
 /**
  * zstd_end_stream() - flush internal buffers into output and end the frame
@@ -350,11 +337,11 @@
  * Return:   The number of bytes still present within internal buffers or an
  *           error, which can be checked using zstd_is_error().
  */
-size_t zstd_end_stream(zstd_cstream *cstream, struct zstd_out_buffer *output);
+size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output);
 
 /* ======   Streaming Decompression   ====== */
 
-typedef struct ZSTD_DCtx_s zstd_dstream;
+typedef ZSTD_DStream zstd_dstream;
 
 /**
  * zstd_dstream_workspace_bound() - memory needed to initialize a zstd_dstream
@@ -411,8 +398,8 @@
  *           using zstd_is_error(). The size hint will never load more than the
  *           frame.
  */
-size_t zstd_decompress_stream(zstd_dstream *dstream,
-	struct zstd_out_buffer *output, struct zstd_in_buffer *input);
+size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
+	zstd_in_buffer *input);
 
 /* ======   Frame Inspection Functions ====== */
 
@@ -431,20 +418,21 @@
 
 /**
  * struct zstd_frame_params - zstd frame parameters stored in the frame header
- * @frame_content_size: The frame content size, or 0 if not present.
- * @window_size:        The window size, or 0 if the frame is a skippable frame.
- * @dict_id:            The dictionary id, or 0 if not present.
- * @checksum_flag:      Whether a checksum was used.
+ * @frameContentSize: The frame content size, or ZSTD_CONTENTSIZE_UNKNOWN if not
+ *                    present.
+ * @windowSize:       The window size, or 0 if the frame is a skippable frame.
+ * @blockSizeMax:     The maximum block size.
+ * @frameType:        The frame type (zstd or skippable).
+ * @headerSize:       The size of the frame header.
+ * @dictID:           The dictionary id, or 0 if not present.
+ * @checksumFlag:     Whether a checksum was used.
+ *
+ * See zstd_lib.h.
  */
-struct zstd_frame_params {
-	unsigned long long frame_content_size;
-	unsigned int window_size;
-	unsigned int dict_id;
-	unsigned int checksum_flag;
-};
+typedef ZSTD_frameHeader zstd_frame_header;
 
 /**
- * zstd_get_frame_params() - extracts parameters from a zstd or skippable frame
+ * zstd_get_frame_header() - extracts parameters from a zstd or skippable frame
  * @params:   On success the frame parameters are written here.
  * @src:      The source buffer. It must point to a zstd or skippable frame.
  * @src_size: The size of the source buffer.
@@ -453,7 +441,7 @@
  *            must be provided to make forward progress. Otherwise it returns
  *            an error, which can be checked using zstd_is_error().
  */
-size_t zstd_get_frame_params(struct zstd_frame_params *params, const void *src,
+size_t zstd_get_frame_header(zstd_frame_header *params, const void *src,
 	size_t src_size);
 
 #endif  /* LINUX_ZSTD_H */
diff --git a/contrib/linux-kernel/mem.h b/contrib/linux-kernel/mem.h
index 54832a6..4b5db57 100644
--- a/contrib/linux-kernel/mem.h
+++ b/contrib/linux-kernel/mem.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/contrib/linux-kernel/test/Makefile b/contrib/linux-kernel/test/Makefile
index 80bce74..2908839 100644
--- a/contrib/linux-kernel/test/Makefile
+++ b/contrib/linux-kernel/test/Makefile
@@ -1,15 +1,24 @@
+# ################################################################
+# Copyright (c) Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# You may select, at your option, one of the above-listed licenses.
+# ################################################################
 
 LINUX := ../linux
 LINUX_ZSTDLIB := $(LINUX)/lib/zstd
 
-CPPFLAGS += -I$(LINUX)/include -I$(LINUX_ZSTDLIB) -Iinclude -DNDEBUG
+CPPFLAGS += -I$(LINUX)/include -I$(LINUX_ZSTDLIB) -Iinclude -DNDEBUG -Wno-deprecated-declarations
 # Don't poison the workspace, it currently doesn't work with static allocation and workspace reuse
 CPPFLAGS += -DZSTD_ASAN_DONT_POISON_WORKSPACE
 
 LINUX_ZSTD_MODULE     := $(wildcard $(LINUX_ZSTDLIB)/*.c)
-LINUX_ZSTD_COMMON     := $(wildcard $(LINUX_ZSTDLIB)/common/*.c) 
-LINUX_ZSTD_COMPRESS   := $(wildcard $(LINUX_ZSTDLIB)/compress/*.c) 
-LINUX_ZSTD_DECOMPRESS := $(wildcard $(LINUX_ZSTDLIB)/decompress/*.c) 
+LINUX_ZSTD_COMMON     := $(wildcard $(LINUX_ZSTDLIB)/common/*.c)
+LINUX_ZSTD_COMPRESS   := $(wildcard $(LINUX_ZSTDLIB)/compress/*.c)
+LINUX_ZSTD_DECOMPRESS := $(wildcard $(LINUX_ZSTDLIB)/decompress/*.c)
 LINUX_ZSTD_FILES      := $(LINUX_ZSTD_MODULE) $(LINUX_ZSTD_COMMON) $(LINUX_ZSTD_COMPRESS) $(LINUX_ZSTD_DECOMPRESS)
 LINUX_ZSTD_OBJECTS    := $(LINUX_ZSTD_FILES:.c=.o)
 
@@ -29,6 +38,7 @@
 
 .PHONY:
 clean:
+	$(RM) -f $(LINUX_ZSTDLIB)/*.o
 	$(RM) -f $(LINUX_ZSTDLIB)/**/*.o
 	$(RM) -f *.o *.a
 	$(RM) -f test
diff --git a/contrib/linux-kernel/test/include/asm/unaligned.h b/contrib/linux-kernel/test/include/asm/unaligned.h
index 6576b37..02c2d74 100644
--- a/contrib/linux-kernel/test/include/asm/unaligned.h
+++ b/contrib/linux-kernel/test/include/asm/unaligned.h
@@ -4,13 +4,23 @@
 #include <assert.h>
 #include <linux/types.h>
 
-#define _LITTLE_ENDIAN 1
+#ifndef __LITTLE_ENDIAN
+# if (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || defined(__LITTLE_ENDIAN__)
+#  define __LITTLE_ENDIAN 1 /* compiler macros report a little-endian target */
+# endif /* without the defined() guard, two undefined macros compare as 0 == 0 */
+#endif /* !__LITTLE_ENDIAN */
+
+#ifdef __LITTLE_ENDIAN
+# define _IS_LITTLE_ENDIAN 1
+#else
+# define _IS_LITTLE_ENDIAN 0
+#endif
 
 static unsigned _isLittleEndian(void)
 {
     const union { uint32_t u; uint8_t c[4]; } one = { 1 };
-    assert(_LITTLE_ENDIAN == one.c[0]);
-    return _LITTLE_ENDIAN;
+    assert(_IS_LITTLE_ENDIAN == one.c[0]);
+    return _IS_LITTLE_ENDIAN;
 }
 
 static uint16_t _swap16(uint16_t in)
@@ -165,7 +175,7 @@
     (void)0;                                                                   \
   })
 
-#if _LITTLE_ENDIAN
+#if _IS_LITTLE_ENDIAN
 #  define get_unaligned __get_unaligned_le
 #  define put_unaligned __put_unaligned_le
 #else
diff --git a/contrib/linux-kernel/test/include/linux/compiler.h b/contrib/linux-kernel/test/include/linux/compiler.h
index b614b27..ea3422e 100644
--- a/contrib/linux-kernel/test/include/linux/compiler.h
+++ b/contrib/linux-kernel/test/include/linux/compiler.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) 2016-2021, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,4 +14,8 @@
 #define inline __inline __attribute__((unused))
 #endif
 
+#ifndef noinline
+#define noinline __attribute__((noinline))
+#endif
+
 #endif
diff --git a/contrib/linux-kernel/test/include/linux/errno.h b/contrib/linux-kernel/test/include/linux/errno.h
index 11c54b9..b247522 100644
--- a/contrib/linux-kernel/test/include/linux/errno.h
+++ b/contrib/linux-kernel/test/include/linux/errno.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) 2016-2021, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/contrib/linux-kernel/test/include/linux/kernel.h b/contrib/linux-kernel/test/include/linux/kernel.h
index 1124f02..1f702ab 100644
--- a/contrib/linux-kernel/test/include/linux/kernel.h
+++ b/contrib/linux-kernel/test/include/linux/kernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) 2016-2021, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -12,4 +12,8 @@
 
 #define WARN_ON(x)
 
+#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long long)(p), (a))) /* round pointer p up to a multiple of a */
+#define ALIGN(x, a)         ALIGN_MASK((x), (typeof(x))(a) - 1) /* mask takes x's type so ~mask keeps x's high bits */
+#define ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) /* mask must be (power of two) - 1 */
+
 #endif
diff --git a/contrib/linux-kernel/test/include/linux/limits.h b/contrib/linux-kernel/test/include/linux/limits.h
index 7f8d18d..db9c099 100644
--- a/contrib/linux-kernel/test/include/linux/limits.h
+++ b/contrib/linux-kernel/test/include/linux/limits.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) 2016-2021, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/contrib/linux-kernel/test/include/linux/math64.h b/contrib/linux-kernel/test/include/linux/math64.h
index 4bc7f4b..8eefa2d 100644
--- a/contrib/linux-kernel/test/include/linux/math64.h
+++ b/contrib/linux-kernel/test/include/linux/math64.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) 2016-2021, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/contrib/linux-kernel/test/include/linux/module.h b/contrib/linux-kernel/test/include/linux/module.h
index 8fd6693..be6d20d 100644
--- a/contrib/linux-kernel/test/include/linux/module.h
+++ b/contrib/linux-kernel/test/include/linux/module.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) 2016-2021, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/contrib/linux-kernel/test/include/linux/printk.h b/contrib/linux-kernel/test/include/linux/printk.h
index 2ca2fb3..eab08e0 100644
--- a/contrib/linux-kernel/test/include/linux/printk.h
+++ b/contrib/linux-kernel/test/include/linux/printk.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) 2016-2021, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/contrib/linux-kernel/test/include/linux/stddef.h b/contrib/linux-kernel/test/include/linux/stddef.h
index c00d065..8538eb3 100644
--- a/contrib/linux-kernel/test/include/linux/stddef.h
+++ b/contrib/linux-kernel/test/include/linux/stddef.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) 2016-2021, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/contrib/linux-kernel/test/include/linux/swab.h b/contrib/linux-kernel/test/include/linux/swab.h
index 693b797..783046b 100644
--- a/contrib/linux-kernel/test/include/linux/swab.h
+++ b/contrib/linux-kernel/test/include/linux/swab.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) 2016-2021, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/contrib/linux-kernel/test/include/linux/types.h b/contrib/linux-kernel/test/include/linux/types.h
index 6db834b..459a457 100644
--- a/contrib/linux-kernel/test/include/linux/types.h
+++ b/contrib/linux-kernel/test/include/linux/types.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) 2016-2021, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/contrib/linux-kernel/test/static_test.c b/contrib/linux-kernel/test/static_test.c
index 53c559c..50c594c 100644
--- a/contrib/linux-kernel/test/static_test.c
+++ b/contrib/linux-kernel/test/static_test.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/contrib/linux-kernel/test/test.c b/contrib/linux-kernel/test/test.c
index 9579976..9064be7 100644
--- a/contrib/linux-kernel/test/test.c
+++ b/contrib/linux-kernel/test/test.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 7-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -57,10 +57,10 @@
   fprintf(stderr, "testing btrfs use cases... ");
   size_t const size = MIN(data->dataSize, 128 * 1024);
   for (int level = -1; level < 16; ++level) {
-    struct zstd_parameters params = zstd_get_params(level, size);
-    CONTROL(params.cparams.window_log <= 17);
+    zstd_parameters params = zstd_get_params(level, size);
+    CONTROL(params.cParams.windowLog <= 17);
     size_t const workspaceSize =
-        MAX(zstd_cstream_workspace_bound(&params.cparams),
+        MAX(zstd_cstream_workspace_bound(&params.cParams),
             zstd_dstream_workspace_bound(size));
     void *workspace = malloc(workspaceSize);
     CONTROL(workspace != NULL);
@@ -72,8 +72,8 @@
     {
       zstd_cstream *cctx = zstd_init_cstream(&params, size, workspace, workspaceSize);
       CONTROL(cctx != NULL);
-      struct zstd_out_buffer out = {NULL, 0, 0};
-      struct zstd_in_buffer in = {NULL, 0, 0};
+      zstd_out_buffer out = {NULL, 0, 0};
+      zstd_in_buffer in = {NULL, 0, 0};
       for (;;) {
         if (in.pos == in.size) {
           in.src = ip;
@@ -107,10 +107,10 @@
     op = data->data2;
     oend = op + size;
     {
-      zstd_dstream *dctx = zstd_init_dstream(1ULL << params.cparams.window_log, workspace, workspaceSize);
+      zstd_dstream *dctx = zstd_init_dstream(1ULL << params.cParams.windowLog, workspace, workspaceSize);
       CONTROL(dctx != NULL);
-      struct zstd_out_buffer out = {NULL, 0, 0};
-      struct zstd_in_buffer in = {NULL, 0, 0};
+      zstd_out_buffer out = {NULL, 0, 0};
+      zstd_in_buffer in = {NULL, 0, 0};
       for (;;) {
         if (in.pos == in.size) {
           in.src = ip;
@@ -144,8 +144,8 @@
     fprintf(stderr, "Testing decompress unzstd... ");
     size_t cSize;
     {
-        struct zstd_parameters params = zstd_get_params(19, 0);
-        size_t const wkspSize = zstd_cctx_workspace_bound(&params.cparams);
+        zstd_parameters params = zstd_get_params(19, 0);
+        size_t const wkspSize = zstd_cctx_workspace_bound(&params.cParams);
         void* wksp = malloc(wkspSize);
         CONTROL(wksp != NULL);
         zstd_cctx* cctx = zstd_init_cctx(wksp, wkspSize);
@@ -169,6 +169,13 @@
     fprintf(stderr, "Ok\n");
 }
 
+static void test_f2fs(void) { /* sanity-checks the compression level bounds */
+  fprintf(stderr, "testing f2fs uses... ");
+  CONTROL(zstd_min_clevel() < 0); /* negative levels must be available */
+  CONTROL(zstd_max_clevel() == 22);
+  fprintf(stderr, "Ok\n");
+}
+
 static char *g_stack = NULL;
 
 static void __attribute__((noinline)) use(void *x) {
@@ -195,6 +202,7 @@
 
 static void test_stack_usage(test_data_t const *data) {
   set_stack();
+  test_f2fs();
   test_btrfs(data);
   test_decompress_unzstd(data);
   check_stack();
@@ -202,6 +210,7 @@
 
 int main(void) {
   test_data_t data = create_test_data();
+  test_f2fs();
   test_btrfs(&data);
   test_decompress_unzstd(&data);
   test_stack_usage(&data);
diff --git a/contrib/linux-kernel/zstd_compress_module.c b/contrib/linux-kernel/zstd_compress_module.c
index bab79af..37d08ff 100644
--- a/contrib/linux-kernel/zstd_compress_module.c
+++ b/contrib/linux-kernel/zstd_compress_module.c
@@ -1,75 +1,33 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
 
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/string.h>
 #include <linux/zstd.h>
 
-#include "zstd.h"
 #include "common/zstd_deps.h"
 #include "common/zstd_internal.h"
 
-static void zstd_check_structs(void) {
-	/* Check that the structs have the same size. */
-	ZSTD_STATIC_ASSERT(sizeof(ZSTD_parameters) ==
-		sizeof(struct zstd_parameters));
-	ZSTD_STATIC_ASSERT(sizeof(ZSTD_compressionParameters) ==
-		sizeof(struct zstd_compression_parameters));
-	ZSTD_STATIC_ASSERT(sizeof(ZSTD_frameParameters) ==
-		sizeof(struct zstd_frame_parameters));
-	/* Zstd guarantees that the layout of the structs never change. Verify it. */
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_parameters, cParams) ==
-		offsetof(struct zstd_parameters, cparams));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_parameters, fParams) ==
-		offsetof(struct zstd_parameters, fparams));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_compressionParameters, windowLog) ==
-		offsetof(struct zstd_compression_parameters, window_log));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_compressionParameters, chainLog) ==
-		offsetof(struct zstd_compression_parameters, chain_log));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_compressionParameters, hashLog) ==
-		offsetof(struct zstd_compression_parameters, hash_log));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_compressionParameters, searchLog) ==
-		offsetof(struct zstd_compression_parameters, search_log));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_compressionParameters, minMatch) ==
-		offsetof(struct zstd_compression_parameters, search_length));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_compressionParameters, targetLength) ==
-		offsetof(struct zstd_compression_parameters, target_length));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_compressionParameters, strategy) ==
-		offsetof(struct zstd_compression_parameters, strategy));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_frameParameters, contentSizeFlag) ==
-		offsetof(struct zstd_frame_parameters, content_size_flag));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_frameParameters, checksumFlag) ==
-		offsetof(struct zstd_frame_parameters, checksum_flag));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_frameParameters, noDictIDFlag) ==
-		offsetof(struct zstd_frame_parameters, no_dict_id_flag));
-	/* Check that the strategies are the same. This can change. */
-	ZSTD_STATIC_ASSERT((int)ZSTD_fast == (int)zstd_fast);
-	ZSTD_STATIC_ASSERT((int)ZSTD_dfast == (int)zstd_dfast);
-	ZSTD_STATIC_ASSERT((int)ZSTD_greedy == (int)zstd_greedy);
-	ZSTD_STATIC_ASSERT((int)ZSTD_lazy == (int)zstd_lazy);
-	ZSTD_STATIC_ASSERT((int)ZSTD_lazy2 == (int)zstd_lazy2);
-	ZSTD_STATIC_ASSERT((int)ZSTD_btlazy2 == (int)zstd_btlazy2);
-	ZSTD_STATIC_ASSERT((int)ZSTD_btopt == (int)zstd_btopt);
-	ZSTD_STATIC_ASSERT((int)ZSTD_btultra == (int)zstd_btultra);
-	ZSTD_STATIC_ASSERT((int)ZSTD_btultra2 == (int)zstd_btultra2);
-	/* Check input buffer */
-	ZSTD_STATIC_ASSERT(sizeof(ZSTD_inBuffer) == sizeof(struct zstd_in_buffer));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_inBuffer, src) ==
-		offsetof(struct zstd_in_buffer, src));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_inBuffer, size) ==
-		offsetof(struct zstd_in_buffer, size));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_inBuffer, pos) ==
-		offsetof(struct zstd_in_buffer, pos));
-	/* Check output buffer */
-	ZSTD_STATIC_ASSERT(sizeof(ZSTD_outBuffer) ==
-		sizeof(struct zstd_out_buffer));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_outBuffer, dst) ==
-		offsetof(struct zstd_out_buffer, dst));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_outBuffer, size) ==
-		offsetof(struct zstd_out_buffer, size));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_outBuffer, pos) ==
-		offsetof(struct zstd_out_buffer, pos));
+int zstd_min_clevel(void)
+{
+	return ZSTD_minCLevel();
 }
+EXPORT_SYMBOL(zstd_min_clevel);
+
+int zstd_max_clevel(void)
+{
+	return ZSTD_maxCLevel();
+}
+EXPORT_SYMBOL(zstd_max_clevel);
 
 size_t zstd_compress_bound(size_t src_size)
 {
@@ -77,26 +35,16 @@
 }
 EXPORT_SYMBOL(zstd_compress_bound);
 
-struct zstd_parameters zstd_get_params(int level,
+zstd_parameters zstd_get_params(int level,
 	unsigned long long estimated_src_size)
 {
-	const ZSTD_parameters params = ZSTD_getParams(level, estimated_src_size, 0);
-	struct zstd_parameters out;
-
-	/* no-op */
-	zstd_check_structs();
-	ZSTD_memcpy(&out, &params, sizeof(out));
-	return out;
+	return ZSTD_getParams(level, estimated_src_size, 0);
 }
 EXPORT_SYMBOL(zstd_get_params);
 
-size_t zstd_cctx_workspace_bound(
-	const struct zstd_compression_parameters *cparams)
+size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams)
 {
-	ZSTD_compressionParameters p;
-
-	ZSTD_memcpy(&p, cparams, sizeof(p));
-	return ZSTD_estimateCCtxSize_usingCParams(p);
+	return ZSTD_estimateCCtxSize_usingCParams(*cparams);
 }
 EXPORT_SYMBOL(zstd_cctx_workspace_bound);
 
@@ -109,29 +57,21 @@
 EXPORT_SYMBOL(zstd_init_cctx);
 
 size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
-	const void *src, size_t src_size, const struct zstd_parameters *parameters)
+	const void *src, size_t src_size, const zstd_parameters *parameters)
 {
-	ZSTD_parameters p;
-
-	ZSTD_memcpy(&p, parameters, sizeof(p));
-	return ZSTD_compress_advanced(cctx, dst, dst_capacity, src, src_size, NULL, 0, p);
+	return ZSTD_compress_advanced(cctx, dst, dst_capacity, src, src_size, NULL, 0, *parameters);
 }
 EXPORT_SYMBOL(zstd_compress_cctx);
 
-size_t zstd_cstream_workspace_bound(
-	const struct zstd_compression_parameters *cparams)
+size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams)
 {
-	ZSTD_compressionParameters p;
-
-	ZSTD_memcpy(&p, cparams, sizeof(p));
-	return ZSTD_estimateCStreamSize_usingCParams(p);
+	return ZSTD_estimateCStreamSize_usingCParams(*cparams);
 }
 EXPORT_SYMBOL(zstd_cstream_workspace_bound);
 
-zstd_cstream *zstd_init_cstream(const struct zstd_parameters *parameters,
+zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
 	unsigned long long pledged_src_size, void *workspace, size_t workspace_size)
 {
-	ZSTD_parameters p;
 	zstd_cstream *cstream;
 	size_t ret;
 
@@ -146,8 +86,7 @@
 	if (pledged_src_size == 0)
 		pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN;
 
-	ZSTD_memcpy(&p, parameters, sizeof(p));
-	ret = ZSTD_initCStream_advanced(cstream, NULL, 0, p, pledged_src_size);
+	ret = ZSTD_initCStream_advanced(cstream, NULL, 0, *parameters, pledged_src_size);
 	if (ZSTD_isError(ret))
 		return NULL;
 
@@ -162,43 +101,22 @@
 }
 EXPORT_SYMBOL(zstd_reset_cstream);
 
-size_t zstd_compress_stream(zstd_cstream *cstream,
-	struct zstd_out_buffer *output, struct zstd_in_buffer *input)
+size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
+	zstd_in_buffer *input)
 {
-	ZSTD_outBuffer o;
-	ZSTD_inBuffer i;
-	size_t ret;
-
-	ZSTD_memcpy(&o, output, sizeof(o));
-	ZSTD_memcpy(&i, input, sizeof(i));
-	ret = ZSTD_compressStream(cstream, &o, &i);
-	ZSTD_memcpy(output, &o, sizeof(o));
-	ZSTD_memcpy(input, &i, sizeof(i));
-	return ret;
+	return ZSTD_compressStream(cstream, output, input);
 }
 EXPORT_SYMBOL(zstd_compress_stream);
 
-size_t zstd_flush_stream(zstd_cstream *cstream, struct zstd_out_buffer *output)
+size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output)
 {
-	ZSTD_outBuffer o;
-	size_t ret;
-
-	ZSTD_memcpy(&o, output, sizeof(o));
-	ret = ZSTD_flushStream(cstream, &o);
-	ZSTD_memcpy(output, &o, sizeof(o));
-	return ret;
+	return ZSTD_flushStream(cstream, output);
 }
 EXPORT_SYMBOL(zstd_flush_stream);
 
-size_t zstd_end_stream(zstd_cstream *cstream, struct zstd_out_buffer *output)
+size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output)
 {
-	ZSTD_outBuffer o;
-	size_t ret;
-
-	ZSTD_memcpy(&o, output, sizeof(o));
-	ret = ZSTD_endStream(cstream, &o);
-	ZSTD_memcpy(output, &o, sizeof(o));
-	return ret;
+	return ZSTD_endStream(cstream, output);
 }
 EXPORT_SYMBOL(zstd_end_stream);
 
diff --git a/contrib/linux-kernel/zstd_decompress_module.c b/contrib/linux-kernel/zstd_decompress_module.c
index 988fdb5..15005cd 100644
--- a/contrib/linux-kernel/zstd_decompress_module.c
+++ b/contrib/linux-kernel/zstd_decompress_module.c
@@ -1,13 +1,20 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
 
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/string.h>
 #include <linux/zstd.h>
 
-#include "zstd.h"
 #include "common/zstd_deps.h"
-#include "common/zstd_errors.h"
 
 /* Common symbols. zstd_compress must depend on zstd_decompress. */
 
@@ -17,7 +24,7 @@
 }
 EXPORT_SYMBOL(zstd_is_error);
 
-int zstd_get_error_code(size_t code)
+zstd_error_code zstd_get_error_code(size_t code)
 {
 	return ZSTD_getErrorCode(code);
 }
@@ -74,19 +81,10 @@
 }
 EXPORT_SYMBOL(zstd_reset_dstream);
 
-size_t zstd_decompress_stream(zstd_dstream *dstream,
-	struct zstd_out_buffer *output, struct zstd_in_buffer *input)
+size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
+	zstd_in_buffer *input)
 {
-	ZSTD_outBuffer o;
-	ZSTD_inBuffer i;
-	size_t ret;
-
-	ZSTD_memcpy(&o, output, sizeof(o));
-	ZSTD_memcpy(&i, input, sizeof(i));
-	ret = ZSTD_decompressStream(dstream, &o, &i);
-	ZSTD_memcpy(output, &o, sizeof(o));
-	ZSTD_memcpy(input, &i, sizeof(i));
-	return ret;
+	return ZSTD_decompressStream(dstream, output, input);
 }
 EXPORT_SYMBOL(zstd_decompress_stream);
 
@@ -96,27 +94,12 @@
 }
 EXPORT_SYMBOL(zstd_find_frame_compressed_size);
 
-size_t zstd_get_frame_params(struct zstd_frame_params *params, const void *src,
+size_t zstd_get_frame_header(zstd_frame_header *header, const void *src,
 	size_t src_size)
 {
-	ZSTD_frameHeader h;
-	const size_t ret = ZSTD_getFrameHeader(&h, src, src_size);
-
-	if (ret != 0)
-		return ret;
-
-	if (h.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
-		params->frame_content_size = h.frameContentSize;
-	else
-		params->frame_content_size = 0;
-
-	params->window_size = h.windowSize;
-	params->dict_id = h.dictID;
-	params->checksum_flag = h.checksumFlag;
-
-	return ret;
+	return ZSTD_getFrameHeader(header, src, src_size);
 }
-EXPORT_SYMBOL(zstd_get_frame_params);
+EXPORT_SYMBOL(zstd_get_frame_header);
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("Zstd Decompressor");
diff --git a/contrib/linux-kernel/zstd_deps.h b/contrib/linux-kernel/zstd_deps.h
index 4a6d35f..853b724 100644
--- a/contrib/linux-kernel/zstd_deps.h
+++ b/contrib/linux-kernel/zstd_deps.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -71,7 +72,7 @@
 #endif /* ZSTD_DEPS_MATH64 */
 #endif /* ZSTD_DEPS_NEED_MATH64 */
 
-/* 
+/*
  * This is only requested when DEBUGLEVEL >= 1, meaning
  * it is disabled in production.
  * Need:
@@ -88,7 +89,7 @@
 #endif /* ZSTD_DEPS_ASSERT */
 #endif /* ZSTD_DEPS_NEED_ASSERT */
 
-/* 
+/*
  * This is only requested when DEBUGLEVEL >= 2, meaning
  * it is disabled in production.
  * Need:
@@ -105,7 +106,7 @@
 #endif /* ZSTD_DEPS_IO */
 #endif /* ZSTD_DEPS_NEED_IO */
 
-/* 
+/*
  * Only requested when MSAN is enabled.
  * Need:
  * intptr_t
diff --git a/contrib/pzstd/Makefile b/contrib/pzstd/Makefile
index 8d2b193..25265e7 100644
--- a/contrib/pzstd/Makefile
+++ b/contrib/pzstd/Makefile
@@ -30,6 +30,9 @@
 CPPFLAGS ?=
 LDFLAGS  ?=
 
+# PZstd uses legacy APIs
+CFLAGS   += -Wno-deprecated-declarations
+
 # Include flags
 PZSTD_INC  = -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(PROGDIR) -I.
 GTEST_INC  = -isystem googletest/googletest/include
diff --git a/contrib/pzstd/Options.h b/contrib/pzstd/Options.h
index f4f2aaa..924543a 100644
--- a/contrib/pzstd/Options.h
+++ b/contrib/pzstd/Options.h
@@ -9,6 +9,9 @@
 #pragma once
 
 #define ZSTD_STATIC_LINKING_ONLY
+#define ZSTD_DISABLE_DEPRECATE_WARNINGS /* No deprecation warnings, pzstd itself is deprecated
+                                         * and uses deprecated functions
+                                         */
 #include "zstd.h"
 #undef ZSTD_STATIC_LINKING_ONLY
 
diff --git a/contrib/pzstd/Pzstd.cpp b/contrib/pzstd/Pzstd.cpp
index ce142ad..2c09bda 100644
--- a/contrib/pzstd/Pzstd.cpp
+++ b/contrib/pzstd/Pzstd.cpp
@@ -274,7 +274,7 @@
     return;
   }
   {
-    auto err = ZSTD_resetCStream(ctx.get(), 0);
+    auto err = ZSTD_CCtx_reset(ctx.get(), ZSTD_reset_session_only);
     if (!errorHolder.check(!ZSTD_isError(err), ZSTD_getErrorName(err))) {
       return;
     }
@@ -432,7 +432,7 @@
     return;
   }
   {
-    auto err = ZSTD_resetDStream(ctx.get());
+    auto err = ZSTD_DCtx_reset(ctx.get(), ZSTD_reset_session_only);
     if (!errorHolder.check(!ZSTD_isError(err), ZSTD_getErrorName(err))) {
       return;
     }
diff --git a/contrib/pzstd/Pzstd.h b/contrib/pzstd/Pzstd.h
index 033adef..c667c88 100644
--- a/contrib/pzstd/Pzstd.h
+++ b/contrib/pzstd/Pzstd.h
@@ -17,6 +17,9 @@
 #include "utils/ThreadPool.h"
 #include "utils/WorkQueue.h"
 #define ZSTD_STATIC_LINKING_ONLY
+#define ZSTD_DISABLE_DEPRECATE_WARNINGS /* No deprecation warnings, pzstd itself is deprecated
+                                         * and uses deprecated functions
+                                         */
 #include "zstd.h"
 #undef ZSTD_STATIC_LINKING_ONLY
 
diff --git a/contrib/recovery/Makefile b/contrib/recovery/Makefile
new file mode 100644
index 0000000..9a9f4f2
--- /dev/null
+++ b/contrib/recovery/Makefile
@@ -0,0 +1,35 @@
+# ################################################################
+# Copyright (c) 2019-present, Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# ################################################################
+
+.PHONY: all
+all: recover_directory
+
+ZSTDLIBDIR ?= ../../lib
+PROGRAMDIR ?= ../../programs
+
+CFLAGS     ?= -O3
+CFLAGS     += -I$(ZSTDLIBDIR) -I$(PROGRAMDIR)
+CFLAGS     += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow                 \
+              -Wstrict-aliasing=1 -Wswitch-enum                               \
+              -Wstrict-prototypes -Wundef                                     \
+              -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings      \
+              -Wredundant-decls -Wmissing-prototypes
+CFLAGS     += $(DEBUGFLAGS) $(MOREFLAGS)
+FLAGS       = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
+
+.PHONY: $(ZSTDLIBDIR)/libzstd.a
+$(ZSTDLIBDIR)/libzstd.a:
+	$(MAKE) -C $(ZSTDLIBDIR) libzstd.a
+
+recover_directory: recover_directory.c $(ZSTDLIBDIR)/libzstd.a $(PROGRAMDIR)/util.c
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
+
+.PHONY: clean
+clean:  # remove the binary under the name the build rule produced ($@$(EXT))
+	$(RM) recover_directory$(EXT)
diff --git a/contrib/recovery/recover_directory.c b/contrib/recovery/recover_directory.c
new file mode 100644
index 0000000..13f83fd
--- /dev/null
+++ b/contrib/recovery/recover_directory.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2016-2021, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define ZSTD_STATIC_LINKING_ONLY
+#include "util.h"
+#include "zstd.h"
+
+#define CHECK(cond, ...)                                                       \
+  do {                                                                         \
+    if (!(cond)) {                                                             \
+      fprintf(stderr, "%s:%d CHECK(%s) failed: ", __FILE__, __LINE__, #cond);  \
+      fprintf(stderr, "" __VA_ARGS__);                                         \
+      fprintf(stderr, "\n");                                                   \
+      exit(1);                                                                 \
+    }                                                                          \
+  } while (0)
+
+static void usage(char const *program) {
+  fprintf(stderr, "USAGE: %s FILE.zst PREFIX\n", program);
+  fprintf(stderr, "FILE.zst: A zstd compressed file with multiple frames\n");
+  fprintf(stderr, "PREFIX:   The output prefix. Uncompressed files will be "
+                  "created named ${PREFIX}0 ${PREFIX}1...\n\n");
+  fprintf(stderr, "This program takes concatenated zstd frames and "
+                  "decompresses them into individual files.\n");
+  fprintf(stderr, "E.g. files created with a command like: zstd -r directory "
+                  "-o file.zst\n");
+}
+
+typedef struct {
+  char *data;
+  size_t size;
+  size_t frames;
+  size_t maxFrameSize;
+} ZstdFrames;
+
+/* Reads all of fileName into memory and counts its zstd frames; exits via CHECK on failure. */
+static ZstdFrames readFile(char const *fileName) {
+  U64 const fileSize = UTIL_getFileSize(fileName);
+  CHECK(fileSize != UTIL_FILESIZE_UNKNOWN, "Unknown file size!");
+  /* malloc() and fread() take a size_t, which can be narrower than U64. */
+  CHECK((U64)(size_t)fileSize == fileSize, "File too large!");
+
+  /* malloc(0) may return NULL even on success, so never request 0 bytes. */
+  char *const data = (char *)malloc(fileSize != 0 ? (size_t)fileSize : 1);
+  CHECK(data != NULL, "Allocation failed");
+
+  FILE *file = fopen(fileName, "rb");
+  CHECK(file != NULL, "fopen failed");
+
+  size_t const readSize = fread(data, 1, (size_t)fileSize, file);
+  CHECK(readSize == fileSize, "fread failed");
+
+  fclose(file);
+  ZstdFrames frames;
+  frames.data = (char *)data;
+  frames.size = (size_t)fileSize;
+  frames.frames = 0;
+
+  size_t index;
+  size_t maxFrameSize = 0;
+  for (index = 0; index < fileSize;) {
+    size_t const frameSize = ZSTD_findFrameCompressedSize(data + index, fileSize - index);
+    CHECK(!ZSTD_isError(frameSize), "Bad zstd frame: %s", ZSTD_getErrorName(frameSize));
+    if (frameSize > maxFrameSize) maxFrameSize = frameSize;
+    frames.frames += 1;
+    index += frameSize;
+  }
+  CHECK(index == fileSize, "Zstd file corrupt!");
+  frames.maxFrameSize = maxFrameSize;
+  return frames;
+}
+
+static int computePadding(size_t numFrames) {
+  return snprintf(NULL, 0, "%u", (unsigned)numFrames);
+}
+
+/* Writes each zstd frame of FILE.zst (argv[1]) to its own file: PREFIX (argv[2]) + padded index. */
+int main(int argc, char **argv) {
+  if (argc != 3) {
+    usage(argv[0]);
+    exit(1);
+  }
+  char const *const zstdFile = argv[1];
+  char const *const prefix = argv[2];
+
+  ZstdFrames frames = readFile(zstdFile);
+  if (frames.frames <= 1) {
+    fprintf(stderr,
+        "%s only has %u zstd frame. Simply use `zstd -d` to decompress it.\n",
+        zstdFile, (unsigned)frames.frames);
+    exit(1);
+  }
+
+  int const padding = computePadding(frames.frames - 1);
+  size_t const outFileNameSize = strlen(prefix) + padding + 1;
+  char* outFileName = malloc(outFileNameSize);
+  CHECK(outFileName != NULL, "Allocation failure");
+
+  size_t const bufferSize = 128 * 1024;
+  void *buffer = malloc(bufferSize);
+  CHECK(buffer != NULL, "Allocation failure");
+
+  ZSTD_DCtx* dctx = ZSTD_createDCtx();
+  CHECK(dctx != NULL, "Allocation failure");
+
+  fprintf(stderr, "Recovering %u files...\n", (unsigned)frames.frames);
+
+  size_t index;
+  size_t frame = 0;
+  for (index = 0; index < frames.size; ++frame) {
+    size_t const frameSize =
+        ZSTD_findFrameCompressedSize(frames.data + index, frames.size - index);
+
+    int const ret = snprintf(outFileName, outFileNameSize, "%s%0*u", prefix, padding, (unsigned)frame);
+    CHECK(ret >= 0 && (size_t)ret < outFileNameSize, "snprintf failed!");  /* ret == size is truncation */
+
+    FILE* outFile = fopen(outFileName, "wb");
+    CHECK(outFile != NULL, "fopen failed");
+
+    ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only);
+    ZSTD_inBuffer in = {frames.data + index, frameSize, 0};
+    int flushPending = 0;  /* input can be fully consumed while output is still buffered in dctx */
+    while (in.pos < in.size || flushPending) {
+        ZSTD_outBuffer out = {buffer, bufferSize, 0};
+        size_t const hint = ZSTD_decompressStream(dctx, &out, &in);
+        CHECK(!ZSTD_isError(hint), "decompression failed");
+        CHECK(fwrite(out.dst, 1, out.pos, outFile) == out.pos, "fwrite failed");
+        flushPending = (hint != 0) && (out.pos == out.size);
+    }
+    CHECK(fclose(outFile) == 0, "fclose failed");  /* a failed final flush means an incomplete file */
+    fprintf(stderr, "Recovered %s\n", outFileName);
+    index += frameSize;
+  }
+  fprintf(stderr, "Complete\n");
+
+  free(outFileName);
+  ZSTD_freeDCtx(dctx);
+  free(buffer);
+  free(frames.data);
+  return 0;
+}
diff --git a/contrib/seekable_format/examples/Makefile b/contrib/seekable_format/examples/Makefile
index 543780f..9df6b75 100644
--- a/contrib/seekable_format/examples/Makefile
+++ b/contrib/seekable_format/examples/Makefile
@@ -13,7 +13,7 @@
 ZSTDLIB_NAME = libzstd.a
 ZSTDLIB = $(ZSTDLIB_PATH)/$(ZSTDLIB_NAME)
 
-CPPFLAGS += -I../ -I../../../lib -I../../../lib/common
+CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ -I../ -I../../../lib -I../../../lib/common
 
 CFLAGS ?= -O3
 CFLAGS += -g
diff --git a/contrib/seekable_format/examples/parallel_compression.c b/contrib/seekable_format/examples/parallel_compression.c
index 69644d2..4118b0a 100644
--- a/contrib/seekable_format/examples/parallel_compression.c
+++ b/contrib/seekable_format/examples/parallel_compression.c
@@ -21,7 +21,6 @@
 #  define SLEEP(x) usleep(x * 1000)
 #endif
 
-#define XXH_NAMESPACE ZSTD_
 #include "xxhash.h"
 
 #include "pool.h"      // use zstd thread pool for demo
diff --git a/contrib/seekable_format/examples/seekable_decompression.c b/contrib/seekable_format/examples/seekable_decompression.c
index 7050e0f..e9e2013 100644
--- a/contrib/seekable_format/examples/seekable_decompression.c
+++ b/contrib/seekable_format/examples/seekable_decompression.c
@@ -99,6 +99,9 @@
 
     while (startOffset < endOffset) {
         size_t const result = ZSTD_seekable_decompress(seekable, buffOut, MIN(endOffset - startOffset, buffOutSize), startOffset);
+        if (!result) {
+            break;
+        }
 
         if (ZSTD_isError(result)) {
             fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n",
diff --git a/contrib/seekable_format/examples/seekable_decompression_mem.c b/contrib/seekable_format/examples/seekable_decompression_mem.c
index c36d222..e7b1c65 100644
--- a/contrib/seekable_format/examples/seekable_decompression_mem.c
+++ b/contrib/seekable_format/examples/seekable_decompression_mem.c
@@ -104,6 +104,9 @@
 
     while (startOffset < endOffset) {
         size_t const result = ZSTD_seekable_decompress(seekable, buffOut, MIN(endOffset - startOffset, buffOutSize), startOffset);
+        if (!result) {
+            break;
+        }
 
         if (ZSTD_isError(result)) {
             fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n",
diff --git a/contrib/seekable_format/tests/Makefile b/contrib/seekable_format/tests/Makefile
index b00657f..d51deb3 100644
--- a/contrib/seekable_format/tests/Makefile
+++ b/contrib/seekable_format/tests/Makefile
@@ -13,16 +13,16 @@
 ZSTDLIB_NAME = libzstd.a
 ZSTDLIB = $(ZSTDLIB_PATH)/$(ZSTDLIB_NAME)
 
-CPPFLAGS += -I../ -I$(ZSTDLIB_PATH) -I$(ZSTDLIB_PATH)/common
+CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ -I../ -I$(ZSTDLIB_PATH) -I$(ZSTDLIB_PATH)/common
 
 CFLAGS ?= -O3
-CFLAGS += -g
+CFLAGS += -g -Wall -Wextra -Wcast-qual -Wcast-align -Wconversion \
+          -Wformat=2 -Wstrict-aliasing=1
 
 SEEKABLE_OBJS = ../zstdseek_compress.c ../zstdseek_decompress.c $(ZSTDLIB)
 
 .PHONY: default clean test
-
-default: seekable_tests
+default: test
 
 test: seekable_tests
 	./seekable_tests
@@ -30,9 +30,9 @@
 $(ZSTDLIB):
 	$(MAKE) -C $(ZSTDLIB_PATH) $(ZSTDLIB_NAME)
 
-seekable_tests : seekable_tests.c $(SEEKABLE_OBJS)
+seekable_tests : $(SEEKABLE_OBJS)
 
 clean:
-	@rm -f core *.o tmp* result* *.zst \
+	@$(RM) core *.o tmp* result* *.zst \
 		seekable_tests
 	@echo Cleaning completed
diff --git a/contrib/seekable_format/tests/seekable_tests.c b/contrib/seekable_format/tests/seekable_tests.c
index f2556b5..a482638 100644
--- a/contrib/seekable_format/tests/seekable_tests.c
+++ b/contrib/seekable_format/tests/seekable_tests.c
@@ -1,6 +1,8 @@
 #include <stddef.h>
 #include <stdint.h>
+#include <stdlib.h>  // malloc
 #include <stdio.h>
+#include <assert.h>
 
 #include "zstd_seekable.h"
 
@@ -8,7 +10,83 @@
 int main(int argc, const char** argv)
 {
     unsigned testNb = 1;
+    (void)argc; (void)argv;
     printf("Beginning zstd seekable format tests...\n");
+
+    printf("Test %u - simple round trip: ", testNb++);
+    {   size_t const inSize = 4000;
+        void* const inBuffer = malloc(inSize);
+        assert(inBuffer != NULL);
+
+        size_t const seekCapacity = 5000;
+        void* const seekBuffer = malloc(seekCapacity);
+        assert(seekBuffer != NULL);
+        size_t seekSize;
+
+        size_t const outCapacity = inSize;
+        void* const outBuffer = malloc(outCapacity);
+        assert(outBuffer != NULL);
+
+        ZSTD_seekable_CStream* const zscs = ZSTD_seekable_createCStream();
+        assert(zscs != NULL);
+
+        { size_t const initStatus = ZSTD_seekable_initCStream(zscs, 9, 0 /* checksumFlag */, (unsigned)inSize /* maxFrameSize */);
+          assert(!ZSTD_isError(initStatus));
+        }
+
+        {   ZSTD_outBuffer outb = { .dst=seekBuffer, .pos=0, .size=seekCapacity };
+            ZSTD_inBuffer inb = { .src=inBuffer, .pos=0, .size=inSize };
+
+            size_t const cStatus = ZSTD_seekable_compressStream(zscs, &outb, &inb);
+            assert(!ZSTD_isError(cStatus));
+            assert(inb.pos == inb.size);
+
+            size_t const endStatus = ZSTD_seekable_endStream(zscs, &outb);
+            assert(!ZSTD_isError(endStatus));
+            seekSize = outb.pos;
+        }
+
+        ZSTD_seekable* const stream = ZSTD_seekable_create();
+        assert(stream != NULL);
+        { size_t const initStatus = ZSTD_seekable_initBuff(stream, seekBuffer, seekSize);
+          assert(!ZSTD_isError(initStatus)); }
+
+        { size_t const decStatus = ZSTD_seekable_decompress(stream, outBuffer, outCapacity, 0);
+          assert(decStatus == inSize); }
+
+        /* unit test ZSTD_seekTable functions */
+        ZSTD_seekTable* const zst = ZSTD_seekTable_create_fromSeekable(stream);
+        assert(zst != NULL);
+
+        unsigned const nbFrames = ZSTD_seekTable_getNumFrames(zst);
+        assert(nbFrames > 0);
+
+        unsigned long long const frame0Offset = ZSTD_seekTable_getFrameCompressedOffset(zst, 0);
+        assert(frame0Offset == 0);
+
+        unsigned long long const content0Offset = ZSTD_seekTable_getFrameDecompressedOffset(zst, 0);
+        assert(content0Offset == 0);
+
+        size_t const cSize = ZSTD_seekTable_getFrameCompressedSize(zst, 0);
+        assert(!ZSTD_isError(cSize));
+        assert(cSize <= seekCapacity);
+
+        size_t const origSize = ZSTD_seekTable_getFrameDecompressedSize(zst, 0);
+        assert(origSize == inSize);
+
+        unsigned const fo1idx = ZSTD_seekTable_offsetToFrameIndex(zst, 1);
+        assert(fo1idx == 0);
+
+        free(inBuffer);
+        free(seekBuffer);
+        free(outBuffer);
+        ZSTD_seekable_freeCStream(zscs);
+        ZSTD_seekTable_free(zst);
+        ZSTD_seekable_free(stream);
+    }
+    printf("Success!\n");
+
+
     printf("Test %u - check that seekable decompress does not hang: ", testNb++);
     {   /* Github issue #2335 */
         const size_t compressed_size = 17;
@@ -25,7 +103,7 @@
             '\x00',
             '\x00',
             '\x00',
-            ';',
+            (uint8_t)('\x03'),
             (uint8_t)('\xb1'),
             (uint8_t)('\xea'),
             (uint8_t)('\x92'),
@@ -34,6 +112,61 @@
         const size_t uncompressed_size = 32;
         uint8_t uncompressed_data[32];
 
+        ZSTD_seekable* const stream = ZSTD_seekable_create();
+        assert(stream != NULL);
+        {   size_t const status = ZSTD_seekable_initBuff(stream, compressed_data, compressed_size);
+            if (ZSTD_isError(status)) {
+                ZSTD_seekable_free(stream);
+                goto _test_error;
+        }   }
+
+        /* Should return an error, but not hang */
+        {   const size_t offset = 2;
+            size_t const status = ZSTD_seekable_decompress(stream, uncompressed_data, uncompressed_size, offset);
+            if (!ZSTD_isError(status)) {
+                ZSTD_seekable_free(stream);
+                goto _test_error;
+        }   }
+
+        ZSTD_seekable_free(stream);
+    }
+    printf("Success!\n");
+
+    printf("Test %u - check #2 that seekable decompress does not hang: ", testNb++);
+    {   /* Github issue #FIXME */
+        const size_t compressed_size = 27;
+        const uint8_t compressed_data[27] = {
+            (uint8_t)'\x28',
+            (uint8_t)'\xb5',
+            (uint8_t)'\x2f',
+            (uint8_t)'\xfd',
+            (uint8_t)'\x00',
+            (uint8_t)'\x32',
+            (uint8_t)'\x91',
+            (uint8_t)'\x00',
+            (uint8_t)'\x00',
+            (uint8_t)'\x00',
+            (uint8_t)'\x5e',
+            (uint8_t)'\x2a',
+            (uint8_t)'\x4d',
+            (uint8_t)'\x18',
+            (uint8_t)'\x09',
+            (uint8_t)'\x00',
+            (uint8_t)'\x00',
+            (uint8_t)'\x00',
+            (uint8_t)'\x00',
+            (uint8_t)'\x00',
+            (uint8_t)'\x00',
+            (uint8_t)'\x00',
+            (uint8_t)'\x00',
+            (uint8_t)'\xb1',
+            (uint8_t)'\xea',
+            (uint8_t)'\x92',
+            (uint8_t)'\x8f',
+        };
+        const size_t uncompressed_size = 400;
+        uint8_t uncompressed_data[400];
+
         ZSTD_seekable* stream = ZSTD_seekable_create();
         size_t status = ZSTD_seekable_initBuff(stream, compressed_data, compressed_size);
         if (ZSTD_isError(status)) {
diff --git a/contrib/seekable_format/zstd_seekable.h b/contrib/seekable_format/zstd_seekable.h
index 7ffd1ba..d2807cf 100644
--- a/contrib/seekable_format/zstd_seekable.h
+++ b/contrib/seekable_format/zstd_seekable.h
@@ -29,6 +29,7 @@
 
 typedef struct ZSTD_seekable_CStream_s ZSTD_seekable_CStream;
 typedef struct ZSTD_seekable_s ZSTD_seekable;
+typedef struct ZSTD_seekTable_s ZSTD_seekTable;
 
 /*-****************************************************************************
 *  Seekable compression - HowTo
@@ -107,6 +108,7 @@
 ZSTDLIB_API size_t ZSTD_seekable_logFrame(ZSTD_frameLog* fl, unsigned compressedSize, unsigned decompressedSize, unsigned checksum);
 ZSTDLIB_API size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output);
 
+
 /*-****************************************************************************
 *  Seekable decompression - HowTo
 *  A ZSTD_seekable object is required to tracking the seekTable.
@@ -161,13 +163,42 @@
 ZSTDLIB_API size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex);
 
 #define ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE (0ULL-2)
-/*===== Seek Table access functions =====*/
-ZSTDLIB_API unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs);
-ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex);
-ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex);
-ZSTDLIB_API size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, unsigned frameIndex);
-ZSTDLIB_API size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, unsigned frameIndex);
-ZSTDLIB_API unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long offset);
+/*===== Seekable seek table access functions =====*/
+ZSTDLIB_API unsigned ZSTD_seekable_getNumFrames(const ZSTD_seekable* zs);
+ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameCompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex);
+ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameDecompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex);
+ZSTDLIB_API size_t ZSTD_seekable_getFrameCompressedSize(const ZSTD_seekable* zs, unsigned frameIndex);
+ZSTDLIB_API size_t ZSTD_seekable_getFrameDecompressedSize(const ZSTD_seekable* zs, unsigned frameIndex);
+ZSTDLIB_API unsigned ZSTD_seekable_offsetToFrameIndex(const ZSTD_seekable* zs, unsigned long long offset);
+
+
+/*-****************************************************************************
+*  Direct exploitation of the seekTable
+*
+*  Memory constrained use cases that manage multiple archives
+*  benefit from retaining multiple archive seek tables
+*  without retaining a ZSTD_seekable instance for each.
+*
+*  The API below allows the above-mentioned use cases
+*  to initialize a ZSTD_seekable, extract its (smaller) ZSTD_seekTable,
+*  then throw the ZSTD_seekable away to save memory.
+*
+*  Standard ZSTD operations can then be used
+*  to decompress frames based on seek table offsets.
+******************************************************************************/
+
+/*===== Independent seek table management =====*/
+ZSTDLIB_API ZSTD_seekTable* ZSTD_seekTable_create_fromSeekable(const ZSTD_seekable* zs);
+ZSTDLIB_API size_t ZSTD_seekTable_free(ZSTD_seekTable* st);
+
+/*===== Direct seek table access functions =====*/
+ZSTDLIB_API unsigned ZSTD_seekTable_getNumFrames(const ZSTD_seekTable* st);
+ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameCompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex);
+ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameDecompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex);
+ZSTDLIB_API size_t ZSTD_seekTable_getFrameCompressedSize(const ZSTD_seekTable* st, unsigned frameIndex);
+ZSTDLIB_API size_t ZSTD_seekTable_getFrameDecompressedSize(const ZSTD_seekTable* st, unsigned frameIndex);
+ZSTDLIB_API unsigned ZSTD_seekTable_offsetToFrameIndex(const ZSTD_seekTable* st, unsigned long long offset);
+
 
 /*===== Seekable advanced I/O API =====*/
 typedef int(ZSTD_seekable_read)(void* opaque, void* buffer, size_t n);
diff --git a/contrib/seekable_format/zstd_seekable_compression_format.md b/contrib/seekable_format/zstd_seekable_compression_format.md
index bf3080f..55aebfd 100644
--- a/contrib/seekable_format/zstd_seekable_compression_format.md
+++ b/contrib/seekable_format/zstd_seekable_compression_format.md
@@ -53,7 +53,7 @@
 The total size of the skippable frame, not including the `Skippable_Magic_Number` or `Frame_Size`.
 This is for compatibility with [Zstandard skippable frames].
 
-[Zstandard skippable frames]: https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#skippable-frames
+[Zstandard skippable frames]: https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#skippable-frames
 
 #### `Seek_Table_Footer`
 The seek table footer format is as follows:
diff --git a/contrib/seekable_format/zstdseek_compress.c b/contrib/seekable_format/zstdseek_compress.c
index 5a75714..242bd2a 100644
--- a/contrib/seekable_format/zstdseek_compress.c
+++ b/contrib/seekable_format/zstdseek_compress.c
@@ -12,13 +12,13 @@
 #include <assert.h>
 
 #define XXH_STATIC_LINKING_ONLY
-#define XXH_NAMESPACE ZSTD_
 #include "xxhash.h"
 
 #define ZSTD_STATIC_LINKING_ONLY
 #include "zstd.h"
 #include "zstd_errors.h"
 #include "mem.h"
+
 #include "zstd_seekable.h"
 
 #define CHECK_Z(f) { size_t const ret = (f); if (ret != 0) return ret; }
@@ -63,19 +63,18 @@
     int writingSeekTable;
 };
 
-size_t ZSTD_seekable_frameLog_allocVec(ZSTD_frameLog* fl)
+static size_t ZSTD_seekable_frameLog_allocVec(ZSTD_frameLog* fl)
 {
     /* allocate some initial space */
     size_t const FRAMELOG_STARTING_CAPACITY = 16;
     fl->entries = (framelogEntry_t*)malloc(
             sizeof(framelogEntry_t) * FRAMELOG_STARTING_CAPACITY);
     if (fl->entries == NULL) return ERROR(memory_allocation);
-    fl->capacity = FRAMELOG_STARTING_CAPACITY;
-
+    fl->capacity = (U32)FRAMELOG_STARTING_CAPACITY;
     return 0;
 }
 
-size_t ZSTD_seekable_frameLog_freeVec(ZSTD_frameLog* fl)
+static size_t ZSTD_seekable_frameLog_freeVec(ZSTD_frameLog* fl)
 {
     if (fl != NULL) free(fl->entries);
     return 0;
@@ -83,7 +82,7 @@
 
 ZSTD_frameLog* ZSTD_seekable_createFrameLog(int checksumFlag)
 {
-    ZSTD_frameLog* fl = malloc(sizeof(ZSTD_frameLog));
+    ZSTD_frameLog* const fl = (ZSTD_frameLog*)malloc(sizeof(ZSTD_frameLog));
     if (fl == NULL) return NULL;
 
     if (ZSTD_isError(ZSTD_seekable_frameLog_allocVec(fl))) {
@@ -106,10 +105,9 @@
     return 0;
 }
 
-ZSTD_seekable_CStream* ZSTD_seekable_createCStream()
+ZSTD_seekable_CStream* ZSTD_seekable_createCStream(void)
 {
-    ZSTD_seekable_CStream* zcs = malloc(sizeof(ZSTD_seekable_CStream));
-
+    ZSTD_seekable_CStream* const zcs = (ZSTD_seekable_CStream*)malloc(sizeof(ZSTD_seekable_CStream));
     if (zcs == NULL) return NULL;
 
     memset(zcs, 0, sizeof(*zcs));
@@ -134,7 +132,6 @@
     ZSTD_freeCStream(zcs->cstream);
     ZSTD_seekable_frameLog_freeVec(&zcs->framelog);
     free(zcs);
-
     return 0;
 }
 
@@ -152,9 +149,8 @@
         return ERROR(frameParameter_unsupported);
     }
 
-    zcs->maxFrameSize = maxFrameSize
-                                ? maxFrameSize
-                                : ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE;
+    zcs->maxFrameSize = maxFrameSize ?
+                        maxFrameSize : ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE;
 
     zcs->framelog.checksumFlag = checksumFlag;
     if (zcs->framelog.checksumFlag) {
@@ -180,7 +176,7 @@
     if (fl->size == fl->capacity) {
         /* exponential size increase for constant amortized runtime */
         size_t const newCapacity = fl->capacity * 2;
-        framelogEntry_t* const newEntries = realloc(fl->entries,
+        framelogEntry_t* const newEntries = (framelogEntry_t*)realloc(fl->entries,
                 sizeof(framelogEntry_t) * newCapacity);
 
         if (newEntries == NULL) return ERROR(memory_allocation);
@@ -204,7 +200,7 @@
     /* end the frame */
     size_t ret = ZSTD_endStream(zcs->cstream, output);
 
-    zcs->frameCSize += output->pos - prevOutPos;
+    zcs->frameCSize += (U32)(output->pos - prevOutPos);
 
     /* need to flush before doing the rest */
     if (ret) return ret;
@@ -223,9 +219,8 @@
     zcs->frameCSize = 0;
     zcs->frameDSize = 0;
 
-    ZSTD_resetCStream(zcs->cstream, 0);
-    if (zcs->framelog.checksumFlag)
-        XXH64_reset(&zcs->xxhState, 0);
+    ZSTD_CCtx_reset(zcs->cstream, ZSTD_reset_session_only);
+    if (zcs->framelog.checksumFlag) XXH64_reset(&zcs->xxhState, 0);
 
     return 0;
 }
@@ -248,8 +243,8 @@
             XXH64_update(&zcs->xxhState, inBase, inTmp.pos);
         }
 
-        zcs->frameCSize += output->pos - prevOutPos;
-        zcs->frameDSize += inTmp.pos;
+        zcs->frameCSize += (U32)(output->pos - prevOutPos);
+        zcs->frameDSize += (U32)inTmp.pos;
 
         input->pos += inTmp.pos;
 
@@ -290,7 +285,7 @@
         memcpy((BYTE*)output->dst + output->pos,
                tmp + (fl->seekTablePos - offset), lenWrite);
         output->pos += lenWrite;
-        fl->seekTablePos += lenWrite;
+        fl->seekTablePos += (U32)lenWrite;
 
         if (lenWrite < 4) return ZSTD_seekable_seekTableSize(fl) - fl->seekTablePos;
     }
@@ -339,8 +334,7 @@
 
     if (output->size - output->pos < 1) return seekTableLen - fl->seekTablePos;
     if (fl->seekTablePos < seekTableLen - 4) {
-        BYTE sfd = 0;
-        sfd |= (fl->checksumFlag) << 7;
+        BYTE const sfd = (BYTE)((fl->checksumFlag) << 7);
 
         ((BYTE*)output->dst)[output->pos] = sfd;
         output->pos++;
diff --git a/contrib/seekable_format/zstdseek_decompress.c b/contrib/seekable_format/zstdseek_decompress.c
index cc5c859..5eed024 100644
--- a/contrib/seekable_format/zstdseek_decompress.c
+++ b/contrib/seekable_format/zstdseek_decompress.c
@@ -60,7 +60,6 @@
 #include <assert.h>
 
 #define XXH_STATIC_LINKING_ONLY
-#define XXH_NAMESPACE ZSTD_
 #include "xxhash.h"
 
 #define ZSTD_STATIC_LINKING_ONLY
@@ -107,7 +106,8 @@
 
 static int ZSTD_seekable_read_buff(void* opaque, void* buffer, size_t n)
 {
-    buffWrapper_t* buff = (buffWrapper_t*) opaque;
+    buffWrapper_t* const buff = (buffWrapper_t*)opaque;
+    assert(buff != NULL);
     if (buff->pos + n > buff->size) return -1;
     memcpy(buffer, (const BYTE*)buff->ptr + buff->pos, n);
     buff->pos += n;
@@ -118,15 +118,17 @@
 {
     buffWrapper_t* const buff = (buffWrapper_t*) opaque;
     unsigned long long newOffset;
+    assert(buff != NULL);
     switch (origin) {
     case SEEK_SET:
-        newOffset = offset;
+        assert(offset >= 0);
+        newOffset = (unsigned long long)offset;
         break;
     case SEEK_CUR:
-        newOffset = (unsigned long long)buff->pos + offset;
+        newOffset = (unsigned long long)((long long)buff->pos + offset);
         break;
     case SEEK_END:
-        newOffset = (unsigned long long)buff->size + offset;
+        newOffset = (unsigned long long)((long long)buff->size + offset);
         break;
     default:
         assert(0);  /* not possible */
@@ -144,18 +146,18 @@
     U32 checksum;
 } seekEntry_t;
 
-typedef struct {
+struct ZSTD_seekTable_s {
     seekEntry_t* entries;
     size_t tableLen;
 
     int checksumFlag;
-} seekTable_t;
+};
 
 #define SEEKABLE_BUFF_SIZE ZSTD_BLOCKSIZE_MAX
 
 struct ZSTD_seekable_s {
     ZSTD_DStream* dstream;
-    seekTable_t seekTable;
+    ZSTD_seekTable seekTable;
     ZSTD_seekable_customFile src;
 
     U64 decompressedOffset;
@@ -173,8 +175,7 @@
 
 ZSTD_seekable* ZSTD_seekable_create(void)
 {
-    ZSTD_seekable* zs = malloc(sizeof(ZSTD_seekable));
-
+    ZSTD_seekable* const zs = (ZSTD_seekable*)malloc(sizeof(ZSTD_seekable));
     if (zs == NULL) return NULL;
 
     /* also initializes stage to zsds_init */
@@ -195,7 +196,35 @@
     ZSTD_freeDStream(zs->dstream);
     free(zs->seekTable.entries);
     free(zs);
+    return 0;
+}
 
+ZSTD_seekTable* ZSTD_seekTable_create_fromSeekable(const ZSTD_seekable* zs)
+{
+    ZSTD_seekTable* const st = (ZSTD_seekTable*)malloc(sizeof(ZSTD_seekTable));
+    if (st==NULL) return NULL;
+
+    st->checksumFlag = zs->seekTable.checksumFlag;
+    st->tableLen = zs->seekTable.tableLen;
+
+    /* Allocate an extra entry at the end to match logic of initial allocation */
+    size_t const entriesSize = sizeof(seekEntry_t) * (zs->seekTable.tableLen + 1);
+    seekEntry_t* const entries = (seekEntry_t*)malloc(entriesSize);
+    if (entries==NULL) {
+        free(st);
+        return NULL;
+    }
+
+    memcpy(entries, zs->seekTable.entries, entriesSize);
+    st->entries = entries;
+    return st;
+}
+
+size_t ZSTD_seekTable_free(ZSTD_seekTable* st)
+{
+    if (st == NULL) return 0; /* support free on null */
+    free(st->entries);
+    free(st);
     return 0;
 }
 
@@ -203,19 +232,24 @@
  *  Performs a binary search to find the last frame with a decompressed offset
  *  <= pos
  *  @return : the frame's index */
-unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long pos)
+unsigned ZSTD_seekable_offsetToFrameIndex(const ZSTD_seekable* zs, unsigned long long pos)
+{
+    return ZSTD_seekTable_offsetToFrameIndex(&zs->seekTable, pos);
+}
+
+unsigned ZSTD_seekTable_offsetToFrameIndex(const ZSTD_seekTable* st, unsigned long long pos)
 {
     U32 lo = 0;
-    U32 hi = (U32)zs->seekTable.tableLen;
-    assert(zs->seekTable.tableLen <= UINT_MAX);
+    U32 hi = (U32)st->tableLen;
+    assert(st->tableLen <= UINT_MAX);
 
-    if (pos >= zs->seekTable.entries[zs->seekTable.tableLen].dOffset) {
-        return (U32)zs->seekTable.tableLen;
+    if (pos >= st->entries[st->tableLen].dOffset) {
+        return (unsigned)st->tableLen;
     }
 
     while (lo + 1 < hi) {
         U32 const mid = lo + ((hi - lo) >> 1);
-        if (zs->seekTable.entries[mid].dOffset <= pos) {
+        if (st->entries[mid].dOffset <= pos) {
             lo = mid;
         } else {
             hi = mid;
@@ -224,36 +258,61 @@
     return lo;
 }
 
-unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs)
+unsigned ZSTD_seekable_getNumFrames(const ZSTD_seekable* zs)
 {
-    assert(zs->seekTable.tableLen <= UINT_MAX);
-    return (unsigned)zs->seekTable.tableLen;
+    return ZSTD_seekTable_getNumFrames(&zs->seekTable);
 }
 
-unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex)
+unsigned ZSTD_seekTable_getNumFrames(const ZSTD_seekTable* st)
 {
-    if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
-    return zs->seekTable.entries[frameIndex].cOffset;
+    assert(st->tableLen <= UINT_MAX);
+    return (unsigned)st->tableLen;
 }
 
-unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex)
+unsigned long long ZSTD_seekable_getFrameCompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex)
 {
-    if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
-    return zs->seekTable.entries[frameIndex].dOffset;
+    return ZSTD_seekTable_getFrameCompressedOffset(&zs->seekTable, frameIndex);
 }
 
-size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, unsigned frameIndex)
+unsigned long long ZSTD_seekTable_getFrameCompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex)
 {
-    if (frameIndex >= zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge);
-    return zs->seekTable.entries[frameIndex + 1].cOffset -
-           zs->seekTable.entries[frameIndex].cOffset;
+    if (frameIndex >= st->tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
+    return st->entries[frameIndex].cOffset;
 }
 
-size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, unsigned frameIndex)
+unsigned long long ZSTD_seekable_getFrameDecompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex)
 {
-    if (frameIndex > zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge);
-    return zs->seekTable.entries[frameIndex + 1].dOffset -
-           zs->seekTable.entries[frameIndex].dOffset;
+    return ZSTD_seekTable_getFrameDecompressedOffset(&zs->seekTable, frameIndex);
+}
+
+unsigned long long ZSTD_seekTable_getFrameDecompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex)
+{
+    if (frameIndex >= st->tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
+    return st->entries[frameIndex].dOffset;
+}
+
+size_t ZSTD_seekable_getFrameCompressedSize(const ZSTD_seekable* zs, unsigned frameIndex)
+{
+    return ZSTD_seekTable_getFrameCompressedSize(&zs->seekTable, frameIndex);
+}
+
+size_t ZSTD_seekTable_getFrameCompressedSize(const ZSTD_seekTable* st, unsigned frameIndex)
+{
+    if (frameIndex >= st->tableLen) return ERROR(frameIndex_tooLarge);
+    return st->entries[frameIndex + 1].cOffset -
+           st->entries[frameIndex].cOffset;
+}
+
+size_t ZSTD_seekable_getFrameDecompressedSize(const ZSTD_seekable* zs, unsigned frameIndex)
+{
+    return ZSTD_seekTable_getFrameDecompressedSize(&zs->seekTable, frameIndex);
+}
+
+size_t ZSTD_seekTable_getFrameDecompressedSize(const ZSTD_seekTable* st, unsigned frameIndex)
+{
+    if (frameIndex >= st->tableLen) return ERROR(frameIndex_tooLarge);  /* entries[frameIndex + 1] is read below and only tableLen + 1 entries exist, so frameIndex == tableLen must be rejected (same check as getFrameCompressedSize) */
+    return st->entries[frameIndex + 1].dOffset -
+           st->entries[frameIndex].dOffset;
 }
 
 static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable* zs)
@@ -272,10 +331,9 @@
         checksumFlag = sfd >> 7;
 
         /* check reserved bits */
-        if ((checksumFlag >> 2) & 0x1f) {
+        if ((sfd >> 2) & 0x1f) {
             return ERROR(corruption_detected);
-        }
-    }
+    }   }
 
     {   U32 const numFrames = MEM_readLE32(zs->inBuff);
         U32 const sizePerEntry = 8 + (checksumFlag?4:0);
@@ -283,12 +341,9 @@
         U32 const frameSize = tableSize + ZSTD_seekTableFooterSize + ZSTD_SKIPPABLEHEADERSIZE;
 
         U32 remaining = frameSize - ZSTD_seekTableFooterSize; /* don't need to re-read footer */
-        {
-            U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE);
-
+        {   U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE);
             CHECK_IO(src.seek(src.opaque, -(S64)frameSize, SEEK_END));
             CHECK_IO(src.read(src.opaque, zs->inBuff, toRead));
-
             remaining -= toRead;
         }
 
@@ -301,19 +356,15 @@
 
         {   /* Allocate an extra entry at the end so that we can do size
              * computations on the last element without special case */
-            seekEntry_t* entries = (seekEntry_t*)malloc(sizeof(seekEntry_t) * (numFrames + 1));
+            seekEntry_t* const entries = (seekEntry_t*)malloc(sizeof(seekEntry_t) * (numFrames + 1));
 
             U32 idx = 0;
             U32 pos = 8;
 
-
             U64 cOffset = 0;
             U64 dOffset = 0;
 
-            if (!entries) {
-                free(entries);
-                return ERROR(memory_allocation);
-            }
+            if (entries == NULL) return ERROR(memory_allocation);
 
             /* compute cumulative positions */
             for (; idx < numFrames; idx++) {
@@ -381,26 +432,37 @@
 
 size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t len, unsigned long long offset)
 {
+    unsigned long long const eos = zs->seekTable.entries[zs->seekTable.tableLen].dOffset;
+    if (offset + len > eos) {
+        len = eos - offset;
+    }
+
     U32 targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, offset);
     U32 noOutputProgressCount = 0;
+    size_t srcBytesRead = 0;
     do {
         /* check if we can continue from a previous decompress job */
         if (targetFrame != zs->curFrame || offset != zs->decompressedOffset) {
             zs->decompressedOffset = zs->seekTable.entries[targetFrame].dOffset;
             zs->curFrame = targetFrame;
 
+            assert(zs->seekTable.entries[targetFrame].cOffset < LLONG_MAX);
             CHECK_IO(zs->src.seek(zs->src.opaque,
-                                  zs->seekTable.entries[targetFrame].cOffset,
+                                  (long long)zs->seekTable.entries[targetFrame].cOffset,
                                   SEEK_SET));
             zs->in = (ZSTD_inBuffer){zs->inBuff, 0, 0};
             XXH64_reset(&zs->xxhState, 0);
-            ZSTD_resetDStream(zs->dstream);
+            ZSTD_DCtx_reset(zs->dstream, ZSTD_reset_session_only);
+            if (zs->buffWrapper.size && srcBytesRead > zs->buffWrapper.size) {
+                return ERROR(seekableIO);
+            }
         }
 
         while (zs->decompressedOffset < offset + len) {
             size_t toRead;
             ZSTD_outBuffer outTmp;
             size_t prevOutPos;
+            size_t prevInPos;
             size_t forwardProgress;
             if (zs->decompressedOffset < offset) {
                 /* dummy decompressions until we get to the target offset */
@@ -410,6 +472,7 @@
             }
 
             prevOutPos = outTmp.pos;
+            prevInPos = zs->in.pos;
             toRead = ZSTD_decompressStream(zs->dstream, &outTmp, &zs->in);
             if (ZSTD_isError(toRead)) {
                 return toRead;
@@ -428,6 +491,7 @@
                 noOutputProgressCount = 0;
             }
             zs->decompressedOffset += forwardProgress;
+            srcBytesRead += zs->in.pos - prevInPos;
 
             if (toRead == 0) {
                 /* frame complete */
@@ -442,6 +506,8 @@
                 if (zs->decompressedOffset < offset + len) {
                     /* go back to the start and force a reset of the stream */
                     targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, zs->decompressedOffset);
+                    /* in this case it will fail later with corruption_detected, since last block does not have checksum */
+                    assert(targetFrame != zs->seekTable.tableLen);
                 }
                 break;
             }
@@ -453,7 +519,7 @@
                 zs->in.size = toRead;
                 zs->in.pos = 0;
             }
-        }
+        }  /* while (zs->decompressedOffset < offset + len) */
     } while (zs->decompressedOffset != offset + len);
 
     return len;
@@ -465,8 +531,7 @@
         return ERROR(frameIndex_tooLarge);
     }
 
-    {
-        size_t const decompressedSize =
+    {   size_t const decompressedSize =
                 zs->seekTable.entries[frameIndex + 1].dOffset -
                 zs->seekTable.entries[frameIndex].dOffset;
         if (dstSize < decompressedSize) {
diff --git a/doc/educational_decoder/Makefile b/doc/educational_decoder/Makefile
index 316c6ea..a9c601e 100644
--- a/doc/educational_decoder/Makefile
+++ b/doc/educational_decoder/Makefile
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+# Copyright (c) Yann Collet, Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
diff --git a/doc/educational_decoder/harness.c b/doc/educational_decoder/harness.c
index 1403a6e..935f60d 100644
--- a/doc/educational_decoder/harness.c
+++ b/doc/educational_decoder/harness.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/doc/educational_decoder/zstd_decompress.c b/doc/educational_decoder/zstd_decompress.c
index 605918b..62e6f0d 100644
--- a/doc/educational_decoder/zstd_decompress.c
+++ b/doc/educational_decoder/zstd_decompress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/doc/educational_decoder/zstd_decompress.h b/doc/educational_decoder/zstd_decompress.h
index 2b44eee..d89c835 100644
--- a/doc/educational_decoder/zstd_decompress.h
+++ b/doc/educational_decoder/zstd_decompress.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md
index 0af6bf9..5c7deb9 100644
--- a/doc/zstd_compression_format.md
+++ b/doc/zstd_compression_format.md
@@ -3,7 +3,7 @@
 
 ### Notices
 
-Copyright (c) 2016-2020 Yann Collet, Facebook, Inc.
+Copyright (c) 2016-2021 Yann Collet, Facebook, Inc.
 
 Permission is granted to copy and distribute this document
 for any purpose and without charge,
diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index cb5ded0..010f10a 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -1,10 +1,10 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>zstd 1.4.7 Manual</title>
+<title>zstd 1.5.0 Manual</title>
 </head>
 <body>
-<h1>zstd 1.4.7 Manual</h1>
+<h1>zstd 1.5.0 Manual</h1>
 <hr>
 <a name="Contents"></a><h2>Contents</h2>
 <ol>
@@ -12,15 +12,15 @@
 <li><a href="#Chapter2">Version</a></li>
 <li><a href="#Chapter3">Simple API</a></li>
 <li><a href="#Chapter4">Explicit context</a></li>
-<li><a href="#Chapter5">Advanced compression API</a></li>
-<li><a href="#Chapter6">Advanced decompression API</a></li>
+<li><a href="#Chapter5">Advanced compression API (Requires v1.4.0+)</a></li>
+<li><a href="#Chapter6">Advanced decompression API (Requires v1.4.0+)</a></li>
 <li><a href="#Chapter7">Streaming</a></li>
 <li><a href="#Chapter8">Streaming compression - HowTo</a></li>
 <li><a href="#Chapter9">Streaming decompression - HowTo</a></li>
 <li><a href="#Chapter10">Simple dictionary API</a></li>
 <li><a href="#Chapter11">Bulk processing dictionary API</a></li>
 <li><a href="#Chapter12">Dictionary helper functions</a></li>
-<li><a href="#Chapter13">Advanced dictionary and prefix API</a></li>
+<li><a href="#Chapter13">Advanced dictionary and prefix API (Requires v1.4.0+)</a></li>
 <li><a href="#Chapter14">experimental API (static linking only)</a></li>
 <li><a href="#Chapter15">Frame size functions</a></li>
 <li><a href="#Chapter16">Memory management</a></li>
@@ -141,8 +141,9 @@
 size_t      ZSTD_compressBound(size_t srcSize); </b>/*!< maximum compressed size in worst case single-pass scenario */<b>
 unsigned    ZSTD_isError(size_t code);          </b>/*!< tells if a `size_t` function result is an error code */<b>
 const char* ZSTD_getErrorName(size_t code);     </b>/*!< provides readable string from an error code */<b>
-int         ZSTD_minCLevel(void);               </b>/*!< minimum negative compression level allowed */<b>
+int         ZSTD_minCLevel(void);               </b>/*!< minimum negative compression level allowed, requires v1.4.0+ */<b>
 int         ZSTD_maxCLevel(void);               </b>/*!< maximum compression level available */<b>
+int         ZSTD_defaultCLevel(void);           </b>/*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */<b>
 </pre></b><BR>
 <a name="Chapter4"></a><h2>Explicit context</h2><pre></pre>
 
@@ -157,7 +158,7 @@
  
 </pre><b><pre>typedef struct ZSTD_CCtx_s ZSTD_CCtx;
 ZSTD_CCtx* ZSTD_createCCtx(void);
-size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
+size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);  </b>/* accept NULL pointer */<b>
 </pre></b><BR>
 <pre><b>size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
                          void* dst, size_t dstCapacity,
@@ -179,7 +180,7 @@
   Use one context per thread for parallel execution. 
 </pre><b><pre>typedef struct ZSTD_DCtx_s ZSTD_DCtx;
 ZSTD_DCtx* ZSTD_createDCtx(void);
-size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
+size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);  </b>/* accept NULL pointer */<b>
 </pre></b><BR>
 <pre><b>size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
                            void* dst, size_t dstCapacity,
@@ -190,7 +191,7 @@
  
 </p></pre><BR>
 
-<a name="Chapter5"></a><h2>Advanced compression API</h2><pre></pre>
+<a name="Chapter5"></a><h2>Advanced compression API (Requires v1.4.0+)</h2><pre></pre>
 
 <pre><b>typedef enum { ZSTD_fast=1,
                ZSTD_dfast=2,
@@ -270,7 +271,6 @@
                               * The higher the value of selected strategy, the more complex it is,
                               * resulting in stronger and slower compression.
                               * Special: value 0 means "use default strategy". */
-
     </b>/* LDM mode parameters */<b>
     ZSTD_c_enableLongDistanceMatching=160, </b>/* Enable long distance matching.<b>
                                      * This parameter is designed to improve compression ratio
@@ -327,7 +327,7 @@
     ZSTD_c_jobSize=401,      </b>/* Size of a compression job. This value is enforced only when nbWorkers >= 1.<b>
                               * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
                               * 0 means default, which is dynamically determined based on compression parameters.
-                              * Job size must be a minimum of overlap size, or 1 MB, whichever is largest.
+                              * Job size must be a minimum of overlap size, or ZSTDMT_JOBSIZE_MIN (= 512 KB), whichever is largest.
                               * The minimum size is automatically and transparently enforced. */
     ZSTD_c_overlapLog=402,   </b>/* Control the overlap size, as a fraction of window size.<b>
                               * The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
@@ -357,6 +357,8 @@
      * ZSTD_c_stableOutBuffer
      * ZSTD_c_blockDelimiters
      * ZSTD_c_validateSequences
+     * ZSTD_c_splitBlocks
+     * ZSTD_c_useRowMatchFinder
      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
      * note : never ever use experimentalParam? names directly;
      *        also, the enums values themselves are unstable and can still change.
@@ -372,7 +374,10 @@
      ZSTD_c_experimentalParam9=1006,
      ZSTD_c_experimentalParam10=1007,
      ZSTD_c_experimentalParam11=1008,
-     ZSTD_c_experimentalParam12=1009
+     ZSTD_c_experimentalParam12=1009,
+     ZSTD_c_experimentalParam13=1010,
+     ZSTD_c_experimentalParam14=1011,
+     ZSTD_c_experimentalParam15=1012
 } ZSTD_cParameter;
 </b></pre><BR>
 <pre><b>typedef struct {
@@ -456,7 +461,7 @@
  
 </p></pre><BR>
 
-<a name="Chapter6"></a><h2>Advanced decompression API</h2><pre></pre>
+<a name="Chapter6"></a><h2>Advanced decompression API (Requires v1.4.0+)</h2><pre></pre>
 
 <pre><b>typedef enum {
 
@@ -473,12 +478,14 @@
      * ZSTD_d_format
      * ZSTD_d_stableOutBuffer
      * ZSTD_d_forceIgnoreChecksum
+     * ZSTD_d_refMultipleDDicts
      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
      * note : never ever use experimentalParam? names directly
      */
      ZSTD_d_experimentalParam1=1000,
      ZSTD_d_experimentalParam2=1001,
-     ZSTD_d_experimentalParam3=1002
+     ZSTD_d_experimentalParam3=1002,
+     ZSTD_d_experimentalParam4=1003
 
 } ZSTD_dParameter;
 </b></pre><BR>
@@ -585,7 +592,7 @@
 <pre><b>typedef ZSTD_CCtx ZSTD_CStream;  </b>/**< CCtx and CStream are now effectively same object (>= v1.3.0) */<b>
 </b></pre><BR>
 <h3>ZSTD_CStream management functions</h3><pre></pre><b><pre>ZSTD_CStream* ZSTD_createCStream(void);
-size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
+size_t ZSTD_freeCStream(ZSTD_CStream* zcs);  </b>/* accept NULL pointer */<b>
 </pre></b><BR>
 <h3>Streaming compression functions</h3><pre></pre><b><pre>typedef enum {
     ZSTD_e_continue=0, </b>/* collect more data, encoder decides when to output compressed result, for optimal compression ratio */<b>
@@ -679,7 +686,7 @@
 <pre><b>typedef ZSTD_DCtx ZSTD_DStream;  </b>/**< DCtx and DStream are now effectively same object (>= v1.3.0) */<b>
 </b></pre><BR>
 <h3>ZSTD_DStream management functions</h3><pre></pre><b><pre>ZSTD_DStream* ZSTD_createDStream(void);
-size_t ZSTD_freeDStream(ZSTD_DStream* zds);
+size_t ZSTD_freeDStream(ZSTD_DStream* zds);  </b>/* accept NULL pointer */<b>
 </pre></b><BR>
 <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
 <pre><b>size_t ZSTD_DStreamInSize(void);    </b>/*!< recommended size for input buffer */<b>
@@ -695,7 +702,7 @@
                                int compressionLevel);
 </b><p>  Compression at an explicit compression level using a Dictionary.
   A dictionary can be any arbitrary data segment (also called a prefix),
-  or a buffer with specified information (see dictBuilder/zdict.h).
+  or a buffer with specified information (see zdict.h).
   Note : This function loads the dictionary, resulting in significant startup delay.
          It's intended for a dictionary used only once.
   Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. 
@@ -730,7 +737,8 @@
 </p></pre><BR>
 
 <pre><b>size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
-</b><p>  Function frees memory allocated by ZSTD_createCDict(). 
+</b><p>  Function frees memory allocated by ZSTD_createCDict().
+  If a NULL pointer is passed, no operation is performed. 
 </p></pre><BR>
 
 <pre><b>size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
@@ -749,7 +757,8 @@
 </p></pre><BR>
 
 <pre><b>size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);
-</b><p>  Function frees memory allocated with ZSTD_createDDict() 
+</b><p>  Function frees memory allocated with ZSTD_createDDict()
+  If a NULL pointer is passed, no operation is performed. 
 </p></pre><BR>
 
 <pre><b>size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
@@ -768,6 +777,12 @@
   It can still be loaded, but as a content-only dictionary. 
 </p></pre><BR>
 
+<pre><b>unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
+</b><p>  Provides the dictID of the dictionary loaded into `cdict`.
+  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. 
+</p></pre><BR>
+
 <pre><b>unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
 </b><p>  Provides the dictID of the dictionary loaded into `ddict`.
   If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
@@ -786,7 +801,7 @@
   When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. 
 </p></pre><BR>
 
-<a name="Chapter13"></a><h2>Advanced dictionary and prefix API</h2><pre>
+<a name="Chapter13"></a><h2>Advanced dictionary and prefix API (Requires v1.4.0+)</h2><pre>
  This API allows dictionaries to be used with ZSTD_compress2(),
  ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and
  only reset with the context is reset with ZSTD_reset_parameters or
@@ -816,7 +831,7 @@
 </b><p>  Reference a prepared dictionary, to be used for all next compressed frames.
   Note that compression parameters are enforced from within CDict,
   and supersede any compression parameter previously set within CCtx.
-  The parameters ignored are labled as "superseded-by-cdict" in the ZSTD_cParameter enum docs.
+  The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs.
   The ignored parameters will be used again if the CCtx is returned to no-dictionary mode.
   The dictionary will remain valid for future compressed frames using same CCtx.
  @result : 0, or an error code (which can be tested with ZSTD_isError()).
@@ -867,6 +882,13 @@
 <pre><b>size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
 </b><p>  Reference a prepared dictionary, to be used to decompress next frames.
   The dictionary remains active for decompression of future frames using same DCtx.
+
+  If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function
+  will store the DDict references in a table, and the DDict used for decompression
+  will be determined at decompression time, as per the dict ID in the frame.
+  The memory for the table is allocated on the first call to refDDict, and can be
+  freed with ZSTD_freeDCtx().
+
  @result : 0, or an error code (which can be tested with ZSTD_isError()).
   Note 1 : Currently, only one dictionary can be managed.
            Referencing a new dictionary effectively "discards" any previous one.
@@ -996,6 +1018,12 @@
 } ZSTD_forceIgnoreChecksum_e;
 </b></pre><BR>
 <pre><b>typedef enum {
+    </b>/* Note: this enum controls ZSTD_d_refMultipleDDicts */<b>
+    ZSTD_rmd_refSingleDDict = 0,
+    ZSTD_rmd_refMultipleDDicts = 1
+} ZSTD_refMultipleDDicts_e;
+</b></pre><BR>
+<pre><b>typedef enum {
     </b>/* Note: this enum and the behavior it controls are effectively internal<b>
      * implementation details of the compressor. They are expected to continue
      * to evolve and should be considered only in the context of extremely
@@ -1043,6 +1071,12 @@
   ZSTD_lcm_uncompressed = 2   </b>/**< Always emit uncompressed literals. */<b>
 } ZSTD_literalCompressionMode_e;
 </b></pre><BR>
+<pre><b>typedef enum {
+  ZSTD_urm_auto = 0,                   </b>/* Automatically determine whether or not we use row matchfinder */<b>
+  ZSTD_urm_disableRowMatchFinder = 1,  </b>/* Never use row matchfinder */<b>
+  ZSTD_urm_enableRowMatchFinder = 2    </b>/* Always use row matchfinder when applicable */<b>
+} ZSTD_useRowMatchFinderMode_e;
+</b></pre><BR>
 <a name="Chapter15"></a><h2>Frame size functions</h2><pre></pre>
 
 <pre><b>unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
@@ -1073,7 +1107,7 @@
   `srcSize` must be the _exact_ size of this series
        (i.e. there should be a frame boundary at `src + srcSize`)
   @return : - upper-bound for the decompressed size of all data in all successive frames
-            - if an error occured: ZSTD_CONTENTSIZE_ERROR
+            - if an error occurred: ZSTD_CONTENTSIZE_ERROR
 
   note 1  : an error can occur if `src` contains an invalid or incorrectly formatted frame.
   note 2  : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`.
@@ -1155,6 +1189,22 @@
  
 </p></pre><BR>
 
+<pre><b>size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize, unsigned magicVariant);
+</b><p> Generates a zstd skippable frame containing data given by src, and writes it to dst buffer.
+
+ Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number,
+ ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15.
+ As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so
+ the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant.
+
+ Returns an error if destination buffer is not large enough, if the source size is not representable
+ with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid).
+
+ @return : number of bytes written or a ZSTD error.
+ 
+</p></pre><BR>
+
 <a name="Chapter16"></a><h2>Memory management</h2><pre></pre>
 
 <pre><b>size_t ZSTD_estimateCCtxSize(int compressionLevel);
@@ -1263,12 +1313,6 @@
   note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef 
 </p></pre><BR>
 
-<pre><b>unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
-</b><p>  Provides the dictID of the dictionary loaded into `cdict`.
-  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
-  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. 
-</p></pre><BR>
-
 <pre><b>ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
 </b><p> @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
  `estimatedSrcSize` value is optional, select 0 if not known 
@@ -1292,24 +1336,26 @@
   This function never fails (wide contract) 
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
+<pre><b>ZSTD_DEPRECATED("use ZSTD_compress2")
+size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         const void* dict,size_t dictSize,
                               ZSTD_parameters params);
 </b><p>  Note : this function is now DEPRECATED.
          It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
-  This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x 
+  This prototype will generate compilation warnings. 
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+<pre><b>ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary")
+size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
                                   void* dst, size_t dstCapacity,
                             const void* src, size_t srcSize,
                             const ZSTD_CDict* cdict,
                                   ZSTD_frameParameters fParams);
-</b><p>  Note : this function is now REDUNDANT.
+</b><p>  Note : this function is now DEPRECATED.
          It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
-  This prototype will be marked as deprecated and generate compilation warning in some future version 
+  This prototype will generate compilation warnings. 
 </p></pre><BR>
 
 <pre><b>size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
@@ -1328,7 +1374,7 @@
   how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) 
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
+<pre><b>size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
 </b><p>  Get the requested compression parameter value, selected by enum ZSTD_cParameter,
   and store it into int* value.
  @return : 0, or an error code (which can be tested with ZSTD_isError()).
@@ -1336,7 +1382,7 @@
 </p></pre><BR>
 
 <pre><b>ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
-size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
+size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);  </b>/* accept NULL pointer */<b>
 </b><p>  Quick howto :
   - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure
   - ZSTD_CCtxParams_setParameter() : Push parameters one by one into
@@ -1348,7 +1394,7 @@
                                     These parameters will be applied to
                                     all subsequent frames.
   - ZSTD_compressStream2() : Do compression using the CCtx.
-  - ZSTD_freeCCtxParams() : Free the memory.
+  - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer.
 
   This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams()
   for static allocation of CCtx for single-threaded compression.
@@ -1382,7 +1428,7 @@
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);
+<pre><b>size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);
 </b><p> Similar to ZSTD_CCtx_getParameter.
  Get the requested value of one compression parameter, selected by enum ZSTD_cParameter.
  @result : 0, or an error code (which can be tested with ZSTD_isError()).
@@ -1462,8 +1508,10 @@
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
-</b><p>  Instruct the decoder context about what kind of data to decode next.
+<pre><b>ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead")
+size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
+</b><p>  This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter().
+  Instruct the decoder context about what kind of data to decode next.
   This instruction is mandatory to decode data without a fully-formed header,
   such ZSTD_f_zstd1_magicless for example.
  @return : 0, or an error code (which can be tested using ZSTD_isError()). 
@@ -1486,11 +1534,11 @@
 <BR></pre>
 
 <h3>Advanced Streaming compression functions</h3><pre></pre><b><pre></pre></b><BR>
-<pre><b>size_t
-ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
+<pre><b>ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
              int compressionLevel,
              unsigned long long pledgedSrcSize);
-</b><p> This function is deprecated, and equivalent to:
+</b><p> This function is DEPRECATED, and equivalent to:
      ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
      ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
      ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
@@ -1499,15 +1547,15 @@
  pledgedSrcSize must be correct. If it is not known at init time, use
  ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
  "0" also disables frame content size field. It may be enabled in the future.
- Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ This prototype will generate compilation warnings.
  
 </p></pre><BR>
 
-<pre><b>size_t
-ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
+<pre><b>ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
          const void* dict, size_t dictSize,
                int compressionLevel);
-</b><p> This function is deprecated, and is equivalent to:
+</b><p> This function is DEPRECATED, and is equivalent to:
      ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
      ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
      ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
@@ -1516,16 +1564,16 @@
  dict == NULL or dictSize < 8, in which case no dict is used.
  Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if
  it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
- Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ This prototype will generate compilation warnings.
  
 </p></pre><BR>
 
-<pre><b>size_t
-ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
+<pre><b>ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
         const void* dict, size_t dictSize,
               ZSTD_parameters params,
               unsigned long long pledgedSrcSize);
-</b><p> This function is deprecated, and is approximately equivalent to:
+</b><p> This function is DEPRECATED, and is approximately equivalent to:
      ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
      // Pseudocode: Set each zstd parameter and leave the rest as-is.
      for ((param, value) : params) {
@@ -1537,22 +1585,23 @@
  dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy.
  pledgedSrcSize must be correct.
  If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
- Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ This prototype will generate compilation warnings.
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
-</b><p> This function is deprecated, and equivalent to:
+<pre><b>ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
+</b><p> This function is DEPRECATED, and equivalent to:
      ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
      ZSTD_CCtx_refCDict(zcs, cdict);
-
+ 
  note : cdict will just be referenced, and must outlive compression session
- Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ This prototype will generate compilation warnings.
  
 </p></pre><BR>
 
-<pre><b>size_t
-ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+<pre><b>ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
                    const ZSTD_CDict* cdict,
                          ZSTD_frameParameters fParams,
                          unsigned long long pledgedSrcSize);
@@ -1568,14 +1617,18 @@
  same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
  pledgedSrcSize must be correct. If srcSize is not known at init time, use
  value ZSTD_CONTENTSIZE_UNKNOWN.
- Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ This prototype will generate compilation warnings.
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
-</b><p> This function is deprecated, and is equivalent to:
+<pre><b>ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
+size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
+</b><p> This function is DEPRECATED, and is equivalent to:
      ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
      ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ Note: ZSTD_resetCStream() interprets pledgedSrcSize == 0 as ZSTD_CONTENTSIZE_UNKNOWN, but
+       ZSTD_CCtx_setPledgedSrcSize() does not do the same, so ZSTD_CONTENTSIZE_UNKNOWN must be
+       explicitly specified.
 
   start a new frame, using same parameters from previous frame.
   This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
@@ -1585,7 +1638,7 @@
   For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
   but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
  @return : 0, or an error code (which can be tested using ZSTD_isError())
-  Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+  This prototype will generate compilation warnings.
  
 </p></pre><BR>
 
@@ -1656,8 +1709,7 @@
   ZSTD_CCtx object can be re-used multiple times within successive compression operations.
 
   Start by initializing a context.
-  Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression,
-  or ZSTD_compressBegin_advanced(), for finer parameter control.
+  Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression.
   It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx()
 
   Then, consume your input using ZSTD_compressContinue().
@@ -1681,11 +1733,11 @@
 
 <h3>Buffer-less streaming compression functions</h3><pre></pre><b><pre>size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
 size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
-size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); </b>/**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */<b>
 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); </b>/**< note: fails if cdict==NULL */<b>
-size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   </b>/* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */<b>
 size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); </b>/**<  note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */<b>
 </pre></b><BR>
+<pre><b>size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); </b>/**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */<b>
+</b></pre><BR>
 <a name="Chapter22"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
   A ZSTD_DCtx object is required to track streaming operations.
   Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
diff --git a/examples/Makefile b/examples/Makefile
index f5e3274..8d7361d 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+# Copyright (c) Yann Collet, Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/common.h b/examples/common.h
index 4492c7e..181f6b3 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/dictionary_compression.c b/examples/dictionary_compression.c
index d9aad45..0eee650 100644
--- a/examples/dictionary_compression.c
+++ b/examples/dictionary_compression.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020 Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/dictionary_decompression.c b/examples/dictionary_decompression.c
index 7e50986..107cfc1 100644
--- a/examples/dictionary_decompression.c
+++ b/examples/dictionary_decompression.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/multiple_simple_compression.c b/examples/multiple_simple_compression.c
index e409467..5d2a28f 100644
--- a/examples/multiple_simple_compression.c
+++ b/examples/multiple_simple_compression.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/multiple_streaming_compression.c b/examples/multiple_streaming_compression.c
index 8a4dc96..d4efc8e 100644
--- a/examples/multiple_streaming_compression.c
+++ b/examples/multiple_streaming_compression.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/simple_compression.c b/examples/simple_compression.c
index 618080b..27a65b1 100644
--- a/examples/simple_compression.c
+++ b/examples/simple_compression.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/simple_decompression.c b/examples/simple_decompression.c
index e108987..59c1fd4 100644
--- a/examples/simple_decompression.c
+++ b/examples/simple_decompression.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/streaming_compression.c b/examples/streaming_compression.c
index 045437f..e20bcde 100644
--- a/examples/streaming_compression.c
+++ b/examples/streaming_compression.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/streaming_compression_thread_pool.c b/examples/streaming_compression_thread_pool.c
index 22c3b2e..5a6551b 100644
--- a/examples/streaming_compression_thread_pool.c
+++ b/examples/streaming_compression_thread_pool.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, Martin Liska, SUSE, Facebook, Inc.
+ * Copyright (c) Martin Liska, SUSE, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/streaming_decompression.c b/examples/streaming_decompression.c
index 26eda34..6dc4c22 100644
--- a/examples/streaming_decompression.c
+++ b/examples/streaming_decompression.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/streaming_memory_usage.c b/examples/streaming_memory_usage.c
index 37dd660..a5219ef 100644
--- a/examples/streaming_memory_usage.c
+++ b/examples/streaming_memory_usage.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/BUCK b/lib/BUCK
index 637c20d..60c6bbb 100644
--- a/lib/BUCK
+++ b/lib/BUCK
@@ -65,9 +65,7 @@
     name='zdict',
     header_namespace='',
     visibility=['PUBLIC'],
-    exported_headers=subdir_glob([
-        ('dictBuilder', 'zdict.h'),
-    ]),
+    exported_headers=['zdict.h'],
     headers=subdir_glob([
         ('dictBuilder', 'divsufsort.h'),
         ('dictBuilder', 'cover.h'),
@@ -131,10 +129,10 @@
     name='errors',
     header_namespace='',
     visibility=['PUBLIC'],
-    exported_headers=subdir_glob([
-        ('common', 'error_private.h'),
-        ('common', 'zstd_errors.h'),
-    ]),
+    exported_headers=[
+        'zstd_errors.h',
+        'common/error_private.h',
+    ]
     srcs=['common/error_private.c'],
 )
 
diff --git a/lib/Makefile b/lib/Makefile
index 869d766..9109476 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2015-2020, Yann Collet, Facebook, Inc.
+# Copyright (c) Yann Collet, Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
@@ -8,6 +8,9 @@
 # You may select, at your option, one of the above-listed licenses.
 # ################################################################
 
+# Note: by default, the static library is built single-threaded and the dynamic library is built
+# multi-threaded. It is possible to force multi- or single-threaded builds by appending
+# -mt or -nomt to the build target (like lib-mt for multi-threaded, lib-nomt for single-threaded).
 .PHONY: default
 default: lib-release
 
@@ -68,6 +71,10 @@
 CFLAGS   += $(DEBUGFLAGS) $(MOREFLAGS)
 FLAGS     = $(CPPFLAGS) $(CFLAGS)
 
+CPPFLAGS_DYNLIB  = -DZSTD_MULTITHREAD # dynamic library build defaults to multi-threaded
+LDFLAGS_DYNLIB   = -pthread
+CPPFLAGS_STATLIB =                    # static library build defaults to single-threaded
+
 HAVE_COLORNEVER = $(shell echo a | grep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0)
 GREP_OPTIONS ?=
 ifeq ($HAVE_COLORNEVER, 1)
@@ -91,7 +98,7 @@
 ZSTD_LIB_COMPRESSION ?= 1
 ZSTD_LIB_DECOMPRESSION ?= 1
 ZSTD_LIB_DICTBUILDER ?= 1
-ZSTD_LIB_DEPRECATED ?= 1
+ZSTD_LIB_DEPRECATED ?= 0
 
 # Legacy support
 ZSTD_LEGACY_SUPPORT ?= 5
@@ -176,9 +183,13 @@
 
 ifndef BUILD_DIR
 ifeq ($(UNAME), Darwin)
-  HASH ?= md5
+  ifeq ($(shell md5 < /dev/null > /dev/null; echo $$?), 0)
+    HASH ?= md5
+  endif
 else ifeq ($(UNAME), FreeBSD)
   HASH ?= gmd5sum
+else ifeq ($(UNAME), NetBSD)
+  HASH ?= md5 -n
 else ifeq ($(UNAME), OpenBSD)
   HASH ?= md5
 endif
@@ -208,21 +219,19 @@
 endif
 
 SET_CACHE_DIRECTORY = \
-	$(MAKE) --no-print-directory $@ \
+   +$(MAKE) --no-print-directory $@ \
     BUILD_DIR=obj/$(HASH_DIR) \
     CPPFLAGS="$(CPPFLAGS)" \
     CFLAGS="$(CFLAGS)" \
     LDFLAGS="$(LDFLAGS)"
 
 
-.PHONY: lib-all all clean install uninstall
-
-# alias
-lib-all: all
-
+.PHONY: all
 all: lib
 
+
 .PHONY: libzstd.a  # must be run every time
+libzstd.a: CPPFLAGS += $(CPPFLAGS_STATLIB)
 
 ifndef BUILD_DIR
 # determine BUILD_DIR from compilation flags
@@ -239,7 +248,10 @@
 $(ZSTD_STATLIB): ARFLAGS = rcs
 $(ZSTD_STATLIB): | $(ZSTD_STATLIB_DIR)
 $(ZSTD_STATLIB): $(ZSTD_STATLIB_OBJ)
-	@echo compiling static library
+  # Check for multithread flag at target execution time
+	$(if $(filter -DZSTD_MULTITHREAD,$(CPPFLAGS)),\
+    @echo compiling multi-threaded static library $(LIBVER),\
+    @echo compiling single-threaded static library $(LIBVER))
 	$(AR) $(ARFLAGS) $@ $^
 
 libzstd.a: $(ZSTD_STATLIB)
@@ -258,8 +270,9 @@
 
 LIBZSTD = libzstd.$(SHARED_EXT_VER)
 .PHONY: $(LIBZSTD)  # must be run every time
-$(LIBZSTD): CFLAGS += -fPIC
-$(LIBZSTD): LDFLAGS += -shared -fvisibility=hidden
+$(LIBZSTD): CPPFLAGS += $(CPPFLAGS_DYNLIB)
+$(LIBZSTD): CFLAGS   += -fPIC -fvisibility=hidden
+$(LIBZSTD): LDFLAGS  += -shared $(LDFLAGS_DYNLIB)
 
 ifndef BUILD_DIR
 # determine BUILD_DIR from compilation flags
@@ -276,7 +289,10 @@
 
 $(ZSTD_DYNLIB): | $(ZSTD_DYNLIB_DIR)
 $(ZSTD_DYNLIB): $(ZSTD_DYNLIB_OBJ)
-	@echo compiling dynamic library $(LIBVER)
+# Check for multithread flag at target execution time
+	$(if $(filter -DZSTD_MULTITHREAD,$(CPPFLAGS)),\
+    @echo compiling multi-threaded dynamic library $(LIBVER),\
+    @echo compiling single-threaded dynamic library $(LIBVER))
 	$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
 	@echo creating versioned links
 	ln -sf $@ libzstd.$(SHARED_EXT_MAJOR)
@@ -298,10 +314,17 @@
 # note : do not define lib-mt or lib-release as .PHONY
 # make does not consider implicit pattern rule for .PHONY target
 
-%-mt : CPPFLAGS += -DZSTD_MULTITHREAD
-%-mt : LDFLAGS  += -pthread
+%-mt : CPPFLAGS_DYNLIB  := -DZSTD_MULTITHREAD
+%-mt : CPPFLAGS_STATLIB := -DZSTD_MULTITHREAD
+%-mt : LDFLAGS_DYNLIB   := -pthread
 %-mt : %
-	@echo multi-threading build completed
+	@echo multi-threaded build completed
+
+%-nomt : CPPFLAGS_DYNLIB  :=
+%-nomt : LDFLAGS_DYNLIB   :=
+%-nomt : CPPFLAGS_STATLIB :=
+%-nomt : %
+	@echo single-threaded build completed
 
 %-release : DEBUGFLAGS :=
 %-release : %
@@ -333,12 +356,14 @@
 # Special case : building library in single-thread mode _and_ without zstdmt_compress.c
 ZSTDMT_FILES = compress/zstdmt_compress.c
 ZSTD_NOMT_FILES = $(filter-out $(ZSTDMT_FILES),$(ZSTD_FILES))
-libzstd-nomt: LDFLAGS += -shared -fPIC -fvisibility=hidden
+libzstd-nomt: CFLAGS += -fPIC -fvisibility=hidden
+libzstd-nomt: LDFLAGS += -shared
 libzstd-nomt: $(ZSTD_NOMT_FILES)
 	@echo compiling single-thread dynamic library $(LIBVER)
 	@echo files : $(ZSTD_NOMT_FILES)
 	$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
 
+.PHONY: clean
 clean:
 	$(RM) -r *.dSYM   # macOS-specific
 	$(RM) core *.o *.a *.gcda *.$(SHARED_EXT) *.$(SHARED_EXT).* libzstd.pc
@@ -407,13 +432,16 @@
           -e 's|@VERSION@|$(VERSION)|' \
           $< >$@
 
+.PHONY: install
 install: install-pc install-static install-shared install-includes
 	@echo zstd static and shared library installed
 
+.PHONY: install-pc
 install-pc: libzstd.pc
 	[ -e $(DESTDIR)$(PKGCONFIGDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(PKGCONFIGDIR)/
 	$(INSTALL_DATA) libzstd.pc $(DESTDIR)$(PKGCONFIGDIR)/
 
+.PHONY: install-static
 install-static:
 	# only generate libzstd.a if it's not already present
 	[ -e libzstd.a ] || $(MAKE) libzstd.a-release
@@ -421,6 +449,7 @@
 	@echo Installing static library
 	$(INSTALL_DATA) libzstd.a $(DESTDIR)$(LIBDIR)
 
+.PHONY: install-shared
 install-shared:
 	# only generate libzstd.so if it's not already present
 	[ -e $(LIBZSTD) ] || $(MAKE) libzstd-release
@@ -430,13 +459,15 @@
 	ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
 	ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)
 
+.PHONY: install-includes
 install-includes:
 	[ -e $(DESTDIR)$(INCLUDEDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(INCLUDEDIR)/
 	@echo Installing includes
 	$(INSTALL_DATA) zstd.h $(DESTDIR)$(INCLUDEDIR)
-	$(INSTALL_DATA) common/zstd_errors.h $(DESTDIR)$(INCLUDEDIR)
-	$(INSTALL_DATA) dictBuilder/zdict.h $(DESTDIR)$(INCLUDEDIR)
+	$(INSTALL_DATA) zstd_errors.h $(DESTDIR)$(INCLUDEDIR)
+	$(INSTALL_DATA) zdict.h $(DESTDIR)$(INCLUDEDIR)
 
+.PHONY: uninstall
 uninstall:
 	$(RM) $(DESTDIR)$(LIBDIR)/libzstd.a
 	$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)
diff --git a/lib/README.md b/lib/README.md
index db9170a..f781ac5 100644
--- a/lib/README.md
+++ b/lib/README.md
@@ -19,12 +19,16 @@
 
 #### Multithreading support
 
-Multithreading is disabled by default when building with `make`.
+When building with `make`, by default the dynamic library is multithreaded and the static library is single-threaded (for compatibility reasons).
+
 Enabling multithreading requires 2 conditions :
 - set build macro `ZSTD_MULTITHREAD` (`-DZSTD_MULTITHREAD` for `gcc`)
 - for POSIX systems : compile with pthread (`-pthread` compilation flag for `gcc`)
 
-Both conditions are automatically applied when invoking `make lib-mt` target.
+For convenience, we provide build targets to generate multi-threaded and single-threaded libraries:
+- Force enable multithreading on both dynamic and static libraries by appending `-mt` to the target, e.g. `make lib-mt`.
+- Force disable multithreading on both dynamic and static libraries by appending `-nomt` to the target, e.g. `make lib-nomt`.
+- By default, as mentioned before, the dynamic library is multithreaded and the static library is single-threaded, e.g. `make lib`.
 
 When linking a POSIX program with a multithreaded version of `libzstd`,
 note that it's necessary to invoke the `-pthread` flag during link stage.
@@ -42,8 +46,8 @@
 
 Optional advanced features are exposed via :
 
-- `lib/common/zstd_errors.h` : translates `size_t` function results
-                               into a `ZSTD_ErrorCode`, for accurate error handling.
+- `lib/zstd_errors.h` : translates `size_t` function results
+                        into a `ZSTD_ErrorCode`, for accurate error handling.
 
 - `ZSTD_STATIC_LINKING_ONLY` : if this macro is defined _before_ including `zstd.h`,
                           it unlocks access to the experimental API,
diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h
index d9a2730..2e5a933 100644
--- a/lib/common/bitstream.h
+++ b/lib/common/bitstream.h
@@ -1,7 +1,7 @@
 /* ******************************************************************
  * bitstream
  * Part of FSE library
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
diff --git a/lib/common/compiler.h b/lib/common/compiler.h
index 3e454f3..a951d0a 100644
--- a/lib/common/compiler.h
+++ b/lib/common/compiler.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -90,6 +90,7 @@
 #  endif
 #endif
 
+
 /* target attribute */
 #ifndef __has_attribute
   #define __has_attribute(x) 0  /* Compatibility with non-clang compilers. */
diff --git a/lib/common/cpu.h b/lib/common/cpu.h
index cb21059..8acd33b 100644
--- a/lib/common/cpu.h
+++ b/lib/common/cpu.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/common/debug.c b/lib/common/debug.c
index f303f4a..bb863c9 100644
--- a/lib/common/debug.c
+++ b/lib/common/debug.c
@@ -1,7 +1,7 @@
 /* ******************************************************************
  * debug
  * Part of FSE library
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
diff --git a/lib/common/debug.h b/lib/common/debug.h
index 8b57343..3b2a320 100644
--- a/lib/common/debug.h
+++ b/lib/common/debug.h
@@ -1,7 +1,7 @@
 /* ******************************************************************
  * debug
  * Part of FSE library
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
diff --git a/lib/common/entropy_common.c b/lib/common/entropy_common.c
index f9fcb1a..41cd695 100644
--- a/lib/common/entropy_common.c
+++ b/lib/common/entropy_common.c
@@ -1,6 +1,6 @@
 /* ******************************************************************
  * Common functions of New Generation Entropy library
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
diff --git a/lib/common/error_private.c b/lib/common/error_private.c
index 45bba53..6d1135f 100644
--- a/lib/common/error_private.c
+++ b/lib/common/error_private.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/common/error_private.h b/lib/common/error_private.h
index 71b37b8..6d8b9f7 100644
--- a/lib/common/error_private.h
+++ b/lib/common/error_private.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -21,8 +21,8 @@
 /* ****************************************
 *  Dependencies
 ******************************************/
-#include "zstd_deps.h"    /* size_t */
-#include "zstd_errors.h"  /* enum list */
+#include "../zstd_errors.h"  /* enum list */
+#include "zstd_deps.h"       /* size_t */
 
 
 /* ****************************************
diff --git a/lib/common/fse.h b/lib/common/fse.h
index 83a0784..19dd4fe 100644
--- a/lib/common/fse.h
+++ b/lib/common/fse.h
@@ -1,7 +1,7 @@
 /* ******************************************************************
  * FSE : Finite State Entropy codec
  * Public Prototypes declaration
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -335,9 +335,10 @@
 
 /* FSE_buildCTable_wksp() :
  * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
- * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog)`.
+ * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
  */
-#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * (maxSymbolValue + 2) + (1ull << tableLog))
+#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2)))
+#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
 size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
 
 #define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
@@ -351,7 +352,7 @@
 size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
 /**< build a fake FSE_DTable, designed to always generate the same symbolValue */
 
-#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue))
+#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
 #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
 size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
 /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c
index c164430..f4ff58f 100644
--- a/lib/common/fse_decompress.c
+++ b/lib/common/fse_decompress.c
@@ -1,6 +1,6 @@
 /* ******************************************************************
  * FSE : Finite State Entropy decoder
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -310,6 +310,12 @@
     return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
 }
 
+typedef struct {  /* layout imposed on the caller-provided workSpace by FSE_decompress_wksp_body() */
+    short ncount[FSE_MAX_SYMBOL_VALUE + 1];  /* filled by FSE_readNCount_bmi2(), then read by FSE_buildDTable_internal() */
+    FSE_DTable dtable[1]; /* Dynamically sized : the body treats it as FSE_DTABLE_SIZE_U32(tableLog) cells */
+} FSE_DecompressWksp;
+
+
 FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
         void* dst, size_t dstCapacity,
         const void* cSrc, size_t cSrcSize,
@@ -318,33 +324,37 @@
 {
     const BYTE* const istart = (const BYTE*)cSrc;
     const BYTE* ip = istart;
-    short counting[FSE_MAX_SYMBOL_VALUE+1];
     unsigned tableLog;
     unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
-    FSE_DTable* const dtable = (FSE_DTable*)workSpace;
+    FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
+
+    DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
+    if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
 
     /* normal FSE decoding mode */
-    size_t const NCountLength = FSE_readNCount_bmi2(counting, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
-    if (FSE_isError(NCountLength)) return NCountLength;
-    if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
-    assert(NCountLength <= cSrcSize);
-    ip += NCountLength;
-    cSrcSize -= NCountLength;
+    {
+        size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
+        if (FSE_isError(NCountLength)) return NCountLength;
+        if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
+        assert(NCountLength <= cSrcSize);
+        ip += NCountLength;
+        cSrcSize -= NCountLength;
+    }
 
     if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
-    workSpace = dtable + FSE_DTABLE_SIZE_U32(tableLog);
-    wkspSize -= FSE_DTABLE_SIZE(tableLog);
+    workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog);
+    wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
 
-    CHECK_F( FSE_buildDTable_internal(dtable, counting, maxSymbolValue, tableLog, workSpace, wkspSize) );
+    CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
 
     {
-        const void* ptr = dtable;
+        const void* ptr = wksp->dtable;
         const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
         const U32 fastMode = DTableH->fastMode;
 
         /* select fast mode (static) */
-        if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1);
-        return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0);
+        if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
+        return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
     }
 }
 
diff --git a/lib/common/huf.h b/lib/common/huf.h
index 1afef90..3d47ced 100644
--- a/lib/common/huf.h
+++ b/lib/common/huf.h
@@ -1,7 +1,7 @@
 /* ******************************************************************
  * huff0 huffman codec,
  * part of Finite State Entropy library
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -192,6 +192,7 @@
 unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
 size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);   /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
 size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
+size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
 size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
 int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
@@ -278,7 +279,7 @@
  *  a required workspace size greater than that specified in the following
  *  macro.
  */
-#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
+#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))
 #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
 
 #ifndef HUF_FORCE_DECOMPRESS_X2
diff --git a/lib/common/mem.h b/lib/common/mem.h
index 4728ef7..9f3b81a 100644
--- a/lib/common/mem.h
+++ b/lib/common/mem.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -143,9 +143,7 @@
  * Prefer these methods in priority order (0 > 1 > 2)
  */
 #ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
-#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
-#    define MEM_FORCE_MEMORY_ACCESS 2
-#  elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
+#  if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
 #    define MEM_FORCE_MEMORY_ACCESS 1
 #  endif
 #endif
@@ -308,7 +306,7 @@
 
 MEM_STATIC U32 MEM_readLE24(const void* memPtr)
 {
-    return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
+    return (U32)MEM_readLE16(memPtr) + ((U32)(((const BYTE*)memPtr)[2]) << 16);
 }
 
 MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val)
diff --git a/lib/common/pool.c b/lib/common/pool.c
index 4c1b833..ea70b8b 100644
--- a/lib/common/pool.c
+++ b/lib/common/pool.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/common/pool.h b/lib/common/pool.h
index 63954ca..e18aa07 100644
--- a/lib/common/pool.h
+++ b/lib/common/pool.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/common/xxhash.c b/lib/common/xxhash.c
index e708df3..926b336 100644
--- a/lib/common/xxhash.c
+++ b/lib/common/xxhash.c
@@ -1,6 +1,6 @@
 /*
  *  xxHash - Fast Hash algorithm
- *  Copyright (c) 2012-2020, Yann Collet, Facebook, Inc.
+ *  Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - xxHash homepage: http://www.xxhash.com
@@ -30,9 +30,7 @@
  * Prefer these methods in priority order (0 > 1 > 2)
  */
 #ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
-#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
-#    define XXH_FORCE_MEMORY_ACCESS 2
-#  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
+#  if (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
   (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \
   defined(__ICCARM__)
 #    define XXH_FORCE_MEMORY_ACCESS 1
diff --git a/lib/common/xxhash.h b/lib/common/xxhash.h
index eceb55d..16c1f16 100644
--- a/lib/common/xxhash.h
+++ b/lib/common/xxhash.h
@@ -1,7 +1,7 @@
 /*
  * xxHash - Extremely Fast Hash algorithm
  * Header File
- * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - xxHash source repository : https://github.com/Cyan4973/xxHash
diff --git a/lib/common/zstd_common.c b/lib/common/zstd_common.c
index 939e9f0..3d7e35b 100644
--- a/lib/common/zstd_common.c
+++ b/lib/common/zstd_common.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/common/zstd_deps.h b/lib/common/zstd_deps.h
index 0fb8b78..1421134 100644
--- a/lib/common/zstd_deps.h
+++ b/lib/common/zstd_deps.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index 0991f20..68252e9 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -36,6 +36,11 @@
 #  define XXH_STATIC_LINKING_ONLY  /* XXH64_state_t */
 #endif
 #include "xxhash.h"                /* XXH_reset, update, digest */
+#ifndef ZSTD_NO_TRACE
+#  include "zstd_trace.h"
+#else
+#  define ZSTD_TRACE 0
+#endif
 
 #if defined (__cplusplus)
 extern "C" {
@@ -347,11 +352,18 @@
 *  Private declarations
 *********************************************/
 typedef struct seqDef_s {
-    U32 offset;         /* Offset code of the sequence */
+    U32 offset;         /* offset == rawOffset + ZSTD_REP_NUM, or equivalently, offCode + 1 */
     U16 litLength;
     U16 matchLength;
 } seqDef;
 
+/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */
+typedef enum {
+    ZSTD_llt_none = 0,             /* no longLengthType */
+    ZSTD_llt_literalLength = 1,    /* represents a long literal */
+    ZSTD_llt_matchLength = 2       /* represents a long match */
+} ZSTD_longLengthType_e;
+
 typedef struct {
     seqDef* sequencesStart;
     seqDef* sequences;      /* ptr to end of sequences */
@@ -363,12 +375,12 @@
     size_t maxNbSeq;
     size_t maxNbLit;
 
-    /* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength
+    /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength
      * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
-     * the existing value of the litLength or matchLength by 0x10000. 
+     * the existing value of the litLength or matchLength by 0x10000.
      */
-    U32   longLengthID;   /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */
-    U32   longLengthPos;  /* Index of the sequence to apply long length modification to */
+    ZSTD_longLengthType_e   longLengthType;
+    U32                     longLengthPos;  /* Index of the sequence to apply long length modification to */
 } seqStore_t;
 
 typedef struct {
@@ -378,7 +390,7 @@
 
 /**
  * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
- * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength.
+ * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
  */
 MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
 {
@@ -386,10 +398,10 @@
     seqLen.litLength = seq->litLength;
     seqLen.matchLength = seq->matchLength + MINMATCH;
     if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
-        if (seqStore->longLengthID == 1) {
+        if (seqStore->longLengthType == ZSTD_llt_literalLength) {
             seqLen.litLength += 0xFFFF;
         }
-        if (seqStore->longLengthID == 2) {
+        if (seqStore->longLengthType == ZSTD_llt_matchLength) {
             seqLen.matchLength += 0xFFFF;
         }
     }
diff --git a/lib/common/zstd_trace.h b/lib/common/zstd_trace.h
new file mode 100644
index 0000000..2da5640
--- /dev/null
+++ b/lib/common/zstd_trace.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_TRACE_H
+#define ZSTD_TRACE_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#include <stddef.h>
+
+/* weak symbol support */
+#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && defined(__GNUC__) && \
+    !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \
+    !defined(__CYGWIN__)
+#  define ZSTD_HAVE_WEAK_SYMBOLS 1
+#else
+#  define ZSTD_HAVE_WEAK_SYMBOLS 0
+#endif
+#if ZSTD_HAVE_WEAK_SYMBOLS
+#  define ZSTD_WEAK_ATTR __attribute__((__weak__))
+#else
+#  define ZSTD_WEAK_ATTR
+#endif
+
+/* Only enable tracing when weak symbols are available. */
+#ifndef ZSTD_TRACE
+#  define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS
+#endif
+
+#if ZSTD_TRACE
+
+struct ZSTD_CCtx_s;
+struct ZSTD_DCtx_s;
+struct ZSTD_CCtx_params_s;
+
+typedef struct {
+    /**
+     * ZSTD_VERSION_NUMBER
+     *
+     * This is guaranteed to be the first member of ZSTD_trace.
+     * Otherwise, this struct is not stable between versions. If
+     * the version number does not match your expectation, you
+     * should not interpret the rest of the struct.
+     */
+    unsigned version;
+    /**
+     * Non-zero if streaming (de)compression is used.
+     */
+    unsigned streaming;
+    /**
+     * The dictionary ID.
+     */
+    unsigned dictionaryID;
+    /**
+     * Is the dictionary cold?
+     * Only set on decompression.
+     */
+    unsigned dictionaryIsCold;
+    /**
+     * The dictionary size or zero if no dictionary.
+     */
+    size_t dictionarySize;
+    /**
+     * The uncompressed size of the data.
+     */
+    size_t uncompressedSize;
+    /**
+     * The compressed size of the data.
+     */
+    size_t compressedSize;
+    /**
+     * The fully resolved CCtx parameters (NULL on decompression).
+     */
+    struct ZSTD_CCtx_params_s const* params;
+    /**
+     * The ZSTD_CCtx pointer (NULL on decompression).
+     */
+    struct ZSTD_CCtx_s const* cctx;
+    /**
+     * The ZSTD_DCtx pointer (NULL on compression).
+     */
+    struct ZSTD_DCtx_s const* dctx;
+} ZSTD_Trace;
+
+/**
+ * A tracing context. It must be 0 when tracing is disabled.
+ * Otherwise, any non-zero value returned by a tracing begin()
+ * function is presented to any subsequent calls to end().
+ *
+ * Any non-zero value means that tracing is enabled; the value
+ * itself is not interpreted by the library.
+ *
+ * Two possible uses are:
+ * * A timestamp for when the begin() function was called.
+ * * A unique key identifying the (de)compression, like the
+ *   address of the [dc]ctx pointer if you need to track
+ *   more information than just a timestamp.
+ */
+typedef unsigned long long ZSTD_TraceCtx;
+
+/**
+ * Trace the beginning of a compression call.
+ * @param cctx The cctx pointer for the compression.
+ *             It can be used as a key to map begin() to end().
+ * @returns Non-zero if tracing is enabled. The return value is
+ *          passed to ZSTD_trace_compress_end().
+ */
+ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin(
+    struct ZSTD_CCtx_s const* cctx);
+
+/**
+ * Trace the end of a compression call.
+ * @param ctx The return value of ZSTD_trace_compress_begin().
+ * @param trace The zstd tracing info.
+ */
+ZSTD_WEAK_ATTR void ZSTD_trace_compress_end(
+    ZSTD_TraceCtx ctx,
+    ZSTD_Trace const* trace);
+
+/**
+ * Trace the beginning of a decompression call.
+ * @param dctx The dctx pointer for the decompression.
+ *             It can be used as a key to map begin() to end().
+ * @returns Non-zero if tracing is enabled. The return value is
+ *          passed to ZSTD_trace_decompress_end().
+ */
+ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin(
+    struct ZSTD_DCtx_s const* dctx);
+
+/**
+ * Trace the end of a decompression call.
+ * @param ctx The return value of ZSTD_trace_decompress_begin().
+ * @param trace The zstd tracing info.
+ */
+ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end(
+    ZSTD_TraceCtx ctx,
+    ZSTD_Trace const* trace);
+
+#endif /* ZSTD_TRACE */
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_TRACE_H */
diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c
index 304a82b..b4297ec 100644
--- a/lib/compress/fse_compress.c
+++ b/lib/compress/fse_compress.c
@@ -1,6 +1,6 @@
 /* ******************************************************************
  * FSE : Finite State Entropy encoder
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
diff --git a/lib/compress/hist.c b/lib/compress/hist.c
index a9659d1..073c57e 100644
--- a/lib/compress/hist.c
+++ b/lib/compress/hist.c
@@ -1,7 +1,7 @@
 /* ******************************************************************
  * hist : Histogram functions
  * part of Finite State Entropy project
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
diff --git a/lib/compress/hist.h b/lib/compress/hist.h
index fb9ead6..228ed48 100644
--- a/lib/compress/hist.h
+++ b/lib/compress/hist.h
@@ -1,7 +1,7 @@
 /* ******************************************************************
  * hist : Histogram functions
  * part of Finite State Entropy project
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c
index abbcc31..485906e 100644
--- a/lib/compress/huf_compress.c
+++ b/lib/compress/huf_compress.c
@@ -1,6 +1,6 @@
 /* ******************************************************************
  * Huffman encoder, part of New Generation Entropy library
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -59,7 +59,15 @@
  * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
  */
 #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
-static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
+
+typedef struct {
+    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
+    U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
+    unsigned count[HUF_TABLELOG_MAX+1];
+    S16 norm[HUF_TABLELOG_MAX+1];
+} HUF_CompressWeightsWksp;
+
+static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
 {
     BYTE* const ostart = (BYTE*) dst;
     BYTE* op = ostart;
@@ -67,33 +75,30 @@
 
     unsigned maxSymbolValue = HUF_TABLELOG_MAX;
     U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
+    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace;
 
-    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
-    BYTE scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
-
-    unsigned count[HUF_TABLELOG_MAX+1];
-    S16 norm[HUF_TABLELOG_MAX+1];
+    if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
 
     /* init conditions */
     if (wtSize <= 1) return 0;  /* Not compressible */
 
     /* Scan input and build symbol stats */
-    {   unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
+    {   unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
         if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
         if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
     }
 
     tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
-    CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
+    CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
 
     /* Write table description header */
-    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
+    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
         op += hSize;
     }
 
     /* Compress */
-    CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
-    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
+    CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
+    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
         if (cSize == 0) return 0;   /* not enough space for compressed data */
         op += cSize;
     }
@@ -102,29 +107,33 @@
 }
 
 
-/*! HUF_writeCTable() :
-    `CTable` : Huffman tree to save, using huf representation.
-    @return : size of saved CTable */
-size_t HUF_writeCTable (void* dst, size_t maxDstSize,
-                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
-{
+typedef struct {
+    HUF_CompressWeightsWksp wksp;
     BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
     BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
+} HUF_WriteCTableWksp;
+
+size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
+                            const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
+                            void* workspace, size_t workspaceSize)
+{
     BYTE* op = (BYTE*)dst;
     U32 n;
+    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace;
 
-     /* check conditions */
+    /* check conditions */
+    if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
 
     /* convert to weight */
-    bitsToWeight[0] = 0;
+    wksp->bitsToWeight[0] = 0;
     for (n=1; n<huffLog+1; n++)
-        bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
+        wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
     for (n=0; n<maxSymbolValue; n++)
-        huffWeight[n] = bitsToWeight[CTable[n].nbBits];
+        wksp->huffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits];
 
     /* attempt weights compression by FSE */
-    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
+    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
         if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
             op[0] = (BYTE)hSize;
             return hSize+1;
@@ -134,12 +143,22 @@
     if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
     if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
     op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
-    huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
+    wksp->huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
     for (n=0; n<maxSymbolValue; n+=2)
-        op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
+        op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
     return ((maxSymbolValue+1)/2) + 1;
 }
 
+/*! HUF_writeCTable() :
+    `CTable` : Huffman tree to save, using huf representation.
+    @return : size of saved CTable */
+size_t HUF_writeCTable (void* dst, size_t maxDstSize,
+                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
+{
+    HUF_WriteCTableWksp wksp;
+    return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
+}
+
 
 size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
 {
@@ -732,29 +751,33 @@
 typedef struct {
     unsigned count[HUF_SYMBOLVALUE_MAX + 1];
     HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
-    HUF_buildCTable_wksp_tables buildCTable_wksp;
+    union {
+        HUF_buildCTable_wksp_tables buildCTable_wksp;
+        HUF_WriteCTableWksp writeCTable_wksp;
+    } wksps;
 } HUF_compress_tables_t;
 
 /* HUF_compress_internal() :
- * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
+ * `workSpace_align4` must be aligned on 4-byte boundaries,
+ * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */
 static size_t
 HUF_compress_internal (void* dst, size_t dstSize,
                  const void* src, size_t srcSize,
                        unsigned maxSymbolValue, unsigned huffLog,
                        HUF_nbStreams_e nbStreams,
-                       void* workSpace, size_t wkspSize,
+                       void* workSpace_align4, size_t wkspSize,
                        HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
                  const int bmi2)
 {
-    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
+    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4;
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstSize;
     BYTE* op = ostart;
 
     HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
+    assert(((size_t)workSpace_align4 & 3) == 0);   /* must be aligned on 4-bytes boundaries */
 
     /* checks & inits */
-    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
     if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
     if (!srcSize) return 0;  /* Uncompressed */
     if (!dstSize) return 0;  /* cannot fit anything within dst budget */
@@ -772,7 +795,7 @@
     }
 
     /* Scan input and build symbol stats */
-    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) );
+    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) );
         if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
         if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
     }
@@ -794,7 +817,7 @@
     huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
     {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
                                             maxSymbolValue, huffLog,
-                                            &table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
+                                            &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
         CHECK_F(maxBits);
         huffLog = (U32)maxBits;
         /* Zero unused symbols in CTable, so we can check it for validity */
@@ -803,7 +826,8 @@
     }
 
     /* Write table description header */
-    {   CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
+    {   CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
+                                              &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
         /* Check if using previous huffman table is beneficial */
         if (repeat && *repeat != HUF_repeat_none) {
             size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index eb7780c..b7ee298 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -72,6 +72,10 @@
     ZSTD_customMem customMem;
     U32 dictID;
     int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
+    ZSTD_useRowMatchFinderMode_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use
+                                                     * row-based matchfinder. Unless the cdict is reloaded, we will use
+                                                     * the same greedy/lazy matchfinder at compression time.
+                                                     */
 };  /* typedef'd to ZSTD_CDict within "zstd.h" */
 
 ZSTD_CCtx* ZSTD_createCCtx(void)
@@ -202,6 +206,49 @@
 /* private API call, for dictBuilder only */
 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
 
+/* Returns true if the strategy supports using a row based matchfinder */
+static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) {
+    return (strategy >= ZSTD_greedy && strategy <= ZSTD_lazy2);
+}
+
+/* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder
+ * for this compression.
+ */
+static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_useRowMatchFinderMode_e mode) {
+    assert(mode != ZSTD_urm_auto);
+    return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_urm_enableRowMatchFinder);
+}
+
+/* Returns row matchfinder usage enum given an initial mode and cParams */
+static ZSTD_useRowMatchFinderMode_e ZSTD_resolveRowMatchFinderMode(ZSTD_useRowMatchFinderMode_e mode,
+                                                                   const ZSTD_compressionParameters* const cParams) {
+#if !defined(ZSTD_NO_INTRINSICS) && (defined(__SSE2__) || defined(__ARM_NEON))
+    int const kHasSIMD128 = 1;
+#else
+    int const kHasSIMD128 = 0;
+#endif
+    if (mode != ZSTD_urm_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */
+    mode = ZSTD_urm_disableRowMatchFinder;
+    if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode;
+    if (kHasSIMD128) {
+        if (cParams->windowLog > 14) mode = ZSTD_urm_enableRowMatchFinder;
+    } else {
+        if (cParams->windowLog > 17) mode = ZSTD_urm_enableRowMatchFinder;
+    }
+    return mode;
+}
+
+/* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */
+static int ZSTD_allocateChainTable(const ZSTD_strategy strategy,
+                                   const ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
+                                   const U32 forDDSDict) {
+    assert(useRowMatchFinder != ZSTD_urm_auto);
+    /* We should always allocate a chaintable if we are allocating a matchstate for a DDS dictionary.
+     * We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder.
+     */
+    return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder));
+}
+
 /* Returns 1 if compression parameters are such that we should
  * enable long distance matching (wlog >= 27, strategy >= btopt).
  * Returns 0 otherwise.
@@ -210,6 +257,14 @@
     return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27;
 }
 
+/* Returns 1 if compression parameters are such that we should
+ * enable the block splitter (windowLog >= 17 and strategy >= ZSTD_btopt).
+ * Returns 0 otherwise.
+ */
+static U32 ZSTD_CParams_useBlockSplitter(const ZSTD_compressionParameters* const cParams) {
+    return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17;
+}
+
 static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
         ZSTD_compressionParameters cParams)
 {
@@ -218,6 +273,7 @@
     ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT);
     cctxParams.cParams = cParams;
 
+    /* Adjust advanced params according to cParams */
     if (ZSTD_CParams_shouldEnableLdm(&cParams)) {
         DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params");
         cctxParams.ldmParams.enableLdm = 1;
@@ -227,6 +283,12 @@
         assert(cctxParams.ldmParams.hashRateLog < 32);
     }
 
+    if (ZSTD_CParams_useBlockSplitter(&cParams)) {
+        DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including block splitting into cctx params");
+        cctxParams.splitBlocks = 1;
+    }
+
+    cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
     assert(!ZSTD_checkCParams(cParams));
     return cctxParams;
 }
@@ -269,29 +331,48 @@
     return 0;
 }
 
+#define ZSTD_NO_CLEVEL 0
+
+/**
+ * Initializes the cctxParams from params and compressionLevel: zeroes *cctxParams, copies cParams and fParams, then resolves the row matchfinder mode.
+ * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL.
+ */
+static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel)
+{
+    assert(!ZSTD_checkCParams(params->cParams));  /* cParams are expected to be validated by the caller */
+    ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));  /* every field not assigned below is left zeroed */
+    cctxParams->cParams = params->cParams;
+    cctxParams->fParams = params->fParams;
+    /* Should not matter, as all cParams are presumed properly defined.
+     * But, set it for tracing anyway.
+     */
+    cctxParams->compressionLevel = compressionLevel;
+    cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams);  /* input mode is the field just zeroed by the memset */
+    DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d", cctxParams->useRowMatchFinder);
+}
+
 size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
 {
     RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
     FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
-    ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
-    assert(!ZSTD_checkCParams(params.cParams));
-    cctxParams->cParams = params.cParams;
-    cctxParams->fParams = params.fParams;
-    cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT;   /* should not matter, as all cParams are presumed properly defined */
+    ZSTD_CCtxParams_init_internal(cctxParams, &params, ZSTD_NO_CLEVEL);  /* params given explicitly, not derived from a compression level */
     return 0;
 }
 
-/* ZSTD_assignParamsToCCtxParams() :
- * params is presumed valid at this stage */
-static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams(
-        const ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
+/**
+ * Sets cctxParams' cParams and fParams from params (and compressionLevel to ZSTD_NO_CLEVEL), but otherwise leaves them alone.
+ * @param params Validated zstd parameters.
+ */
+static void ZSTD_CCtxParams_setZstdParams(
+        ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
 {
-    ZSTD_CCtx_params ret = *cctxParams;
     assert(!ZSTD_checkCParams(params->cParams));
-    ret.cParams = params->cParams;
-    ret.fParams = params->fParams;
-    ret.compressionLevel = ZSTD_CLEVEL_DEFAULT;   /* should not matter, as all cParams are presumed properly defined */
-    return ret;
+    cctxParams->cParams = params->cParams;
+    cctxParams->fParams = params->fParams;
+    /* Should not matter, as all cParams are presumed properly defined.
+     * But, set it for tracing anyway.
+     */
+    cctxParams->compressionLevel = ZSTD_NO_CLEVEL;
 }
 
 ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
@@ -468,6 +549,21 @@
         bounds.upperBound = 1;
         return bounds;
 
+    case ZSTD_c_splitBlocks:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_useRowMatchFinder:
+        bounds.lowerBound = (int)ZSTD_urm_auto;
+        bounds.upperBound = (int)ZSTD_urm_enableRowMatchFinder;
+        return bounds;
+
+    case ZSTD_c_deterministicRefPrefix:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
     default:
         bounds.error = ERROR(parameter_unsupported);
         return bounds;
@@ -529,6 +625,9 @@
     case ZSTD_c_stableOutBuffer:
     case ZSTD_c_blockDelimiters:
     case ZSTD_c_validateSequences:
+    case ZSTD_c_splitBlocks:
+    case ZSTD_c_useRowMatchFinder:
+    case ZSTD_c_deterministicRefPrefix:
     default:
         return 0;
     }
@@ -581,6 +680,9 @@
     case ZSTD_c_stableOutBuffer:
     case ZSTD_c_blockDelimiters:
     case ZSTD_c_validateSequences:
+    case ZSTD_c_splitBlocks:
+    case ZSTD_c_useRowMatchFinder:
+    case ZSTD_c_deterministicRefPrefix:
         break;
 
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
@@ -792,17 +894,32 @@
         CCtxParams->validateSequences = value;
         return CCtxParams->validateSequences;
 
+    case ZSTD_c_splitBlocks:
+        BOUNDCHECK(ZSTD_c_splitBlocks, value);
+        CCtxParams->splitBlocks = value;
+        return CCtxParams->splitBlocks;
+
+    case ZSTD_c_useRowMatchFinder:
+        BOUNDCHECK(ZSTD_c_useRowMatchFinder, value);
+        CCtxParams->useRowMatchFinder = (ZSTD_useRowMatchFinderMode_e)value;
+        return CCtxParams->useRowMatchFinder;
+
+    case ZSTD_c_deterministicRefPrefix:
+        BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);
+        CCtxParams->deterministicRefPrefix = !!value;
+        return CCtxParams->deterministicRefPrefix;
+
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
     }
 }
 
-size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value)
+size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value)
 {
     return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value);
 }
 
 size_t ZSTD_CCtxParams_getParameter(
-        ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, int* value)
+        ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value)
 {
     switch(param)
     {
@@ -915,6 +1032,15 @@
     case ZSTD_c_validateSequences :
         *value = (int)CCtxParams->validateSequences;
         break;
+    case ZSTD_c_splitBlocks :
+        *value = (int)CCtxParams->splitBlocks;
+        break;
+    case ZSTD_c_useRowMatchFinder :
+        *value = (int)CCtxParams->useRowMatchFinder;
+        break;
+    case ZSTD_c_deterministicRefPrefix:
+        *value = (int)CCtxParams->deterministicRefPrefix;
+        break;
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
     }
     return 0;
@@ -1188,15 +1314,26 @@
     const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
     assert(ZSTD_checkCParams(cPar)==0);
 
-    if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
-        srcSize = minSrcSize;
-
     switch (mode) {
-    case ZSTD_cpm_noAttachDict:
     case ZSTD_cpm_unknown:
+    case ZSTD_cpm_noAttachDict:
+        /* If we don't know the source size, don't make any
+         * assumptions about it. We will already have selected
+         * smaller parameters if a dictionary is in use.
+         */
+        break;
     case ZSTD_cpm_createCDict:
+        /* Assume a small source size when creating a dictionary
+         * with an unknown source size.
+         */
+        if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
+            srcSize = minSrcSize;
         break;
     case ZSTD_cpm_attachDict:
+        /* Dictionary has its own dedicated parameters which have
+         * already been selected. We are selecting parameters
+         * for only the source.
+         */
         dictSize = 0;
         break;
     default:
@@ -1213,7 +1350,8 @@
                             ZSTD_highbit32(tSize-1) + 1;
         if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
     }
-    {   U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize);
+    if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
+        U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize);
         U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
         if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1;
         if (cycleLog > dictAndWindowLog)
@@ -1269,9 +1407,14 @@
 
 static size_t
 ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+                       const ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
+                       const U32 enableDedicatedDictSearch,
                        const U32 forCCtx)
 {
-    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+    /* chain table size should be 0 for fast or row-hash strategies */
+    size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, enableDedicatedDictSearch && !forCCtx)
+                                ? ((size_t)1 << cParams->chainLog)
+                                : 0;
     size_t const hSize = ((size_t)1) << cParams->hashLog;
     U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
     size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
@@ -1281,24 +1424,34 @@
                             + hSize * sizeof(U32)
                             + h3Size * sizeof(U32);
     size_t const optPotentialSpace =
-        ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32))
-      + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32))
-      + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32))
-      + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32))
-      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
-      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+        ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32))
+      + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
+      + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
+      + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
+      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
+      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+    size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
+                                            ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16))
+                                            : 0;
     size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
                                 ? optPotentialSpace
                                 : 0;
+    size_t const slackSpace = ZSTD_cwksp_slack_space_required();
+
+    /* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */
+    ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4);
+    assert(useRowMatchFinder != ZSTD_urm_auto);
+
     DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
                 (U32)chainSize, (U32)hSize, (U32)h3Size);
-    return tableSpace + optSpace;
+    return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;
 }
 
 static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
         const ZSTD_compressionParameters* cParams,
         const ldmParams_t* ldmParams,
         const int isStatic,
+        const ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
         const size_t buffInSize,
         const size_t buffOutSize,
         const U64 pledgedSrcSize)
@@ -1308,16 +1461,16 @@
     U32    const divider = (cParams->minMatch==3) ? 3 : 4;
     size_t const maxNbSeq = blockSize / divider;
     size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
-                            + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
+                            + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
                             + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
     size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
     size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
-    size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1);
+    size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1);
 
     size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
     size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
     size_t const ldmSeqSpace = ldmParams->enableLdm ?
-        ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
+        ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
 
 
     size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)
@@ -1343,25 +1496,45 @@
 {
     ZSTD_compressionParameters const cParams =
                 ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+    ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder,
+                                                                                         &cParams);
 
     RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
     /* estimateCCtxSize is for one-shot compression. So no buffers should
      * be needed. However, we still allocate two 0-sized buffers, which can
      * take space under ASAN. */
     return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-        &cParams, &params->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
+        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
 }
 
 size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
 {
-    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
-    return ZSTD_estimateCCtxSize_usingCCtxParams(&params);
+    ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);  /* non-const: useRowMatchFinder is overridden below */
+    if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
+        /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
+        size_t noRowCCtxSize;
+        size_t rowCCtxSize;
+        initialParams.useRowMatchFinder = ZSTD_urm_disableRowMatchFinder;
+        noRowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+        initialParams.useRowMatchFinder = ZSTD_urm_enableRowMatchFinder;
+        rowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+        return MAX(noRowCCtxSize, rowCCtxSize);  /* the estimate must cover whichever mode ends up being used */
+    } else {
+        return ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);  /* strategy has no row matchfinder: a single estimate suffices */
+    }
 }
 
 static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
 {
-    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
-    return ZSTD_estimateCCtxSize_usingCParams(cParams);
+    int tier = 0;
+    size_t largestSize = 0;  /* running maximum over all source-size tiers */
+    static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN};  /* last tier: source size not known */
+    for (; tier < 4; ++tier) {
+        /* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */
+        ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict);
+        largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize);
+    }
+    return largestSize;
 }
 
 size_t ZSTD_estimateCCtxSize(int compressionLevel)
@@ -1369,6 +1542,7 @@
     int level;
     size_t memBudget = 0;
     for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
+        /* Ensure monotonically increasing memory usage as compression level increases */
         size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
         if (newMB > memBudget) memBudget = newMB;
     }
@@ -1387,17 +1561,29 @@
         size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
                 ? ZSTD_compressBound(blockSize) + 1
                 : 0;
+        ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, &params->cParams);
 
         return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-            &cParams, &params->ldmParams, 1, inBuffSize, outBuffSize,
+            &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
             ZSTD_CONTENTSIZE_UNKNOWN);
     }
 }
 
 size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
 {
-    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
-    return ZSTD_estimateCStreamSize_usingCCtxParams(&params);
+    ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);  /* non-const: useRowMatchFinder is overridden below */
+    if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
+        /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
+        size_t noRowCCtxSize;
+        size_t rowCCtxSize;
+        initialParams.useRowMatchFinder = ZSTD_urm_disableRowMatchFinder;
+        noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
+        initialParams.useRowMatchFinder = ZSTD_urm_enableRowMatchFinder;
+        rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
+        return MAX(noRowCCtxSize, rowCCtxSize);  /* the estimate must cover whichever mode ends up being used */
+    } else {
+        return ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);  /* strategy has no row matchfinder: a single estimate suffices */
+    }
 }
 
 static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
@@ -1522,20 +1708,27 @@
     ZSTD_resetTarget_CCtx
 } ZSTD_resetTarget_e;
 
+
 static size_t
 ZSTD_reset_matchState(ZSTD_matchState_t* ms,
                       ZSTD_cwksp* ws,
                 const ZSTD_compressionParameters* cParams,
+                const ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
                 const ZSTD_compResetPolicy_e crp,
                 const ZSTD_indexResetPolicy_e forceResetIndex,
                 const ZSTD_resetTarget_e forWho)
 {
-    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+    /* disable chain table allocation for fast or row-based strategies */
+    size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder,
+                                                     ms->dedicatedDictSearch && (forWho == ZSTD_resetTarget_CDict))
+                                ? ((size_t)1 << cParams->chainLog)
+                                : 0;
     size_t const hSize = ((size_t)1) << cParams->hashLog;
     U32    const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
     size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
 
     DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
+    assert(useRowMatchFinder != ZSTD_urm_auto);
     if (forceResetIndex == ZSTDirp_reset) {
         ZSTD_window_init(&ms->window);
         ZSTD_cwksp_mark_tables_dirty(ws);
@@ -1574,11 +1767,23 @@
         ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
     }
 
+    if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
+        {   /* Row match finder needs an additional table of hashes ("tags") */
+            size_t const tagTableSize = hSize*sizeof(U16);
+            ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
+            if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);
+        }
+        {   /* Switch to 32-entry rows if searchLog is 5 (or more) */
+            U32 const rowLog = cParams->searchLog < 5 ? 4 : 5;
+            assert(cParams->hashLog > rowLog);
+            ms->rowHashLog = cParams->hashLog - rowLog;
+        }
+    }
+
     ms->cParams = *cParams;
 
     RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
                     "failed a workspace allocation in ZSTD_reset_matchState");
-
     return 0;
 }
 
@@ -1595,62 +1800,85 @@
     return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
 }
 
+/** ZSTD_dictTooBig():
+ * When dictionaries are larger than ZSTD_CHUNKSIZE_MAX they can't be loaded in
+ * one go generically. So we ensure that in that case we reset the tables to zero,
+ * so that we can load as much of the dictionary as possible.
+ */
+static int ZSTD_dictTooBig(size_t const loadedDictSize)
+{
+    return loadedDictSize > ZSTD_CHUNKSIZE_MAX;  /* strict: a dictionary of exactly ZSTD_CHUNKSIZE_MAX bytes is not "too big" */
+}
+
 /*! ZSTD_resetCCtx_internal() :
-    note : `params` are assumed fully validated at this stage */
+ * @param loadedDictSize The size of the dictionary to be loaded
+ * into the context, if any. If no dictionary is used, or the
+ * dictionary is being attached / copied, then pass 0.
+ * note : `params` are assumed fully validated at this stage.
+ */
 static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
-                                      ZSTD_CCtx_params params,
+                                      ZSTD_CCtx_params const* params,
                                       U64 const pledgedSrcSize,
+                                      size_t const loadedDictSize,
                                       ZSTD_compResetPolicy_e const crp,
                                       ZSTD_buffered_policy_e const zbuff)
 {
     ZSTD_cwksp* const ws = &zc->workspace;
-    DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u",
-                (U32)pledgedSrcSize, params.cParams.windowLog);
-    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+    DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d",
+                (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder);
+    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
 
     zc->isFirstBlock = 1;
 
-    if (params.ldmParams.enableLdm) {
+    /* Set applied params early so we can modify them for LDM,
+     * and point params at the applied params.
+     */
+    zc->appliedParams = *params;
+    params = &zc->appliedParams;
+
+    assert(params->useRowMatchFinder != ZSTD_urm_auto);
+    if (params->ldmParams.enableLdm) {
         /* Adjust long distance matching parameters */
-        ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
-        assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
-        assert(params.ldmParams.hashRateLog < 32);
-        zc->ldmState.hashPower = ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
+        ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, &params->cParams);
+        assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog);
+        assert(params->ldmParams.hashRateLog < 32);
     }
 
-    {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
+    {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
         size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
-        U32    const divider = (params.cParams.minMatch==3) ? 3 : 4;
+        U32    const divider = (params->cParams.minMatch==3) ? 3 : 4;
         size_t const maxNbSeq = blockSize / divider;
-        size_t const buffOutSize = (zbuff == ZSTDb_buffered && params.outBufferMode == ZSTD_bm_buffered)
+        size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
                 ? ZSTD_compressBound(blockSize) + 1
                 : 0;
-        size_t const buffInSize = (zbuff == ZSTDb_buffered && params.inBufferMode == ZSTD_bm_buffered)
+        size_t const buffInSize = (zbuff == ZSTDb_buffered && params->inBufferMode == ZSTD_bm_buffered)
                 ? windowSize + blockSize
                 : 0;
-        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
+        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize);
 
         int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);
+        int const dictTooBig = ZSTD_dictTooBig(loadedDictSize);
         ZSTD_indexResetPolicy_e needsIndexReset =
-            (!indexTooClose && zc->initialized) ? ZSTDirp_continue : ZSTDirp_reset;
+            (indexTooClose || dictTooBig || !zc->initialized) ? ZSTDirp_reset : ZSTDirp_continue;
 
         size_t const neededSpace =
             ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-                &params.cParams, &params.ldmParams, zc->staticSize != 0,
+                &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
                 buffInSize, buffOutSize, pledgedSrcSize);
+        int resizeWorkspace;
+
         FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
 
         if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);
 
-        /* Check if workspace is large enough, alloc a new one if needed */
-        {
+        {   /* Check if workspace is large enough, alloc a new one if needed */
             int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
             int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
-
+            resizeWorkspace = workspaceTooSmall || workspaceWasteful;
             DEBUGLOG(4, "Need %zu B workspace", neededSpace);
             DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
 
-            if (workspaceTooSmall || workspaceWasteful) {
+            if (resizeWorkspace) {
                 DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
                             ZSTD_cwksp_sizeof(ws) >> 10,
                             neededSpace >> 10);
@@ -1678,8 +1906,7 @@
         ZSTD_cwksp_clear(ws);
 
         /* init params */
-        zc->appliedParams = params;
-        zc->blockState.matchState.cParams = params.cParams;
+        zc->blockState.matchState.cParams = params->cParams;
         zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
         zc->consumedSrcSize = 0;
         zc->producedCSize = 0;
@@ -1692,6 +1919,7 @@
         XXH64_reset(&zc->xxhState, 0);
         zc->stage = ZSTDcs_init;
         zc->dictID = 0;
+        zc->dictContentSize = 0;
 
         ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
 
@@ -1709,13 +1937,13 @@
         zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);
 
         /* ldm bucketOffsets table */
-        if (params.ldmParams.enableLdm) {
+        if (params->ldmParams.enableLdm) {
             /* TODO: avoid memset? */
-            size_t const ldmBucketSize =
-                  ((size_t)1) << (params.ldmParams.hashLog -
-                                  params.ldmParams.bucketSizeLog);
-            zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize);
-            ZSTD_memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize);
+            size_t const numBuckets =
+                  ((size_t)1) << (params->ldmParams.hashLog -
+                                  params->ldmParams.bucketSizeLog);
+            zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets);
+            ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets);
         }
 
         /* sequences storage */
@@ -1729,32 +1957,28 @@
         FORWARD_IF_ERROR(ZSTD_reset_matchState(
             &zc->blockState.matchState,
             ws,
-            &params.cParams,
+            &params->cParams,
+            params->useRowMatchFinder,
             crp,
             needsIndexReset,
             ZSTD_resetTarget_CCtx), "");
 
         /* ldm hash table */
-        if (params.ldmParams.enableLdm) {
+        if (params->ldmParams.enableLdm) {
             /* TODO: avoid memset? */
-            size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
+            size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
             zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
             ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
             zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
             zc->maxNbLdmSequences = maxNbLdmSeq;
 
             ZSTD_window_init(&zc->ldmState.window);
-            ZSTD_window_clear(&zc->ldmState.window);
             zc->ldmState.loadedDictEnd = 0;
         }
 
-        /* Due to alignment, when reusing a workspace, we can actually consume
-         * up to 3 extra bytes for alignment. See the comments in zstd_cwksp.h
-         */
-        assert(ZSTD_cwksp_used(ws) >= neededSpace &&
-               ZSTD_cwksp_used(ws) <= neededSpace + 3);
-
+        assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
         DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
+
         zc->initialized = 1;
 
         return 0;
@@ -1810,6 +2034,8 @@
                         U64 pledgedSrcSize,
                         ZSTD_buffered_policy_e zbuff)
 {
+    DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu",
+                (unsigned long long)pledgedSrcSize);
     {
         ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams;
         unsigned const windowLog = params.cParams.windowLog;
@@ -1825,7 +2051,9 @@
         params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
                                                      cdict->dictContentSize, ZSTD_cpm_attachDict);
         params.cParams.windowLog = windowLog;
-        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+        params.useRowMatchFinder = cdict->useRowMatchFinder;    /* cdict overrides */
+        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
+                                                 /* loadedDictSize */ 0,
                                                  ZSTDcrp_makeClean, zbuff), "");
         assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy);
     }
@@ -1852,6 +2080,7 @@
     }   }
 
     cctx->dictID = cdict->dictID;
+    cctx->dictContentSize = cdict->dictContentSize;
 
     /* copy block state */
     ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
@@ -1868,15 +2097,17 @@
     const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
 
     assert(!cdict->matchState.dedicatedDictSearch);
-
-    DEBUGLOG(4, "copying dictionary into context");
+    DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu",
+                (unsigned long long)pledgedSrcSize);
 
     {   unsigned const windowLog = params.cParams.windowLog;
         assert(windowLog != 0);
         /* Copy only compression parameters related to tables. */
         params.cParams = *cdict_cParams;
         params.cParams.windowLog = windowLog;
-        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+        params.useRowMatchFinder = cdict->useRowMatchFinder;
+        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
+                                                 /* loadedDictSize */ 0,
                                                  ZSTDcrp_leaveDirty, zbuff), "");
         assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
         assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
@@ -1884,17 +2115,30 @@
     }
 
     ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
+    assert(params.useRowMatchFinder != ZSTD_urm_auto);
 
     /* copy tables */
-    {   size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog);
+    {   size_t const chainSize = ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */)
+                                                            ? ((size_t)1 << cdict_cParams->chainLog)
+                                                            : 0;
         size_t const hSize =  (size_t)1 << cdict_cParams->hashLog;
 
         ZSTD_memcpy(cctx->blockState.matchState.hashTable,
                cdict->matchState.hashTable,
                hSize * sizeof(U32));
-        ZSTD_memcpy(cctx->blockState.matchState.chainTable,
+        /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */
+        if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) {
+            ZSTD_memcpy(cctx->blockState.matchState.chainTable,
                cdict->matchState.chainTable,
                chainSize * sizeof(U32));
+        }
+        /* copy tag table */
+        if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
+            size_t const tagTableSize = hSize*sizeof(U16);
+            ZSTD_memcpy(cctx->blockState.matchState.tagTable,
+                cdict->matchState.tagTable,
+                tagTableSize);
+        }
     }
 
     /* Zero the hashTable3, since the cdict never fills it */
@@ -1915,6 +2159,7 @@
     }
 
     cctx->dictID = cdict->dictID;
+    cctx->dictContentSize = cdict->dictContentSize;
 
     /* copy block state */
     ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
@@ -1957,16 +2202,18 @@
                             U64 pledgedSrcSize,
                             ZSTD_buffered_policy_e zbuff)
 {
-    DEBUGLOG(5, "ZSTD_copyCCtx_internal");
     RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,
                     "Can't copy a ctx that's not in init stage.");
-
+    DEBUGLOG(5, "ZSTD_copyCCtx_internal");
     ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
     {   ZSTD_CCtx_params params = dstCCtx->requestedParams;
         /* Copy only compression parameters related to tables. */
         params.cParams = srcCCtx->appliedParams.cParams;
+        assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_urm_auto);
+        params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder;
         params.fParams = fParams;
-        ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
+        ZSTD_resetCCtx_internal(dstCCtx, &params, pledgedSrcSize,
+                                /* loadedDictSize */ 0,
                                 ZSTDcrp_leaveDirty, zbuff);
         assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
         assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
@@ -1978,7 +2225,11 @@
     ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);
 
     /* copy tables */
-    {   size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
+    {   size_t const chainSize = ZSTD_allocateChainTable(srcCCtx->appliedParams.cParams.strategy,
+                                                         srcCCtx->appliedParams.useRowMatchFinder,
+                                                         0 /* forDDSDict */)
+                                    ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog)
+                                    : 0;
         size_t const hSize =  (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
         int const h3log = srcCCtx->blockState.matchState.hashLog3;
         size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
@@ -2005,6 +2256,7 @@
         dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
     }
     dstCCtx->dictID = srcCCtx->dictID;
+    dstCCtx->dictContentSize = srcCCtx->dictContentSize;
 
     /* copy block state */
     ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));
@@ -2091,7 +2343,7 @@
         ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
     }
 
-    if (params->cParams.strategy != ZSTD_fast) {
+    if (ZSTD_allocateChainTable(params->cParams.strategy, params->useRowMatchFinder, (U32)ms->dedicatedDictSearch)) {
         U32 const chainSize = (U32)1 << params->cParams.chainLog;
         if (params->cParams.strategy == ZSTD_btlazy2)
             ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
@@ -2128,9 +2380,9 @@
         ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
         mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
     }
-    if (seqStorePtr->longLengthID==1)
+    if (seqStorePtr->longLengthType==ZSTD_llt_literalLength)
         llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
-    if (seqStorePtr->longLengthID==2)
+    if (seqStorePtr->longLengthType==ZSTD_llt_matchLength)
         mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
 }
 
@@ -2144,10 +2396,158 @@
     return (cctxParams->targetCBlockSize != 0);
 }
 
-/* ZSTD_entropyCompressSequences_internal():
- * actually compresses both literals and sequences */
+/* ZSTD_blockSplitterEnabled():
+ * Reports whether the block splitting parameter is set, i.e. whether cctxParams->splitBlocks is non-zero.
+ * If used, compression will do best effort to split a block in order to improve compression ratio.
+ * @return : 1 if splitBlocks != 0, 0 otherwise. cctxParams is only read (and its value logged at debug level 5). */
+static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams)
+{
+    DEBUGLOG(5, "ZSTD_blockSplitterEnabled(splitBlocks=%d)", cctxParams->splitBlocks);
+    return (cctxParams->splitBlocks != 0);
+}
+
+/* Type returned by ZSTD_buildSequencesStatistics containing the finalized symbol encoding types
+ * and the size of the sequences statistics; on failure, `size` carries a zstd error code instead.
+ */
+typedef struct {
+    U32 LLtype;           /* encoding type selected for literal length codes (cast to symbolEncodingType_e by the builder) */
+    U32 Offtype;          /* encoding type selected for offset codes */
+    U32 MLtype;           /* encoding type selected for match length codes */
+    size_t size;          /* total of the sizes returned by ZSTD_buildCTable(), or a zstd error code (test with ZSTD_isError()) */
+    size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
+} ZSTD_symbolEncodingTypeStats_t;
+
+/* ZSTD_buildSequencesStatistics():
+ * Selects an encoding type for the LL, Offset and ML codes (in that order) and calls ZSTD_buildCTable() for each, advancing within [dst, dstEnd).
+ * Returns a ZSTD_symbolEncodingTypeStats_t, or a zstd error code in the `size` field (in which case later type fields may be left unset).
+ * Modifies `nextEntropy` to have the appropriate values as a side effect.
+ * nbSeq must be greater than 0. `lastCountSize` is the size of the last table built as set_compressed, 0 if there was none.
+ * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32)
+ */
+static ZSTD_symbolEncodingTypeStats_t
+ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
+                        const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
+                              BYTE* dst, const BYTE* const dstEnd,
+                              ZSTD_strategy strategy, unsigned* countWorkspace,
+                              void* entropyWorkspace, size_t entropyWkspSize) {
+    BYTE* const ostart = dst;
+    const BYTE* const oend = dstEnd;
+    BYTE* op = ostart;
+    FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
+    FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
+    FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
+    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
+    const BYTE* const llCodeTable = seqStorePtr->llCode;
+    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
+    ZSTD_symbolEncodingTypeStats_t stats;
+
+    stats.lastCountSize = 0;   /* stays 0 unless one of the three tables below is built as set_compressed */
+    /* convert length/distances into codes */
+    ZSTD_seqToCodes(seqStorePtr);
+    assert(op <= oend);
+    assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */
+    /* build CTable for Literal Lengths */
+    {   unsigned max = MaxLL;
+        size_t const mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
+        DEBUGLOG(5, "Building LL table");
+        nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
+        stats.LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
+                                        countWorkspace, max, mostFrequent, nbSeq,
+                                        LLFSELog, prevEntropy->litlengthCTable,
+                                        LL_defaultNorm, LL_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(set_basic < set_compressed && set_rle < set_compressed);
+        assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(
+                op, (size_t)(oend - op),
+                CTable_LitLength, LLFSELog, (symbolEncodingType_e)stats.LLtype,
+                countWorkspace, max, llCodeTable, nbSeq,
+                LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                prevEntropy->litlengthCTable,
+                sizeof(prevEntropy->litlengthCTable),
+                entropyWorkspace, entropyWkspSize);
+            if (ZSTD_isError(countSize)) {
+                DEBUGLOG(3, "ZSTD_buildCTable for LitLens failed");
+                stats.size = countSize;
+                return stats;
+            }
+            if (stats.LLtype == set_compressed)
+                stats.lastCountSize = countSize;
+            op += countSize;
+            assert(op <= oend);
+    }   }
+    /* build CTable for Offsets */
+    {   unsigned max = MaxOff;
+        size_t const mostFrequent = HIST_countFast_wksp(
+            countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);  /* can't fail */
+        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
+        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
+        DEBUGLOG(5, "Building OF table");
+        nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
+        stats.Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
+                                        countWorkspace, max, mostFrequent, nbSeq,
+                                        OffFSELog, prevEntropy->offcodeCTable,
+                                        OF_defaultNorm, OF_defaultNormLog,
+                                        defaultPolicy, strategy);
+        assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(
+                op, (size_t)(oend - op),
+                CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)stats.Offtype,
+                countWorkspace, max, ofCodeTable, nbSeq,
+                OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+                prevEntropy->offcodeCTable,
+                sizeof(prevEntropy->offcodeCTable),
+                entropyWorkspace, entropyWkspSize);
+            if (ZSTD_isError(countSize)) {
+                DEBUGLOG(3, "ZSTD_buildCTable for Offsets failed");
+                stats.size = countSize;
+                return stats;
+            }
+            if (stats.Offtype == set_compressed)
+                stats.lastCountSize = countSize;
+            op += countSize;
+            assert(op <= oend);
+    }   }
+    /* build CTable for MatchLengths */
+    {   unsigned max = MaxML;
+        size_t const mostFrequent = HIST_countFast_wksp(
+            countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
+        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
+        nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
+        stats.MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
+                                        countWorkspace, max, mostFrequent, nbSeq,
+                                        MLFSELog, prevEntropy->matchlengthCTable,
+                                        ML_defaultNorm, ML_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(
+                op, (size_t)(oend - op),
+                CTable_MatchLength, MLFSELog, (symbolEncodingType_e)stats.MLtype,
+                countWorkspace, max, mlCodeTable, nbSeq,
+                ML_defaultNorm, ML_defaultNormLog, MaxML,
+                prevEntropy->matchlengthCTable,
+                sizeof(prevEntropy->matchlengthCTable),
+                entropyWorkspace, entropyWkspSize);
+            if (ZSTD_isError(countSize)) {
+                DEBUGLOG(3, "ZSTD_buildCTable for MatchLengths failed");
+                stats.size = countSize;
+                return stats;
+            }
+            if (stats.MLtype == set_compressed)
+                stats.lastCountSize = countSize;
+            op += countSize;
+            assert(op <= oend);
+    }   }
+    stats.size = (size_t)(op-ostart);
+    return stats;
+}
+
+/* ZSTD_entropyCompressSeqStore_internal():
+ * compresses both literals and sequences
+ * Returns compressed size of block, or a zstd error.
+ */
 MEM_STATIC size_t
-ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
+ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
                           const ZSTD_entropyCTables_t* prevEntropy,
                                 ZSTD_entropyCTables_t* nextEntropy,
                           const ZSTD_CCtx_params* cctxParams,
@@ -2161,22 +2561,20 @@
     FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
     FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
     FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
-    U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
     const seqDef* const sequences = seqStorePtr->sequencesStart;
+    const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
     const BYTE* const ofCodeTable = seqStorePtr->ofCode;
     const BYTE* const llCodeTable = seqStorePtr->llCode;
     const BYTE* const mlCodeTable = seqStorePtr->mlCode;
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstCapacity;
     BYTE* op = ostart;
-    size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
-    BYTE* seqHead;
-    BYTE* lastNCount = NULL;
+    size_t lastCountSize;
 
     entropyWorkspace = count + (MaxSeq + 1);
     entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
 
-    DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq);
+    DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq);
     ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
     assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
 
@@ -2216,95 +2614,20 @@
         ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
         return (size_t)(op - ostart);
     }
-
-    /* seqHead : flags for FSE encoding type */
-    seqHead = op++;
-    assert(op <= oend);
-
-    /* convert length/distances into codes */
-    ZSTD_seqToCodes(seqStorePtr);
-    /* build CTable for Literal Lengths */
-    {   unsigned max = MaxLL;
-        size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
-        DEBUGLOG(5, "Building LL table");
-        nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
-        LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode,
-                                        count, max, mostFrequent, nbSeq,
-                                        LLFSELog, prevEntropy->fse.litlengthCTable,
-                                        LL_defaultNorm, LL_defaultNormLog,
-                                        ZSTD_defaultAllowed, strategy);
-        assert(set_basic < set_compressed && set_rle < set_compressed);
-        assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(
-                op, (size_t)(oend - op),
-                CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
-                count, max, llCodeTable, nbSeq,
-                LL_defaultNorm, LL_defaultNormLog, MaxLL,
-                prevEntropy->fse.litlengthCTable,
-                sizeof(prevEntropy->fse.litlengthCTable),
-                entropyWorkspace, entropyWkspSize);
-            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
-            if (LLtype == set_compressed)
-                lastNCount = op;
-            op += countSize;
-            assert(op <= oend);
-    }   }
-    /* build CTable for Offsets */
-    {   unsigned max = MaxOff;
-        size_t const mostFrequent = HIST_countFast_wksp(
-            count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);  /* can't fail */
-        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
-        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
-        DEBUGLOG(5, "Building OF table");
-        nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode;
-        Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode,
-                                        count, max, mostFrequent, nbSeq,
-                                        OffFSELog, prevEntropy->fse.offcodeCTable,
-                                        OF_defaultNorm, OF_defaultNormLog,
-                                        defaultPolicy, strategy);
-        assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(
-                op, (size_t)(oend - op),
-                CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
-                count, max, ofCodeTable, nbSeq,
-                OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
-                prevEntropy->fse.offcodeCTable,
-                sizeof(prevEntropy->fse.offcodeCTable),
-                entropyWorkspace, entropyWkspSize);
-            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
-            if (Offtype == set_compressed)
-                lastNCount = op;
-            op += countSize;
-            assert(op <= oend);
-    }   }
-    /* build CTable for MatchLengths */
-    {   unsigned max = MaxML;
-        size_t const mostFrequent = HIST_countFast_wksp(
-            count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
-        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
-        nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
-        MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode,
-                                        count, max, mostFrequent, nbSeq,
-                                        MLFSELog, prevEntropy->fse.matchlengthCTable,
-                                        ML_defaultNorm, ML_defaultNormLog,
-                                        ZSTD_defaultAllowed, strategy);
-        assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(
-                op, (size_t)(oend - op),
-                CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
-                count, max, mlCodeTable, nbSeq,
-                ML_defaultNorm, ML_defaultNormLog, MaxML,
-                prevEntropy->fse.matchlengthCTable,
-                sizeof(prevEntropy->fse.matchlengthCTable),
-                entropyWorkspace, entropyWkspSize);
-            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
-            if (MLtype == set_compressed)
-                lastNCount = op;
-            op += countSize;
-            assert(op <= oend);
-    }   }
-
-    *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+    {
+        ZSTD_symbolEncodingTypeStats_t stats;
+        BYTE* seqHead = op++;
+        /* build stats for sequences */
+        stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
+                                             &prevEntropy->fse, &nextEntropy->fse,
+                                              op, oend,
+                                              strategy, count,
+                                              entropyWorkspace, entropyWkspSize);
+        FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
+        *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2));
+        lastCountSize = stats.lastCountSize;
+        op += stats.size;
+    }
 
     {   size_t const bitstreamSize = ZSTD_encodeSequences(
                                         op, (size_t)(oend - op),
@@ -2324,9 +2647,9 @@
          * In this exceedingly rare case, we will simply emit an uncompressed
          * block, since it isn't worth optimizing.
          */
-        if (lastNCount && (op - lastNCount) < 4) {
-            /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
-            assert(op - lastNCount == 3);
+        if (lastCountSize && (lastCountSize + bitstreamSize) < 4) {
+            /* lastCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize + bitstreamSize == 3 */
+            assert(lastCountSize + bitstreamSize == 3);
             DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
                         "emitting an uncompressed block.");
             return 0;
@@ -2338,7 +2661,7 @@
 }
 
 MEM_STATIC size_t
-ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr,
+ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr,
                        const ZSTD_entropyCTables_t* prevEntropy,
                              ZSTD_entropyCTables_t* nextEntropy,
                        const ZSTD_CCtx_params* cctxParams,
@@ -2347,7 +2670,7 @@
                              void* entropyWorkspace, size_t entropyWkspSize,
                              int bmi2)
 {
-    size_t const cSize = ZSTD_entropyCompressSequences_internal(
+    size_t const cSize = ZSTD_entropyCompressSeqStore_internal(
                             seqStorePtr, prevEntropy, nextEntropy, cctxParams,
                             dst, dstCapacity,
                             entropyWorkspace, entropyWkspSize, bmi2);
@@ -2357,20 +2680,20 @@
      */
     if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
         return 0;  /* block not compressed */
-    FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed");
+    FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed");
 
     /* Check compressibility */
     {   size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
         if (cSize >= maxCSize) return 0;  /* block not compressed */
     }
-    DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize);
+    DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);
     return cSize;
 }
 
 /* ZSTD_selectBlockCompressor() :
  * Not static, but internal use only (used by long distance matcher)
  * assumption : strat is a valid strategy */
-ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e useRowMatchFinder, ZSTD_dictMode_e dictMode)
 {
     static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
         { ZSTD_compressBlock_fast  /* default for 0 */,
@@ -2418,7 +2741,28 @@
     ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
 
     assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
-    selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
+    DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder);
+    if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) {
+        static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = {
+            { ZSTD_compressBlock_greedy_row,
+            ZSTD_compressBlock_lazy_row,
+            ZSTD_compressBlock_lazy2_row },
+            { ZSTD_compressBlock_greedy_extDict_row,
+            ZSTD_compressBlock_lazy_extDict_row,
+            ZSTD_compressBlock_lazy2_extDict_row },
+            { ZSTD_compressBlock_greedy_dictMatchState_row,
+            ZSTD_compressBlock_lazy_dictMatchState_row,
+            ZSTD_compressBlock_lazy2_dictMatchState_row },
+            { ZSTD_compressBlock_greedy_dedicatedDictSearch_row,
+            ZSTD_compressBlock_lazy_dedicatedDictSearch_row,
+            ZSTD_compressBlock_lazy2_dedicatedDictSearch_row }
+        };
+        DEBUGLOG(4, "Selecting a row-based matchfinder");
+        assert(useRowMatchFinder != ZSTD_urm_auto);
+        selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy];
+    } else {
+        selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
+    }
     assert(selectedCompressor != NULL);
     return selectedCompressor;
 }
@@ -2434,7 +2778,7 @@
 {
     ssPtr->lit = ssPtr->litStart;
     ssPtr->sequences = ssPtr->sequencesStart;
-    ssPtr->longLengthID = 0;
+    ssPtr->longLengthType = ZSTD_llt_none;
 }
 
 typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
@@ -2487,6 +2831,7 @@
                 ZSTD_ldm_blockCompress(&zc->externSeqStore,
                                        ms, &zc->seqStore,
                                        zc->blockState.nextCBlock->rep,
+                                       zc->appliedParams.useRowMatchFinder,
                                        src, srcSize);
             assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
         } else if (zc->appliedParams.ldmParams.enableLdm) {
@@ -2503,10 +2848,13 @@
                 ZSTD_ldm_blockCompress(&ldmSeqStore,
                                        ms, &zc->seqStore,
                                        zc->blockState.nextCBlock->rep,
+                                       zc->appliedParams.useRowMatchFinder,
                                        src, srcSize);
             assert(ldmSeqStore.pos == ldmSeqStore.size);
         } else {   /* not long range mode */
-            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
+            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
+                                                                                    zc->appliedParams.useRowMatchFinder,
+                                                                                    dictMode);
             ms->ldmSeqStore = NULL;
             lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
         }
@@ -2540,9 +2888,9 @@
         outSeqs[i].rep = 0;
 
         if (i == seqStore->longLengthPos) {
-            if (seqStore->longLengthID == 1) {
+            if (seqStore->longLengthType == ZSTD_llt_literalLength) {
                 outSeqs[i].litLength += 0x10000;
-            } else if (seqStore->longLengthID == 2) {
+            } else if (seqStore->longLengthType == ZSTD_llt_matchLength) {
                 outSeqs[i].matchLength += 0x10000;
             }
         }
@@ -2653,11 +3001,713 @@
     return nbSeqs < 4 && nbLits < 10;
 }
 
-static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc)
+static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)
 {
-    ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
-    zc->blockState.prevCBlock = zc->blockState.nextCBlock;
-    zc->blockState.nextCBlock = tmp;
+    ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock;   /* swap the prevCBlock and nextCBlock pointers */
+    bs->prevCBlock = bs->nextCBlock;
+    bs->nextCBlock = tmp;
+}
+
+/* Writes the 24-bit little-endian block header at op: an RLE header carrying blockSize when cSize == 1, otherwise a compressed-block header carrying cSize */
+static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) {
+    U32 const cBlockHeader = cSize == 1 ?
+                        lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
+                        lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
+    MEM_writeLE24(op, cBlockHeader);
+    DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock);
+}
+
+/** ZSTD_buildBlockEntropyStats_literals() :
+ *  Builds entropy for the literals.
+ *  Stores literals block type (raw, rle, compressed, repeat) and
+ *  huffman description table to hufMetadata.
+ *  Requires ENTROPY_WORKSPACE_SIZE workspace
+ *  @return : size of huffman description table (0 when hType is set_basic, set_rle or set_repeat), or error code */
+static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,
+                                            const ZSTD_hufCTables_t* prevHuf,
+                                                  ZSTD_hufCTables_t* nextHuf,
+                                                  ZSTD_hufCTablesMetadata_t* hufMetadata,
+                                                  const int disableLiteralsCompression,
+                                                  void* workspace, size_t wkspSize)
+{
+    BYTE* const wkspStart = (BYTE*)workspace;
+    BYTE* const wkspEnd = wkspStart + wkspSize;
+    BYTE* const countWkspStart = wkspStart;
+    unsigned* const countWksp = (unsigned*)workspace;
+    const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
+    BYTE* const nodeWksp = countWkspStart + countWkspSize;
+    const size_t nodeWkspSize = wkspEnd-nodeWksp;
+    unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
+    unsigned huffLog = HUF_TABLELOG_DEFAULT;
+    HUF_repeat repeat = prevHuf->repeatMode;
+    DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize);
+
+    /* Prepare nextEntropy assuming reusing the existing table */
+    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+
+    if (disableLiteralsCompression) {
+        DEBUGLOG(5, "set_basic - disabled");
+        hufMetadata->hType = set_basic;
+        return 0;
+    }
+
+    /* Too few literals: don't even attempt compression (speed optimization) */
+#ifndef COMPRESS_LITERALS_SIZE_MIN
+#define COMPRESS_LITERALS_SIZE_MIN 63
+#endif
+    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
+        if (srcSize <= minLitSize) {
+            DEBUGLOG(5, "set_basic - too small");
+            hufMetadata->hType = set_basic;
+            return 0;
+        }
+    }
+
+    /* Scan input and build symbol stats */
+    {   size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
+        FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
+        if (largest == srcSize) {
+            DEBUGLOG(5, "set_rle");
+            hufMetadata->hType = set_rle;
+            return 0;
+        }
+        if (largest <= (srcSize >> 7)+4) {
+            DEBUGLOG(5, "set_basic - no gain");
+            hufMetadata->hType = set_basic;
+            return 0;
+        }
+    }
+
+    /* Validate the previous Huffman table against the new symbol counts */
+    if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
+        repeat = HUF_repeat_none;
+    }
+
+    /* Build Huffman Tree */
+    ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
+    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
+    {   size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
+                                                    maxSymbolValue, huffLog,
+                                                    nodeWksp, nodeWkspSize);
+        FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
+        huffLog = (U32)maxBits;
+        {   /* Estimate the compressed size with the new table and write its description into hufDesBuffer */
+            size_t const newCSize = HUF_estimateCompressedSize(
+                    (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
+            size_t const hSize = HUF_writeCTable_wksp(
+                    hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
+                    (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
+                    nodeWksp, nodeWkspSize);
+            /* Check against repeating the previous CTable */
+            if (repeat != HUF_repeat_none) {
+                size_t const oldCSize = HUF_estimateCompressedSize(
+                        (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
+                if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
+                    DEBUGLOG(5, "set_repeat - smaller");
+                    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+                    hufMetadata->hType = set_repeat;
+                    return 0;
+                }
+            }
+            if (newCSize + hSize >= srcSize) {
+                DEBUGLOG(5, "set_basic - no gains");
+                ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+                hufMetadata->hType = set_basic;
+                return 0;
+            }
+            DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
+            hufMetadata->hType = set_compressed;
+            nextHuf->repeatMode = HUF_repeat_check;
+            return hSize;
+        }
+    }
+}
+
+
+/* ZSTD_buildDummySequencesStatistics():
+ * Returns a ZSTD_symbolEncodingTypeStats_t with all encoding types as set_basic,
+ * and resets the three repeat modes of nextEntropy (litlength, offcode, matchlength) to FSE_repeat_none.
+ */
+static ZSTD_symbolEncodingTypeStats_t
+ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) {
+    ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0};
+    nextEntropy->litlength_repeatMode = FSE_repeat_none;
+    nextEntropy->offcode_repeatMode = FSE_repeat_none;
+    nextEntropy->matchlength_repeatMode = FSE_repeat_none;
+    return stats;
+}
+
+/** ZSTD_buildBlockEntropyStats_sequences() :
+ *  Builds entropy for the sequences.
+ *  Stores symbol compression modes, lastCountSize and fse tables to fseMetadata.
+ *  Requires ENTROPY_WORKSPACE_SIZE wksp.
+ *  @return : size of fse tables or error code */
+static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
+                                              const ZSTD_fseCTables_t* prevEntropy,
+                                                    ZSTD_fseCTables_t* nextEntropy,
+                                              const ZSTD_CCtx_params* cctxParams,
+                                                    ZSTD_fseCTablesMetadata_t* fseMetadata,
+                                                    void* workspace, size_t wkspSize)
+{
+    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
+    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
+    BYTE* const ostart = fseMetadata->fseTablesBuffer;
+    BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
+    BYTE* op = ostart;
+    unsigned* countWorkspace = (unsigned*)workspace;
+    unsigned* entropyWorkspace = countWorkspace + (MaxSeq + 1);   /* starts right after the (MaxSeq + 1) counters */
+    size_t entropyWorkspaceSize = wkspSize - (MaxSeq + 1) * sizeof(*countWorkspace);
+    ZSTD_symbolEncodingTypeStats_t stats;
+
+    DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq);
+    stats = nbSeq != 0 ? ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
+                                          prevEntropy, nextEntropy, op, oend,
+                                          strategy, countWorkspace,
+                                          entropyWorkspace, entropyWorkspaceSize)
+                       : ZSTD_buildDummySequencesStatistics(nextEntropy);   /* no sequences: use the all-set_basic placeholder */
+    FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
+    fseMetadata->llType = (symbolEncodingType_e) stats.LLtype;
+    fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype;
+    fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype;
+    fseMetadata->lastCountSize = stats.lastCountSize;
+    return stats.size;
+}
+
+
+/** ZSTD_buildBlockEntropyStats() :
+ *  Builds entropy for the block: literals first, then sequences, both using the same workspace.
+ *  Requires workspace size ENTROPY_WORKSPACE_SIZE
+ *  The resulting sizes are stored in entropyMetadata (hufMetadata.hufDesSize, fseMetadata.fseTablesSize).
+ *  @return : 0 on success or error code
+ */
+size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
+                             const ZSTD_entropyCTables_t* prevEntropy,
+                                   ZSTD_entropyCTables_t* nextEntropy,
+                             const ZSTD_CCtx_params* cctxParams,
+                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                                   void* workspace, size_t wkspSize)
+{
+    size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
+    entropyMetadata->hufMetadata.hufDesSize =
+        ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize,
+                                            &prevEntropy->huf, &nextEntropy->huf,
+                                            &entropyMetadata->hufMetadata,
+                                            ZSTD_disableLiteralsCompression(cctxParams),
+                                            workspace, wkspSize);
+    FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed");
+    entropyMetadata->fseMetadata.fseTablesSize =
+        ZSTD_buildBlockEntropyStats_sequences(seqStorePtr,
+                                              &prevEntropy->fse, &nextEntropy->fse,
+                                              cctxParams,
+                                              &entropyMetadata->fseMetadata,
+                                              workspace, wkspSize);
+    FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed");
+    return 0;
+}
+
+/* Returns the size estimate for the literals section of a block (the section header is only added for set_compressed / set_repeat) */
+static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,
+                                                const ZSTD_hufCTables_t* huf,
+                                                const ZSTD_hufCTablesMetadata_t* hufMetadata,
+                                                void* workspace, size_t wkspSize,
+                                                int writeEntropy)
+{
+    unsigned* const countWksp = (unsigned*)workspace;
+    unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
+    size_t literalSectionHeaderSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB);
+    U32 singleStream = litSize < 256;
+
+    if (hufMetadata->hType == set_basic) return litSize;
+    else if (hufMetadata->hType == set_rle) return 1;
+    else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) {
+        size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);
+        if (ZSTD_isError(largest)) return litSize;   /* histogram failed: fall back to the uncompressed literals size */
+        {   size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue);
+            if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize;
+            if (!singleStream) cLitSizeEstimate += 6; /* multi-stream huffman uses 6-byte jump table */
+            return cLitSizeEstimate + literalSectionHeaderSize;
+    }   }
+    assert(0); /* impossible */
+    return 0;
+}
+
+/* Returns the size estimate, in bytes, for the FSE-compressed symbols (of, ml, ll) of a block, additional bits included */
+static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
+                        const BYTE* codeTable, size_t nbSeq, unsigned maxCode,
+                        const FSE_CTable* fseCTable,
+                        const U32* additionalBits,
+                        short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+                        void* workspace, size_t wkspSize)
+{
+    unsigned* const countWksp = (unsigned*)workspace;
+    const BYTE* ctp = codeTable;
+    const BYTE* const ctStart = ctp;
+    const BYTE* const ctEnd = ctStart + nbSeq;
+    size_t cSymbolTypeSizeEstimateInBits = 0;
+    unsigned max = maxCode;
+
+    HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize);  /* can't fail */
+    if (type == set_basic) {
+        /* We selected this encoding type, so it must be valid. */
+        assert(max <= defaultMax);
+        (void)defaultMax;
+        cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);
+    } else if (type == set_rle) {
+        cSymbolTypeSizeEstimateInBits = 0;
+    } else if (type == set_compressed || type == set_repeat) {
+        cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
+    }
+    if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) {
+        return nbSeq * 10;   /* cost could not be computed: fallback estimate */
+    }
+    while (ctp < ctEnd) {
+        if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
+        else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
+        ctp++;
+    }
+    return cSymbolTypeSizeEstimateInBits >> 3;
+}
+
+/* Returns the size estimate for the sequences section (header + of/ll/ml content, plus FSE tables if writeEntropy) of a block */
+static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,
+                                                  const BYTE* llCodeTable,
+                                                  const BYTE* mlCodeTable,
+                                                  size_t nbSeq,
+                                                  const ZSTD_fseCTables_t* fseTables,
+                                                  const ZSTD_fseCTablesMetadata_t* fseMetadata,
+                                                  void* workspace, size_t wkspSize,
+                                                  int writeEntropy)
+{
+    size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ);
+    size_t cSeqSizeEstimate = 0;
+    cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff,
+                                         fseTables->offcodeCTable, NULL,
+                                         OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+                                         workspace, wkspSize);
+    cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL,
+                                         fseTables->litlengthCTable, LL_bits,
+                                         LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                                         workspace, wkspSize);
+    cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML,
+                                         fseTables->matchlengthCTable, ML_bits,
+                                         ML_defaultNorm, ML_defaultNormLog, MaxML,
+                                         workspace, wkspSize);
+    if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;   /* count the FSE tables only when requested */
+    return cSeqSizeEstimate + sequencesSectionHeaderSize;
+}
+
+/* Returns the size estimate of a block (block header + literals section + sequences section) for given literals, of, ll, ml */
+static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
+                                     const BYTE* ofCodeTable,
+                                     const BYTE* llCodeTable,
+                                     const BYTE* mlCodeTable,
+                                     size_t nbSeq,
+                                     const ZSTD_entropyCTables_t* entropy,
+                                     const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                                     void* workspace, size_t wkspSize,
+                                     int writeLitEntropy, int writeSeqEntropy) {
+    size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize,
+                                                         &entropy->huf, &entropyMetadata->hufMetadata,
+                                                         workspace, wkspSize, writeLitEntropy);
+    size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
+                                                         nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
+                                                         workspace, wkspSize, writeSeqEntropy);
+    return seqSize + literalsSize + ZSTD_blockHeaderSize;
+}
+
+/* Builds entropy statistics and uses them for blocksize estimation.
+ * The Huffman table cost is counted only when hType == set_compressed; the FSE tables cost is always counted.
+ * Returns the estimated compressed size of the seqStore, or a zstd error.
+ */
+static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, const ZSTD_CCtx* zc) {
+    ZSTD_entropyCTablesMetadata_t entropyMetadata;
+    FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore,
+                    &zc->blockState.prevCBlock->entropy,
+                    &zc->blockState.nextCBlock->entropy,
+                    &zc->appliedParams,
+                    &entropyMetadata,
+                    zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
+    return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
+                    seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
+                    (size_t)(seqStore->sequences - seqStore->sequencesStart),
+                    &zc->blockState.nextCBlock->entropy, &entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
+                    (int)(entropyMetadata.hufMetadata.hType == set_compressed), 1);
+}
+
+/* Returns the number of literal bytes covered by the sequences of a seqStore (literals past the final sequence are not counted) */
+static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) {
+    size_t literalsBytes = 0;
+    size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
+    size_t i;
+    for (i = 0; i < nbSeqs; ++i) {
+        seqDef seq = seqStore->sequencesStart[i];
+        literalsBytes += seq.litLength;
+        if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) {
+            literalsBytes += 0x10000;   /* the long-length sequence carries an extra 0x10000 on top of its stored litLength */
+        }
+    }
+    return literalsBytes;
+}
+
+/* Returns the number of match bytes represented in a seqStore (stored matchLength + MINMATCH for each sequence) */
+static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) {
+    size_t matchBytes = 0;
+    size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
+    size_t i;
+    for (i = 0; i < nbSeqs; ++i) {
+        seqDef seq = seqStore->sequencesStart[i];
+        matchBytes += seq.matchLength + MINMATCH;
+        if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) {
+            matchBytes += 0x10000;   /* the long-length sequence carries an extra 0x10000 on top of its stored matchLength */
+        }
+    }
+    return matchBytes;
+}
+
+/* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx).
+ * Stores the result in resultSeqStore, a shallow copy whose pointers still reference the original's buffers.
+ */
+static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
+                               const seqStore_t* originalSeqStore,
+                                     size_t startIdx, size_t endIdx) {
+    BYTE* const litEnd = originalSeqStore->lit;
+    size_t literalsBytes;
+    size_t literalsBytesPreceding = 0;
+
+    *resultSeqStore = *originalSeqStore;
+    if (startIdx > 0) {
+        resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx;   /* temporarily view [0, startIdx) to count the literals before the chunk */
+        literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
+    }
+
+    /* Re-base longLengthPos onto the chunk, or clear the flag if outside. NOTE(review): pos == endIdx is kept although the chunk is [startIdx, endIdx) - confirm intended */
+    if (originalSeqStore->longLengthType != ZSTD_llt_none) {
+        if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) {
+            resultSeqStore->longLengthType = ZSTD_llt_none;
+        } else {
+            resultSeqStore->longLengthPos -= (U32)startIdx;
+        }
+    }
+    resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx;
+    resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx;
+    literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
+    resultSeqStore->litStart += literalsBytesPreceding;
+    if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) {
+        /* This accounts for possible last literals if the derived chunk reaches the end of the block */
+        resultSeqStore->lit = litEnd;
+    } else {
+        resultSeqStore->lit = resultSeqStore->litStart+literalsBytes;
+    }
+    resultSeqStore->llCode += startIdx;
+    resultSeqStore->mlCode += startIdx;
+    resultSeqStore->ofCode += startIdx;
+}
+
+/**
+ * Returns the raw offset represented by the combination of offCode, ll0, and repcode history.
+ * offCode must be an offCode representing a repcode (asserted: offCode < ZSTD_REP_NUM); ll0 is added to it before the lookup.
+ */
+static U32 ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0) {
+    U32 const adjustedOffCode = offCode + ll0;
+    assert(offCode < ZSTD_REP_NUM);
+    if (adjustedOffCode == ZSTD_REP_NUM) {
+        /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */
+        assert(rep[0] > 0);
+        return rep[0] - 1;
+    }
+    return rep[adjustedOffCode];
+}
+
+/**
+ * ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise
+ * due to emission of RLE/raw blocks that disturb the offset history, and replaces any repcodes within
+ * the seqStore that may be invalid.
+ *
+ * dRepcodes are updated as would be on the decompression side. cRepcodes are updated exactly in
+ * accordance with the seqStore. Only the first nbSeq sequences of seqStore are visited; seq->offset may be rewritten.
+ */
+static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
+                                          seqStore_t* const seqStore, U32 const nbSeq) {
+    U32 idx = 0;
+    for (; idx < nbSeq; ++idx) {
+        seqDef* const seq = seqStore->sequencesStart + idx;
+        U32 const ll0 = (seq->litLength == 0);
+        U32 offCode = seq->offset - 1;
+        assert(seq->offset > 0);
+        if (offCode <= ZSTD_REP_MOVE) {
+            U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0);
+            U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0);
+            /* Adjust simulated decompression repcode history if we come across a mismatch. Replace
+             * the repcode with the offset it actually references, determined by the compression
+             * repcode history.
+             */
+            if (dRawOffset != cRawOffset) {
+                seq->offset = cRawOffset + ZSTD_REP_NUM;   /* stored value is now above the repcode range tested above */
+            }
+        }
+        /* Compression repcode history is always updated with values directly from the unmodified seqStore.
+         * Decompression repcode history may use modified seq->offset value taken from compression repcode history.
+         */
+        *dRepcodes = ZSTD_updateRep(dRepcodes->rep, seq->offset - 1, ll0);
+        *cRepcodes = ZSTD_updateRep(cRepcodes->rep, offCode, ll0);
+    }
+}
+
+/* ZSTD_compressSeqStore_singleBlock():
+ * Compresses a seqStore into a block with a block header, into the buffer dst.
+ * The block is written uncompressed when cSeqsSize == 0, as RLE when cSeqsSize == 1, and compressed otherwise.
+ * Returns the total size of that block (including header) or a ZSTD error code (0 when sequences are only being collected).
+ */
+static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore,
+                                                repcodes_t* const dRep, repcodes_t* const cRep,
+                                                void* dst, size_t dstCapacity,
+                                                const void* src, size_t srcSize,
+                                                U32 lastBlock, U32 isPartition) {
+    const U32 rleMaxLength = 25;
+    BYTE* op = (BYTE*)dst;
+    const BYTE* ip = (const BYTE*)src;
+    size_t cSize;
+    size_t cSeqsSize;
+
+    /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */
+    repcodes_t const dRepOriginal = *dRep;
+    if (isPartition)
+        ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart));
+
+    cSeqsSize = ZSTD_entropyCompressSeqStore(seqStore,
+                &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
+                &zc->appliedParams,
+                op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize,
+                srcSize,
+                zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
+                zc->bmi2);
+    FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!");
+
+    if (!zc->isFirstBlock &&
+        cSeqsSize < rleMaxLength &&
+        ZSTD_isRLE((BYTE const*)src, srcSize)) {
+        /* We don't want to emit our first block as a RLE even if it qualifies because
+         * doing so will cause the decoder (cli only) to throw a "should consume all input error."
+         * This is only an issue for zstd <= v1.4.3
+         */
+        cSeqsSize = 1;
+    }
+
+    if (zc->seqCollector.collectSequences) {
+        ZSTD_copyBlockSequences(zc);
+        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
+        return 0;
+    }
+
+    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+
+    if (cSeqsSize == 0) {
+        cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
+        FORWARD_IF_ERROR(cSize, "Nocompress block failed");
+        DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize);
+        *dRep = dRepOriginal; /* reset simulated decompression repcode history */
+    } else if (cSeqsSize == 1) {
+        cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock);
+        FORWARD_IF_ERROR(cSize, "RLE compress block failed");
+        DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize);
+        *dRep = dRepOriginal; /* reset simulated decompression repcode history */
+    } else {
+        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
+        writeBlockHeader(op, cSeqsSize, srcSize, lastBlock);
+        cSize = ZSTD_blockHeaderSize + cSeqsSize;
+        DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize);
+    }
+    return cSize;
+}
+
+/* Struct to keep track of where we are in our recursive calls. */
+typedef struct {
+    U32* splitLocations;    /* Array of split indices (sequence indices, recorded in ascending order) */
+    size_t idx;             /* Next free slot in splitLocations; also the number of splits recorded so far */
+} seqStoreSplits;
+
+#define MIN_SEQUENCES_BLOCK_SPLITTING 300   /* sequence ranges shorter than this are never split */
+#define MAX_NB_SPLITS 196   /* size of the partitions[] table that receives the split indices */
+
+/* Helper function to perform the recursive search for block splits.
+ * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.
+ * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then
+ * we do not recurse.
+ *
+ * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING.
+ * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
+ * In practice, recursion depth usually doesn't go beyond 4.
+ *
+ * Furthermore, at most MAX_NB_SPLITS - 1 splits are recorded: ZSTD_deriveBlockSplits() appends one end marker after the
+ * last split, so this bound (checked on entry and again before each store) keeps it inside a MAX_NB_SPLITS-entry table.
+ */
+static void ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
+                                         const ZSTD_CCtx* zc, const seqStore_t* origSeqStore) {
+    seqStore_t fullSeqStoreChunk;
+    seqStore_t firstHalfSeqStore;
+    seqStore_t secondHalfSeqStore;
+    size_t estimatedOriginalSize;
+    size_t estimatedFirstHalfSize;
+    size_t estimatedSecondHalfSize;
+    size_t midIdx = (startIdx + endIdx)/2;
+
+    if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= MAX_NB_SPLITS - 1) {
+        return;
+    }
+    ZSTD_deriveSeqStoreChunk(&fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
+    ZSTD_deriveSeqStoreChunk(&firstHalfSeqStore, origSeqStore, startIdx, midIdx);
+    ZSTD_deriveSeqStoreChunk(&secondHalfSeqStore, origSeqStore, midIdx, endIdx);
+    estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&fullSeqStoreChunk, zc);
+    estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&firstHalfSeqStore, zc);
+    estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&secondHalfSeqStore, zc);
+    DEBUGLOG(5, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
+             estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize);
+    if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) {
+        return;
+    }
+    if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) {
+        ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore);
+        if (splits->idx >= MAX_NB_SPLITS - 1) return;   /* left half used up the table: stop before storing out of bounds */
+        splits->splitLocations[splits->idx++] = (U32)midIdx;
+        ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore);
+    }
+}
+
+/* Entry point of the recursive split search. Populates a table with intra-block partition indices that can improve compression ratio.
+ * When nbSeq <= 4, nothing is written to the table and 0 is returned.
+ * Returns the number of splits made (which equals the size of the partition table - 1).
+ */
+static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) {
+    seqStoreSplits splits = {partitions, 0};
+    if (nbSeq <= 4) {
+        DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split");
+        /* Refuse to try and split anything with 4 or fewer sequences */
+        return 0;
+    }
+    ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore);
+    splits.splitLocations[splits.idx] = nbSeq;   /* end marker: the last partition ends at nbSeq */
+    DEBUGLOG(5, "ZSTD_deriveBlockSplits: final nb partitions: %zu", splits.idx+1);
+    return splits.idx;
+}
+
+/* ZSTD_compressBlock_splitBlock_internal():
+ * Attempts to split a given block into multiple blocks to improve compression ratio.
+ *
+ * Returns combined size of all blocks (which includes headers), or a ZSTD error code.
+ */
+static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity,
+                                                     const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) {
+    size_t cSize = 0;
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    U32 partitions[MAX_NB_SPLITS];
+    size_t i = 0;
+    size_t srcBytesTotal = 0;
+    size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
+    seqStore_t nextSeqStore;
+    seqStore_t currSeqStore;
+
+    /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history
+     * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two
+     * separate repcode histories that simulate repcode history on compression and decompression side,
+     * and use the histories to determine whether we must replace a particular repcode with its raw offset.
+     *
+     * 1) cRep gets updated for each partition, regardless of whether the block was emitted as uncompressed
+     *    or RLE. This allows us to retrieve the offset value that an invalid repcode references within
+     *    a nocompress/RLE block.
+     * 2) dRep gets updated only for compressed partitions, and when a repcode gets replaced, will use
+     *    the replacement offset value rather than the original repcode to update the repcode history.
+     *    dRep also will be the final repcode history sent to the next block.
+     *
+     * See ZSTD_seqStore_resolveOffCodes() for more details.
+     */
+    repcodes_t dRep;
+    repcodes_t cRep;
+    ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
+    ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
+
+    DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
+                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
+                (unsigned)zc->blockState.matchState.nextToUpdate);
+
+    if (numSplits == 0) {
+        size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,
+                                                                   &dRep, &cRep,
+                                                                    op, dstCapacity,
+                                                                    ip, blockSize,
+                                                                    lastBlock, 0 /* isPartition */);
+        FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
+        DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");
+        assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
+        return cSizeSingleBlock;
+    }
+
+    ZSTD_deriveSeqStoreChunk(&currSeqStore, &zc->seqStore, 0, partitions[0]);
+    for (i = 0; i <= numSplits; ++i) {
+        size_t srcBytes;
+        size_t cSizeChunk;
+        U32 const lastPartition = (i == numSplits);
+        U32 lastBlockEntireSrc = 0;
+
+        srcBytes = ZSTD_countSeqStoreLiteralsBytes(&currSeqStore) + ZSTD_countSeqStoreMatchBytes(&currSeqStore);
+        srcBytesTotal += srcBytes;
+        if (lastPartition) {
+            /* This is the final partition, need to account for possible last literals */
+            srcBytes += blockSize - srcBytesTotal;
+            lastBlockEntireSrc = lastBlock;
+        } else {
+            ZSTD_deriveSeqStoreChunk(&nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]);
+        }
+
+        cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, &currSeqStore,
+                                                      &dRep, &cRep,
+                                                       op, dstCapacity,
+                                                       ip, srcBytes,
+                                                       lastBlockEntireSrc, 1 /* isPartition */);
+        DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&currSeqStore, zc), cSizeChunk);
+        FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");
+
+        ip += srcBytes;
+        op += cSizeChunk;
+        dstCapacity -= cSizeChunk;
+        cSize += cSizeChunk;
+        currSeqStore = nextSeqStore;
+        assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
+    }
+    /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes
+     * for the next block.
+     */
+    ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t));
+    return cSize;
+}
+
+static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
+                                        void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize, U32 lastBlock) {  /* @return : compressed size written to dst, or an error code */
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    U32 nbSeq;
+    size_t cSize;
+    DEBUGLOG(4, "ZSTD_compressBlock_splitBlock");
+
+    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
+        FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
+        if (bss == ZSTDbss_noCompress) {  /* emit the whole block raw; no split is attempted */
+            if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+                zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+            cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
+            FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
+            DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
+            return cSize;
+        }
+        nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart);  /* nb of sequences held in zc->seqStore */
+    }
+
+    assert(zc->appliedParams.splitBlocks == 1);
+    cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq);
+    FORWARD_IF_ERROR(cSize, "Splitting blocks failed!");
+    return cSize;
 }
 
 static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
@@ -2683,12 +3733,12 @@
 
     if (zc->seqCollector.collectSequences) {
         ZSTD_copyBlockSequences(zc);
-        ZSTD_confirmRepcodesAndEntropyTables(zc);
+        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
         return 0;
     }
 
     /* encode sequences and literals */
-    cSize = ZSTD_entropyCompressSequences(&zc->seqStore,
+    cSize = ZSTD_entropyCompressSeqStore(&zc->seqStore,
             &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
             &zc->appliedParams,
             dst, dstCapacity,
@@ -2717,7 +3767,7 @@
 
 out:
     if (!ZSTD_isError(cSize) && cSize > 1) {
-        ZSTD_confirmRepcodesAndEntropyTables(zc);
+        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
     }
     /* We check that dictionaries have offset codes available for the first
      * block. After the first block, the offcode table might not have large
@@ -2770,7 +3820,7 @@
                 size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
                 FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
                 if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
-                    ZSTD_confirmRepcodesAndEntropyTables(zc);
+                    ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
                     return cSize;
                 }
             }
@@ -2810,9 +3860,9 @@
                                          void const* ip,
                                          void const* iend)
 {
-    if (ZSTD_window_needOverflowCorrection(ms->window, iend)) {
-        U32 const maxDist = (U32)1 << params->cParams.windowLog;
-        U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
+    U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
+    U32 const maxDist = (U32)1 << params->cParams.windowLog;
+    if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist, ms->loadedDictEnd, ip, iend)) {
         U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
         ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
         ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
@@ -2835,7 +3885,7 @@
 *   Frame is supposed already started (header already produced)
 *   @return : compressed size, or an error code
 */
-static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
+static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
                                      void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize,
                                      U32 lastFrameChunk)
@@ -2875,6 +3925,10 @@
                 FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed");
                 assert(cSize > 0);
                 assert(cSize <= blockSize + ZSTD_blockHeaderSize);
+            } else if (ZSTD_blockSplitterEnabled(&cctx->appliedParams)) {
+                cSize = ZSTD_compressBlock_splitBlock(cctx, op, dstCapacity, ip, blockSize, lastBlock);
+                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed");
+                assert(cSize > 0 || cctx->seqCollector.collectSequences == 1);
             } else {
                 cSize = ZSTD_compressBlock_internal(cctx,
                                         op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
@@ -2954,6 +4008,26 @@
     return pos;
 }
 
+/* ZSTD_writeSkippableFrame() :
+ * Writes out a skippable frame with the specified magic number variant (16 are supported),
+ * from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data.
+ * Layout written to dst : 4-byte LE magic, 4-byte LE srcSize, then srcSize bytes copied from src.
+ * Returns the total number of bytes written, or a ZSTD error code.
+ */
+size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize, unsigned magicVariant) {
+    BYTE* op = (BYTE*)dst;
+    RETURN_ERROR_IF(dstCapacity < ZSTD_SKIPPABLEHEADERSIZE || dstCapacity - ZSTD_SKIPPABLEHEADERSIZE < srcSize,
+                    dstSize_tooSmall, "Not enough room for skippable frame"); /* subtraction form: srcSize + header cannot wrap */
+    RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong, "Src size too large for skippable frame");
+    RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound, "Skippable frame magic number variant not supported");
+
+    MEM_writeLE32(op, (U32)(ZSTD_MAGIC_SKIPPABLE_START + magicVariant));
+    MEM_writeLE32(op+4, (U32)srcSize);
+    ZSTD_memcpy(op+8, src, srcSize);
+    return srcSize + ZSTD_SKIPPABLEHEADERSIZE;
+}
+
 /* ZSTD_writeLastEmptyBlock() :
  * output an empty Block with end-of-frame mark to complete a frame
  * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
@@ -3010,11 +4084,12 @@
 
     if (!srcSize) return fhSize;  /* do not generate an empty block if no input */
 
-    if (!ZSTD_window_update(&ms->window, src, srcSize)) {
+    if (!ZSTD_window_update(&ms->window, src, srcSize, ms->forceNonContiguous)) {
+        ms->forceNonContiguous = 0;
         ms->nextToUpdate = ms->window.dictLimit;
     }
     if (cctx->appliedParams.ldmParams.enableLdm) {
-        ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
+        ZSTD_window_update(&cctx->ldmState.window, src, srcSize, /* forceNonContiguous */ 0);
     }
 
     if (!frame) {
@@ -3082,63 +4157,86 @@
 {
     const BYTE* ip = (const BYTE*) src;
     const BYTE* const iend = ip + srcSize;
-
-    ZSTD_window_update(&ms->window, src, srcSize);
-    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
-
-    if (params->ldmParams.enableLdm && ls != NULL) {
-        ZSTD_window_update(&ls->window, src, srcSize);
-        ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
-    }
+    int const loadLdmDict = params->ldmParams.enableLdm && ls != NULL;
 
     /* Assert that we the ms params match the params we're being given */
     ZSTD_assertEqualCParams(params->cParams, ms->cParams);
 
+    if (srcSize > ZSTD_CHUNKSIZE_MAX) {
+        /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX.
+         * Dictionaries right at the edge will immediately trigger overflow
+         * correction, but I don't want to insert extra constraints here.
+         */
+        U32 const maxDictSize = ZSTD_CURRENT_MAX - 1;
+        /* We must have cleared our windows when our source is this large. */
+        assert(ZSTD_window_isEmpty(ms->window));
+        if (loadLdmDict)
+            assert(ZSTD_window_isEmpty(ls->window));
+        /* If the dictionary is too large, only load the suffix of the dictionary. */
+        if (srcSize > maxDictSize) {
+            ip = iend - maxDictSize;
+            src = ip;
+            srcSize = maxDictSize;
+        }
+    }
+
+    DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
+    ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0);
+    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
+    ms->forceNonContiguous = params->deterministicRefPrefix;
+
+    if (loadLdmDict) {
+        ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0);
+        ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
+    }
+
     if (srcSize <= HASH_READ_SIZE) return 0;
 
-    while (iend - ip > HASH_READ_SIZE) {
-        size_t const remaining = (size_t)(iend - ip);
-        size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
-        const BYTE* const ichunk = ip + chunk;
+    ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend);
 
-        ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk);
+    if (loadLdmDict)
+        ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);
 
-        if (params->ldmParams.enableLdm && ls != NULL)
-            ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, &params->ldmParams);
+    switch(params->cParams.strategy)
+    {
+    case ZSTD_fast:
+        ZSTD_fillHashTable(ms, iend, dtlm);
+        break;
+    case ZSTD_dfast:
+        ZSTD_fillDoubleHashTable(ms, iend, dtlm);
+        break;
 
-        switch(params->cParams.strategy)
-        {
-        case ZSTD_fast:
-            ZSTD_fillHashTable(ms, ichunk, dtlm);
-            break;
-        case ZSTD_dfast:
-            ZSTD_fillDoubleHashTable(ms, ichunk, dtlm);
-            break;
-
-        case ZSTD_greedy:
-        case ZSTD_lazy:
-        case ZSTD_lazy2:
-            if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) {
-                assert(chunk == remaining); /* must load everything in one go */
-                ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE);
-            } else if (chunk >= HASH_READ_SIZE) {
-                ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
+    case ZSTD_greedy:
+    case ZSTD_lazy:
+    case ZSTD_lazy2:
+        assert(srcSize >= HASH_READ_SIZE);
+        if (ms->dedicatedDictSearch) {
+            assert(ms->chainTable != NULL);
+            ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE);
+        } else {
+            assert(params->useRowMatchFinder != ZSTD_urm_auto);
+            if (params->useRowMatchFinder == ZSTD_urm_enableRowMatchFinder) {
+                size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
+                ZSTD_memset(ms->tagTable, 0, tagTableSize);
+                ZSTD_row_update(ms, iend-HASH_READ_SIZE);
+                DEBUGLOG(4, "Using row-based hash table for lazy dict");
+            } else {
+                ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
+                DEBUGLOG(4, "Using chain-based hash table for lazy dict");
             }
-            break;
-
-        case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
-        case ZSTD_btopt:
-        case ZSTD_btultra:
-        case ZSTD_btultra2:
-            if (chunk >= HASH_READ_SIZE)
-                ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk);
-            break;
-
-        default:
-            assert(0);  /* not possible : not a valid strategy id */
         }
+        break;
 
-        ip = ichunk;
+    case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
+    case ZSTD_btopt:
+    case ZSTD_btultra:
+    case ZSTD_btultra2:
+        assert(srcSize >= HASH_READ_SIZE);
+        ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
+        break;
+
+    default:
+        assert(0);  /* not possible : not a valid strategy id */
     }
 
     ms->nextToUpdate = (U32)(iend - ms->window.base);
@@ -3258,7 +4356,7 @@
 
 /* Dictionary format :
  * See :
- * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
+ * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format
  */
 /*! ZSTD_loadZstdDictionary() :
  * @return : dictID, or an error code
@@ -3277,7 +4375,6 @@
     const BYTE* const dictEnd = dictPtr + dictSize;
     size_t dictID;
     size_t eSize;
-
     ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
     assert(dictSize >= 8);
     assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
@@ -3348,6 +4445,10 @@
                                     const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
                                     ZSTD_buffered_policy_e zbuff)
 {
+    size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize;
+#if ZSTD_TRACE
+    cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0;
+#endif
     DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
     /* params are supposed to be fully validated at this point */
     assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
@@ -3362,7 +4463,8 @@
         return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
     }
 
-    FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize,
+    FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+                                     dictContentSize,
                                      ZSTDcrp_makeClean, zbuff) , "");
     {   size_t const dictID = cdict ?
                 ZSTD_compress_insertDictionary(
@@ -3377,6 +4479,7 @@
         FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
         assert(dictID <= UINT_MAX);
         cctx->dictID = (U32)dictID;
+        cctx->dictContentSize = dictContentSize;
     }
     return 0;
 }
@@ -3405,8 +4508,8 @@
                              const void* dict, size_t dictSize,
                                    ZSTD_parameters params, unsigned long long pledgedSrcSize)
 {
-    ZSTD_CCtx_params const cctxParams =
-            ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params);
+    ZSTD_CCtx_params cctxParams;
+    ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL);
     return ZSTD_compressBegin_advanced_internal(cctx,
                                             dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
                                             NULL /*cdict*/,
@@ -3415,9 +4518,11 @@
 
 size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
 {
-    ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
-    ZSTD_CCtx_params const cctxParams =
-            ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params);
+    ZSTD_CCtx_params cctxParams;
+    {
+        ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
+        ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel);
+    }
     DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
     return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
                                        &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
@@ -3471,6 +4576,30 @@
     return op-ostart;
 }
 
+void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)
+{   /* Reports this compression to ZSTD_trace_compress_end() when a trace is active; does nothing if !ZSTD_TRACE */
+#if ZSTD_TRACE
+    if (cctx->traceCtx && ZSTD_trace_compress_end != NULL) {
+        int const streaming = cctx->inBuffSize > 0 || cctx->outBuffSize > 0 || cctx->appliedParams.nbWorkers > 0;
+        ZSTD_Trace trace;
+        ZSTD_memset(&trace, 0, sizeof(trace));
+        trace.version = ZSTD_VERSION_NUMBER;
+        trace.streaming = streaming;
+        trace.dictionaryID = cctx->dictID;
+        trace.dictionarySize = cctx->dictContentSize;
+        trace.uncompressedSize = cctx->consumedSrcSize;
+        trace.compressedSize = cctx->producedCSize + extraCSize;  /* presumably extraCSize = bytes not yet counted in producedCSize - TODO confirm */
+        trace.params = &cctx->appliedParams;
+        trace.cctx = cctx;
+        ZSTD_trace_compress_end(cctx->traceCtx, &trace);
+    }
+    cctx->traceCtx = 0;  /* cleared whether or not a trace was emitted */
+#else
+    (void)cctx;  /* parameters are unused when tracing is compiled out */
+    (void)extraCSize;
+#endif
+}
+
 size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
                          void* dst, size_t dstCapacity,
                    const void* src, size_t srcSize)
@@ -3493,25 +4622,10 @@
             (unsigned)cctx->pledgedSrcSizePlusOne-1,
             (unsigned)cctx->consumedSrcSize);
     }
+    ZSTD_CCtx_trace(cctx, endResult);
     return cSize + endResult;
 }
 
-static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
-                                      void* dst, size_t dstCapacity,
-                                const void* src, size_t srcSize,
-                                const void* dict,size_t dictSize,
-                                const ZSTD_parameters* params)
-{
-    ZSTD_CCtx_params const cctxParams =
-            ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
-    DEBUGLOG(4, "ZSTD_compress_internal");
-    return ZSTD_compress_advanced_internal(cctx,
-                                           dst, dstCapacity,
-                                           src, srcSize,
-                                           dict, dictSize,
-                                           &cctxParams);
-}
-
 size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
                                void* dst, size_t dstCapacity,
                          const void* src, size_t srcSize,
@@ -3520,11 +4634,12 @@
 {
     DEBUGLOG(4, "ZSTD_compress_advanced");
     FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
-    return ZSTD_compress_internal(cctx,
-                                  dst, dstCapacity,
-                                  src, srcSize,
-                                  dict, dictSize,
-                                  &params);
+    ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, ZSTD_NO_CLEVEL);
+    return ZSTD_compress_advanced_internal(cctx,
+                                           dst, dstCapacity,
+                                           src, srcSize,
+                                           dict, dictSize,
+                                           &cctx->simpleApiParams);
 }
 
 /* Internal */
@@ -3548,11 +4663,13 @@
                          const void* dict, size_t dictSize,
                                int compressionLevel)
 {
-    ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict);
-    ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params);
+    {
+        ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict);
+        assert(params.fParams.contentSizeFlag == 1);
+        ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel);
+    }
     DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
-    assert(params.fParams.contentSizeFlag == 1);
-    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams);
+    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctx->simpleApiParams);
 }
 
 size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
@@ -3596,7 +4713,10 @@
     DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
     return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
          + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
-         + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
+         /* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small
+          * in case we are using DDS with row-hash. */
+         + ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_urm_auto, &cParams),
+                                  /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0)
          + (dictLoadMethod == ZSTD_dlm_byRef ? 0
             : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
 }
@@ -3627,9 +4747,6 @@
     assert(!ZSTD_checkCParams(params.cParams));
     cdict->matchState.cParams = params.cParams;
     cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch;
-    if (cdict->matchState.dedicatedDictSearch && dictSize > ZSTD_CHUNKSIZE_MAX) {
-        cdict->matchState.dedicatedDictSearch = 0;
-    }
     if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
         cdict->dictContent = dictBuffer;
     } else {
@@ -3650,6 +4767,7 @@
         &cdict->matchState,
         &cdict->workspace,
         &params.cParams,
+        params.useRowMatchFinder,
         ZSTDcrp_makeClean,
         ZSTDirp_reset,
         ZSTD_resetTarget_CDict), "");
@@ -3673,14 +4791,17 @@
 
 static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
                                       ZSTD_dictLoadMethod_e dictLoadMethod,
-                                      ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
+                                      ZSTD_compressionParameters cParams,
+                                      ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
+                                      U32 enableDedicatedDictSearch,
+                                      ZSTD_customMem customMem)
 {
     if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
 
     {   size_t const workspaceSize =
             ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
             ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
-            ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) +
+            ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) +
             (dictLoadMethod == ZSTD_dlm_byRef ? 0
              : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
         void* const workspace = ZSTD_customMalloc(workspaceSize, customMem);
@@ -3698,8 +4819,8 @@
         assert(cdict != NULL);
         ZSTD_cwksp_move(&cdict->workspace, &ws);
         cdict->customMem = customMem;
-        cdict->compressionLevel = 0; /* signals advanced API usage */
-
+        cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */
+        cdict->useRowMatchFinder = useRowMatchFinder;
         return cdict;
     }
 }
@@ -3751,10 +4872,13 @@
             &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
     }
 
+    DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch);
     cctxParams.cParams = cParams;
+    cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
 
     cdict = ZSTD_createCDict_advanced_internal(dictSize,
                         dictLoadMethod, cctxParams.cParams,
+                        cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch,
                         customMem);
 
     if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
@@ -3823,7 +4947,9 @@
                                  ZSTD_dictContentType_e dictContentType,
                                  ZSTD_compressionParameters cParams)
 {
-    size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
+    ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_urm_auto, &cParams);
+    /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */
+    size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0);
     size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
                             + (dictLoadMethod == ZSTD_dlm_byRef ? 0
                                : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
@@ -3848,6 +4974,8 @@
 
     ZSTD_CCtxParams_init(&params, 0);
     params.cParams = cParams;
+    params.useRowMatchFinder = useRowMatchFinder;
+    cdict->useRowMatchFinder = useRowMatchFinder;
 
     if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
                                               dict, dictSize,
@@ -3874,60 +5002,86 @@
     return cdict->dictID;
 }
 
+/* ZSTD_compressBegin_usingCDict_internal() :
+ * Shared implementation of the ZSTD_compressBegin_usingCDict*() functions : derives cctxParams from @cdict and
+ * @fParams, then calls ZSTD_compressBegin_internal(). @return : its result, or dictionary_wrong if cdict==NULL */
+static size_t ZSTD_compressBegin_usingCDict_internal(
+    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
+    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
+{
+    ZSTD_CCtx_params cctxParams;
+    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_internal");
+    RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");
+    /* Initialize the cctxParams from the cdict */
+    {
+        ZSTD_parameters params;
+        params.fParams = fParams;
+        params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
+                        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
+                        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
+                        || cdict->compressionLevel == ZSTD_NO_CLEVEL /* cdict created through the advanced API */ ) ?
+                ZSTD_getCParamsFromCDict(cdict)
+              : ZSTD_getCParams(cdict->compressionLevel,
+                                pledgedSrcSize,
+                                cdict->dictContentSize);
+        ZSTD_CCtxParams_init_internal(&cctxParams, &params, cdict->compressionLevel);
+    }
+    /* Increase window log to fit the entire dictionary and source if the
+     * source size is known. Limit the increase to 19, which is the
+     * window log for compression level 1 with the largest source size.
+     */
+    if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
+        U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
+        U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1;
+        cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog);
+    }
+    return ZSTD_compressBegin_internal(cctx,
+                                        NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
+                                        cdict,
+                                        &cctxParams, pledgedSrcSize,
+                                        ZSTDb_not_buffered);
+}
+
 
 /* ZSTD_compressBegin_usingCDict_advanced() :
+ * This function is DEPRECATED.
  * cdict must be != NULL */
 size_t ZSTD_compressBegin_usingCDict_advanced(
     ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
     ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
 {
-    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
-    RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");
-    {   ZSTD_CCtx_params params = cctx->requestedParams;
-        params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
-                        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
-                        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
-                        || cdict->compressionLevel == 0 )
-                      && (params.attachDictPref != ZSTD_dictForceLoad) ?
-                ZSTD_getCParamsFromCDict(cdict)
-              : ZSTD_getCParams(cdict->compressionLevel,
-                                pledgedSrcSize,
-                                cdict->dictContentSize);
-        /* Increase window log to fit the entire dictionary and source if the
-         * source size is known. Limit the increase to 19, which is the
-         * window log for compression level 1 with the largest source size.
-         */
-        if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
-            U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
-            U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1;
-            params.cParams.windowLog = MAX(params.cParams.windowLog, limitedSrcLog);
-        }
-        params.fParams = fParams;
-        return ZSTD_compressBegin_internal(cctx,
-                                           NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
-                                           cdict,
-                                           &params, pledgedSrcSize,
-                                           ZSTDb_not_buffered);
-    }
+    return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, pledgedSrcSize);
 }
 
 /* ZSTD_compressBegin_usingCDict() :
- * pledgedSrcSize=0 means "unknown"
- * if pledgedSrcSize>0, it will enable contentSizeFlag */
+ * cdict must be != NULL */
 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
 {
     ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
-    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
-    return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
+    return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
 }
 
+/*! ZSTD_compress_usingCDict_internal():
+ * Shared implementation of the ZSTD_compress_usingCDict*() functions : begins a frame with @cdict and @fParams
+ * (pledging srcSize), then compresses src via ZSTD_compressEnd(). @return : its result, or an error code */
+static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx,
+                                void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize,
+                                const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
+{
+    FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */
+    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+}
+
+/*! ZSTD_compress_usingCDict_advanced():
+ * This function is DEPRECATED.
+ */
 size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
                                 void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                 const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
 {
-    FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), "");   /* will check if cdict != NULL */
-    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+    return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
 }
 
 /*! ZSTD_compress_usingCDict() :
@@ -3941,7 +5095,7 @@
                                 const ZSTD_CDict* cdict)
 {
     ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
-    return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
+    return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
 }
 
 
@@ -4071,7 +5225,7 @@
     FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
     FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
     FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
-    zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, &params);
+    ZSTD_CCtxParams_setZstdParams(&zcs->requestedParams, &params);
     FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
     return 0;
 }
@@ -4351,8 +5505,13 @@
     FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
     ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));   /* single usage */
     assert(prefixDict.dict==NULL || cctx->cdict==NULL);    /* only one can be set */
-    if (cctx->cdict)
-        params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */
+    if (cctx->cdict && !cctx->localDict.cdict) {
+        /* Let the cdict's compression level take priority over the requested params.
+         * But do not take the cdict's compression level if the "cdict" is actually a localDict
+         * generated from ZSTD_initLocalDict().
+         */
+        params.compressionLevel = cctx->cdict->compressionLevel;
+    }
     DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
     if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1;  /* auto-fix pledgedSrcSize */
     {
@@ -4371,11 +5530,21 @@
         params.ldmParams.enableLdm = 1;
     }
 
+    if (ZSTD_CParams_useBlockSplitter(&params.cParams)) {
+        DEBUGLOG(4, "Block splitter enabled by default (window size >= 128K, strategy >= btopt)");
+        params.splitBlocks = 1;
+    }
+
+    params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, &params.cParams);
+
 #ifdef ZSTD_MULTITHREAD
     if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
         params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
     }
     if (params.nbWorkers > 0) {
+#if ZSTD_TRACE
+        cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0;
+#endif
         /* mt context creation */
         if (cctx->mtctx == NULL) {
             DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",
@@ -4389,6 +5558,10 @@
                     cctx->mtctx,
                     prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
                     cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , "");
+        cctx->dictID = cctx->cdict ? cctx->cdict->dictID : 0;
+        cctx->dictContentSize = cctx->cdict ? cctx->cdict->dictContentSize : prefixDict.dictSize;
+        cctx->consumedSrcSize = 0;
+        cctx->producedCSize = 0;
         cctx->streamStage = zcss_load;
         cctx->appliedParams = params;
     } else
@@ -4450,8 +5623,12 @@
             size_t const ipos = input->pos;
             size_t const opos = output->pos;
             flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
+            cctx->consumedSrcSize += (U64)(input->pos - ipos);
+            cctx->producedCSize += (U64)(output->pos - opos);
             if ( ZSTD_isError(flushMin)
               || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
+                if (flushMin == 0)
+                    ZSTD_CCtx_trace(cctx, 0);
                 ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
             }
             FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed");
@@ -4834,7 +6011,7 @@
             continue;
         }
 
-        compressedSeqsSize = ZSTD_entropyCompressSequences(&cctx->seqStore,
+        compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore,
                                 &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
                                 &cctx->appliedParams,
                                 op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
@@ -4866,7 +6043,7 @@
         } else {
             U32 cBlockHeader;
             /* Error checking and repcodes update */
-            ZSTD_confirmRepcodesAndEntropyTables(cctx);
+            ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx->blockState);
             if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
                 cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
 
@@ -4967,6 +6144,7 @@
 #define ZSTD_MAX_CLEVEL     22
 int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
 int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }
+int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT; }
 
 static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
 {   /* "default" - for any srcSize > 256 KB */
@@ -5099,7 +6277,10 @@
 static int ZSTD_dedicatedDictSearch_isSupported(
         ZSTD_compressionParameters const* cParams)
 {
-    return (cParams->strategy >= ZSTD_greedy) && (cParams->strategy <= ZSTD_lazy2);
+    return (cParams->strategy >= ZSTD_greedy)
+        && (cParams->strategy <= ZSTD_lazy2)
+        && (cParams->hashLog > cParams->chainLog)
+        && (cParams->chainLog <= 24);
 }
 
 /**
@@ -5117,6 +6298,9 @@
         case ZSTD_lazy:
         case ZSTD_lazy2:
             cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG;
+            if (cParams->hashLog < ZSTD_HASHLOG_MIN) {
+                cParams->hashLog = ZSTD_HASHLOG_MIN;
+            }
             break;
         case ZSTD_btlazy2:
         case ZSTD_btopt:
@@ -5165,6 +6349,7 @@
     else row = compressionLevel;
 
     {   ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
+        DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy);
         /* acceleration factor */
         if (compressionLevel < 0) {
             int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel);
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index c04998b..3b04fd0 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -81,6 +81,53 @@
     ZSTD_fseCTables_t fse;
 } ZSTD_entropyCTables_t;
 
+/***********************************************
+*  Entropy buffer statistics structs and funcs *
+***********************************************/
+/** ZSTD_hufCTablesMetadata_t :
+ *  Stores Literals Block Type for a super-block in hType, and
+ *  huffman tree description in hufDesBuffer.
+ *  hufDesSize refers to the size of huffman tree description in bytes.
+ *  This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
+typedef struct {
+    symbolEncodingType_e hType;
+    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
+    size_t hufDesSize;
+} ZSTD_hufCTablesMetadata_t;
+
+/** ZSTD_fseCTablesMetadata_t :
+ *  Stores symbol compression modes for a super-block in {ll, of, ml}Type, and
+ *  fse tables in fseTablesBuffer.
+ *  fseTablesSize refers to the size of fse tables in bytes.
+ *  This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
+typedef struct {
+    symbolEncodingType_e llType;
+    symbolEncodingType_e ofType;
+    symbolEncodingType_e mlType;
+    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
+    size_t fseTablesSize;
+    size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
+} ZSTD_fseCTablesMetadata_t;
+
+typedef struct {
+    ZSTD_hufCTablesMetadata_t hufMetadata;
+    ZSTD_fseCTablesMetadata_t fseMetadata;
+} ZSTD_entropyCTablesMetadata_t;
+
+/** ZSTD_buildBlockEntropyStats() :
+ *  Builds entropy for the block.
+ *  @return : 0 on success or error code */
+size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
+                             const ZSTD_entropyCTables_t* prevEntropy,
+                                   ZSTD_entropyCTables_t* nextEntropy,
+                             const ZSTD_CCtx_params* cctxParams,
+                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                                   void* workspace, size_t wkspSize);
+
+/*********************************
+*  Compression internals structs *
+*********************************/
+
 typedef struct {
     U32 off;            /* Offset code (offset + ZSTD_REP_MOVE) for the match */
     U32 len;            /* Raw length of match */
@@ -141,14 +188,21 @@
 } ZSTD_compressedBlockState_t;
 
 typedef struct {
-    BYTE const* nextSrc;    /* next block here to continue on current prefix */
-    BYTE const* base;       /* All regular indexes relative to this position */
-    BYTE const* dictBase;   /* extDict indexes relative to this position */
-    U32 dictLimit;          /* below that point, need extDict */
-    U32 lowLimit;           /* below that point, no more valid data */
+    BYTE const* nextSrc;       /* next block here to continue on current prefix */
+    BYTE const* base;          /* All regular indexes relative to this position */
+    BYTE const* dictBase;      /* extDict indexes relative to this position */
+    U32 dictLimit;             /* below that point, need extDict */
+    U32 lowLimit;              /* below that point, no more valid data */
+    U32 nbOverflowCorrections; /* Number of times overflow correction has run since
+                                * ZSTD_window_init(). Useful for debugging coredumps
+                                * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
+                                */
 } ZSTD_window_t;
 
 typedef struct ZSTD_matchState_t ZSTD_matchState_t;
+
+#define ZSTD_ROW_HASH_CACHE_SIZE 8       /* Size of prefetching hash cache for row-based matchfinder */
+
 struct ZSTD_matchState_t {
     ZSTD_window_t window;   /* State for window round buffer management */
     U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
@@ -160,9 +214,17 @@
                              */
     U32 nextToUpdate;       /* index from which to continue table update */
     U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */
+
+    U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
+    U16* tagTable;                           /* For row-based matchFinder: A row-based table containing the hashes and head index. */
+    U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
+
     U32* hashTable;
     U32* hashTable3;
     U32* chainTable;
+
+    U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */
+
     int dedicatedDictSearch;  /* Indicates whether this matchState is using the
                                * dedicated dictionary search structure.
                                */
@@ -184,12 +246,21 @@
 } ldmEntry_t;
 
 typedef struct {
+    BYTE const* split;
+    U32 hash;
+    U32 checksum;
+    ldmEntry_t* bucket;
+} ldmMatchCandidate_t;
+
+#define LDM_BATCH_SIZE 64
+
+typedef struct {
     ZSTD_window_t window;   /* State for the window round buffer management */
     ldmEntry_t* hashTable;
     U32 loadedDictEnd;
     BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
-    U64 hashPower;          /* Used to compute the rolling hash.
-                             * Depends on ldmParams.minMatchLength */
+    size_t splitIndices[LDM_BATCH_SIZE];
+    ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
 } ldmState_t;
 
 typedef struct {
@@ -246,6 +317,15 @@
     ZSTD_sequenceFormat_e blockDelimiters;
     int validateSequences;
 
+    /* Block splitting */
+    int splitBlocks;
+
+    /* Param for deciding whether to use row-based matchfinder */
+    ZSTD_useRowMatchFinderMode_e useRowMatchFinder;
+
+    /* Always load a dictionary in ext-dict mode (not prefix mode)? */
+    int deterministicRefPrefix;
+
     /* Internal use, for createCCtxParams() and freeCCtxParams() only */
     ZSTD_customMem customMem;
 };  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
@@ -269,7 +349,9 @@
     int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
     ZSTD_CCtx_params requestedParams;
     ZSTD_CCtx_params appliedParams;
+    ZSTD_CCtx_params simpleApiParams;    /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
     U32   dictID;
+    size_t dictContentSize;
 
     ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
     size_t blockSize;
@@ -321,6 +403,11 @@
 #ifdef ZSTD_MULTITHREAD
     ZSTDMT_CCtx* mtctx;
 #endif
+
+    /* Tracing */
+#if ZSTD_TRACE
+    ZSTD_TraceCtx traceCtx;
+#endif
 };
 
 typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
@@ -355,7 +442,7 @@
 typedef size_t (*ZSTD_blockCompressor) (
         ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
 
 
 MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
@@ -532,8 +619,8 @@
 
     /* literal Length */
     if (litLength>0xFFFF) {
-        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
-        seqStorePtr->longLengthID = 1;
+        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
+        seqStorePtr->longLengthType = ZSTD_llt_literalLength;
         seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
     }
     seqStorePtr->sequences[0].litLength = (U16)litLength;
@@ -543,8 +630,8 @@
 
     /* match Length */
     if (mlBase>0xFFFF) {
-        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
-        seqStorePtr->longLengthID = 2;
+        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
+        seqStorePtr->longLengthType = ZSTD_llt_matchLength;
         seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
     }
     seqStorePtr->sequences[0].matchLength = (U16)mlBase;
@@ -795,6 +882,13 @@
     window->dictLimit = end;
 }
 
+MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
+{
+    return window.dictLimit == 1 &&
+           window.lowLimit == 1 &&
+           (window.nextSrc - window.base) == 1;
+}
+
 /**
  * ZSTD_window_hasExtDict():
  * Returns non-zero if the window has a non-empty extDict.
@@ -818,15 +912,69 @@
             ZSTD_noDict;
 }
 
+/* Defining this macro to non-zero tells zstd to run the overflow correction
+ * code much more frequently. This is very inefficient, and should only be
+ * used for tests and fuzzers.
+ */
+#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
+#  ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
+#  else
+#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
+#  endif
+#endif
+
+/**
+ * ZSTD_window_canOverflowCorrect():
+ * Returns non-zero if the indices are large enough for overflow correction
+ * to work correctly without impacting compression ratio.
+ */
+MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
+                                              U32 cycleLog,
+                                              U32 maxDist,
+                                              U32 loadedDictEnd,
+                                              void const* src)
+{
+    U32 const cycleSize = 1u << cycleLog;
+    U32 const curr = (U32)((BYTE const*)src - window.base);
+    U32 const minIndexToOverflowCorrect = cycleSize + MAX(maxDist, cycleSize);
+
+    /* Scale the min index by (1 + number of corrections so far) to back off
+     * the overflow correction frequency, so we don't waste too much CPU in
+     * overflow correction. If the multiplication overflows we don't really
+     * care: MAX() keeps the result at least minIndexToOverflowCorrect.
+     */
+    U32 const adjustment = window.nbOverflowCorrections + 1;
+    U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
+                                  minIndexToOverflowCorrect);
+    U32 const indexLargeEnough = curr > adjustedIndex;
+
+    /* Only overflow correct early if the dictionary is invalidated already,
+     * so we don't hurt compression ratio.
+     */
+    U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;
+
+    return indexLargeEnough && dictionaryInvalidated;
+}
+
 /**
  * ZSTD_window_needOverflowCorrection():
  * Returns non-zero if the indices are getting too large and need overflow
  * protection.
  */
 MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
+                                                  U32 cycleLog,
+                                                  U32 maxDist,
+                                                  U32 loadedDictEnd,
+                                                  void const* src,
                                                   void const* srcEnd)
 {
     U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
+    if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
+        if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
+            return 1;
+        }
+    }
     return curr > ZSTD_CURRENT_MAX;
 }
 
@@ -838,7 +986,6 @@
  *
  * The least significant cycleLog bits of the indices must remain the same,
  * which may be 0. Every index up to maxDist in the past must be valid.
- * NOTE: (maxDist & cycleMask) must be zero.
  */
 MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
                                            U32 maxDist, void const* src)
@@ -862,17 +1009,25 @@
      * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
      *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
      */
-    U32 const cycleMask = (1U << cycleLog) - 1;
+    U32 const cycleSize = 1u << cycleLog;
+    U32 const cycleMask = cycleSize - 1;
     U32 const curr = (U32)((BYTE const*)src - window->base);
     U32 const currentCycle0 = curr & cycleMask;
     /* Exclude zero so that newCurrent - maxDist >= 1. */
-    U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;
-    U32 const newCurrent = currentCycle1 + maxDist;
+    U32 const currentCycle1 = currentCycle0 == 0 ? cycleSize : currentCycle0;
+    U32 const newCurrent = currentCycle1 + MAX(maxDist, cycleSize);
     U32 const correction = curr - newCurrent;
-    assert((maxDist & cycleMask) == 0);
+    /* maxDist must be a power of two so that:
+     *   (newCurrent & cycleMask) == (curr & cycleMask)
+     * This is required to not corrupt the chains / binary tree.
+     */
+    assert((maxDist & (maxDist - 1)) == 0);
+    assert((curr & cycleMask) == (newCurrent & cycleMask));
     assert(curr > newCurrent);
-    /* Loose bound, should be around 1<<29 (see above) */
-    assert(correction > 1<<28);
+    if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
+        /* Loose bound, should be around 1<<29 (see above) */
+        assert(correction > 1<<28);
+    }
 
     window->base += correction;
     window->dictBase += correction;
@@ -888,6 +1043,8 @@
     assert(window->lowLimit <= newCurrent);
     assert(window->dictLimit <= newCurrent);
 
+    ++window->nbOverflowCorrections;
+
     DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
              window->lowLimit);
     return correction;
@@ -997,6 +1154,7 @@
     window->dictLimit = 1;    /* start from 1, so that 1st position is valid */
     window->lowLimit = 1;     /* it ensures first and later CCtx usages compress the same */
     window->nextSrc = window->base + 1;   /* see issue #1241 */
+    window->nbOverflowCorrections = 0;
 }
 
 /**
@@ -1007,7 +1165,8 @@
  * Returns non-zero if the segment is contiguous.
  */
 MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
-                                  void const* src, size_t srcSize)
+                                  void const* src, size_t srcSize,
+                                  int forceNonContiguous)
 {
     BYTE const* const ip = (BYTE const*)src;
     U32 contiguous = 1;
@@ -1017,7 +1176,7 @@
     assert(window->base != NULL);
     assert(window->dictBase != NULL);
     /* Check if blocks follow each other */
-    if (src != window->nextSrc) {
+    if (src != window->nextSrc || forceNonContiguous) {
         /* not contiguous */
         size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
         DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
@@ -1200,4 +1359,9 @@
  *  condition for correct operation : hashLog > 1 */
 U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
 
+/** ZSTD_CCtx_trace() :
+ *  Trace the end of a compression call.
+ */
+void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
+
 #endif /* ZSTD_COMPRESS_H */
diff --git a/lib/compress/zstd_compress_literals.c b/lib/compress/zstd_compress_literals.c
index 6dd1c14..008337b 100644
--- a/lib/compress/zstd_compress_literals.c
+++ b/lib/compress/zstd_compress_literals.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
 
 size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const ostart = (BYTE*)dst;
     U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
 
     RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
@@ -42,7 +42,7 @@
 
 size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const ostart = (BYTE*)dst;
     U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
 
     (void)dstCapacity;  /* dstCapacity already guaranteed to be >=4, hence large enough */
@@ -117,7 +117,7 @@
         }
     }
 
-    if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
+    if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
         ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
         return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
     }
diff --git a/lib/compress/zstd_compress_literals.h b/lib/compress/zstd_compress_literals.h
index 8b08705..9904c0c 100644
--- a/lib/compress/zstd_compress_literals.h
+++ b/lib/compress/zstd_compress_literals.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/compress/zstd_compress_sequences.c b/lib/compress/zstd_compress_sequences.c
index be30c08..611eabd 100644
--- a/lib/compress/zstd_compress_sequences.c
+++ b/lib/compress/zstd_compress_sequences.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -85,6 +85,8 @@
 {
     unsigned cost = 0;
     unsigned s;
+
+    assert(total > 0);
     for (s = 0; s <= max; ++s) {
         unsigned norm = (unsigned)((256 * count[s]) / total);
         if (count[s] != 0 && norm == 0)
@@ -232,6 +234,11 @@
     return set_compressed;
 }
 
+typedef struct {
+    S16 norm[MaxSeq + 1];
+    U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)];
+} ZSTD_BuildCTableWksp;
+
 size_t
 ZSTD_buildCTable(void* dst, size_t dstCapacity,
                 FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
@@ -258,7 +265,7 @@
         FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), "");  /* note : could be pre-calculated */
         return 0;
     case set_compressed: {
-        S16 norm[MaxSeq + 1];
+        ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace;
         size_t nbSeq_1 = nbSeq;
         const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
         if (count[codeTable[nbSeq-1]] > 1) {
@@ -266,11 +273,12 @@
             nbSeq_1--;
         }
         assert(nbSeq_1 > 1);
-        assert(entropyWorkspaceSize >= FSE_BUILD_CTABLE_WORKSPACE_SIZE(MaxSeq, MaxFSELog));
-        FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
-        {   size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog);   /* overflow protected */
+        assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
+        (void)entropyWorkspaceSize;
+        FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
+        {   size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog);   /* overflow protected */
             FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
-            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), "");
+            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "");
             return NCountSize;
         }
     }
diff --git a/lib/compress/zstd_compress_sequences.h b/lib/compress/zstd_compress_sequences.h
index 68c6f9a..7991364 100644
--- a/lib/compress/zstd_compress_sequences.h
+++ b/lib/compress/zstd_compress_sequences.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index e23e619..e4e4506 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,288 +15,10 @@
 
 #include "../common/zstd_internal.h"  /* ZSTD_getSequenceLength */
 #include "hist.h"                     /* HIST_countFast_wksp */
-#include "zstd_compress_internal.h"
+#include "zstd_compress_internal.h"   /* ZSTD_[huf|fse|entropy]CTablesMetadata_t */
 #include "zstd_compress_sequences.h"
 #include "zstd_compress_literals.h"
 
-/*-*************************************
-*  Superblock entropy buffer structs
-***************************************/
-/** ZSTD_hufCTablesMetadata_t :
- *  Stores Literals Block Type for a super-block in hType, and
- *  huffman tree description in hufDesBuffer.
- *  hufDesSize refers to the size of huffman tree description in bytes.
- *  This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
-typedef struct {
-    symbolEncodingType_e hType;
-    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
-    size_t hufDesSize;
-} ZSTD_hufCTablesMetadata_t;
-
-/** ZSTD_fseCTablesMetadata_t :
- *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
- *  fse tables in fseTablesBuffer.
- *  fseTablesSize refers to the size of fse tables in bytes.
- *  This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
-typedef struct {
-    symbolEncodingType_e llType;
-    symbolEncodingType_e ofType;
-    symbolEncodingType_e mlType;
-    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
-    size_t fseTablesSize;
-    size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
-} ZSTD_fseCTablesMetadata_t;
-
-typedef struct {
-    ZSTD_hufCTablesMetadata_t hufMetadata;
-    ZSTD_fseCTablesMetadata_t fseMetadata;
-} ZSTD_entropyCTablesMetadata_t;
-
-
-/** ZSTD_buildSuperBlockEntropy_literal() :
- *  Builds entropy for the super-block literals.
- *  Stores literals block type (raw, rle, compressed, repeat) and
- *  huffman description table to hufMetadata.
- *  @return : size of huffman description table or error code */
-static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
-                                            const ZSTD_hufCTables_t* prevHuf,
-                                                  ZSTD_hufCTables_t* nextHuf,
-                                                  ZSTD_hufCTablesMetadata_t* hufMetadata,
-                                                  const int disableLiteralsCompression,
-                                                  void* workspace, size_t wkspSize)
-{
-    BYTE* const wkspStart = (BYTE*)workspace;
-    BYTE* const wkspEnd = wkspStart + wkspSize;
-    BYTE* const countWkspStart = wkspStart;
-    unsigned* const countWksp = (unsigned*)workspace;
-    const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
-    BYTE* const nodeWksp = countWkspStart + countWkspSize;
-    const size_t nodeWkspSize = wkspEnd-nodeWksp;
-    unsigned maxSymbolValue = 255;
-    unsigned huffLog = HUF_TABLELOG_DEFAULT;
-    HUF_repeat repeat = prevHuf->repeatMode;
-
-    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
-
-    /* Prepare nextEntropy assuming reusing the existing table */
-    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-
-    if (disableLiteralsCompression) {
-        DEBUGLOG(5, "set_basic - disabled");
-        hufMetadata->hType = set_basic;
-        return 0;
-    }
-
-    /* small ? don't even attempt compression (speed opt) */
-#   define COMPRESS_LITERALS_SIZE_MIN 63
-    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
-        if (srcSize <= minLitSize) {
-            DEBUGLOG(5, "set_basic - too small");
-            hufMetadata->hType = set_basic;
-            return 0;
-        }
-    }
-
-    /* Scan input and build symbol stats */
-    {   size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
-        FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
-        if (largest == srcSize) {
-            DEBUGLOG(5, "set_rle");
-            hufMetadata->hType = set_rle;
-            return 0;
-        }
-        if (largest <= (srcSize >> 7)+4) {
-            DEBUGLOG(5, "set_basic - no gain");
-            hufMetadata->hType = set_basic;
-            return 0;
-        }
-    }
-
-    /* Validate the previous Huffman table */
-    if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
-        repeat = HUF_repeat_none;
-    }
-
-    /* Build Huffman Tree */
-    ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
-    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
-    {   size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
-                                                    maxSymbolValue, huffLog,
-                                                    nodeWksp, nodeWkspSize);
-        FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
-        huffLog = (U32)maxBits;
-        {   /* Build and write the CTable */
-            size_t const newCSize = HUF_estimateCompressedSize(
-                    (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
-            size_t const hSize = HUF_writeCTable(
-                    hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
-                    (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog);
-            /* Check against repeating the previous CTable */
-            if (repeat != HUF_repeat_none) {
-                size_t const oldCSize = HUF_estimateCompressedSize(
-                        (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
-                if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
-                    DEBUGLOG(5, "set_repeat - smaller");
-                    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-                    hufMetadata->hType = set_repeat;
-                    return 0;
-                }
-            }
-            if (newCSize + hSize >= srcSize) {
-                DEBUGLOG(5, "set_basic - no gains");
-                ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-                hufMetadata->hType = set_basic;
-                return 0;
-            }
-            DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
-            hufMetadata->hType = set_compressed;
-            nextHuf->repeatMode = HUF_repeat_check;
-            return hSize;
-        }
-    }
-}
-
-/** ZSTD_buildSuperBlockEntropy_sequences() :
- *  Builds entropy for the super-block sequences.
- *  Stores symbol compression modes and fse table to fseMetadata.
- *  @return : size of fse tables or error code */
-static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
-                                              const ZSTD_fseCTables_t* prevEntropy,
-                                                    ZSTD_fseCTables_t* nextEntropy,
-                                              const ZSTD_CCtx_params* cctxParams,
-                                                    ZSTD_fseCTablesMetadata_t* fseMetadata,
-                                                    void* workspace, size_t wkspSize)
-{
-    BYTE* const wkspStart = (BYTE*)workspace;
-    BYTE* const wkspEnd = wkspStart + wkspSize;
-    BYTE* const countWkspStart = wkspStart;
-    unsigned* const countWksp = (unsigned*)workspace;
-    const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
-    BYTE* const cTableWksp = countWkspStart + countWkspSize;
-    const size_t cTableWkspSize = wkspEnd-cTableWksp;
-    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
-    FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
-    FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
-    FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
-    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
-    const BYTE* const llCodeTable = seqStorePtr->llCode;
-    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
-    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
-    BYTE* const ostart = fseMetadata->fseTablesBuffer;
-    BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
-    BYTE* op = ostart;
-
-    assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
-    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
-    ZSTD_memset(workspace, 0, wkspSize);
-
-    fseMetadata->lastCountSize = 0;
-    /* convert length/distances into codes */
-    ZSTD_seqToCodes(seqStorePtr);
-    /* build CTable for Literal Lengths */
-    {   U32 LLtype;
-        unsigned max = MaxLL;
-        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize);  /* can't fail */
-        DEBUGLOG(5, "Building LL table");
-        nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
-        LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
-                                        countWksp, max, mostFrequent, nbSeq,
-                                        LLFSELog, prevEntropy->litlengthCTable,
-                                        LL_defaultNorm, LL_defaultNormLog,
-                                        ZSTD_defaultAllowed, strategy);
-        assert(set_basic < set_compressed && set_rle < set_compressed);
-        assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
-                                                    countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
-                                                    prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
-                                                    cTableWksp, cTableWkspSize);
-            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
-            if (LLtype == set_compressed)
-                fseMetadata->lastCountSize = countSize;
-            op += countSize;
-            fseMetadata->llType = (symbolEncodingType_e) LLtype;
-    }   }
-    /* build CTable for Offsets */
-    {   U32 Offtype;
-        unsigned max = MaxOff;
-        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize);  /* can't fail */
-        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
-        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
-        DEBUGLOG(5, "Building OF table");
-        nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
-        Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
-                                        countWksp, max, mostFrequent, nbSeq,
-                                        OffFSELog, prevEntropy->offcodeCTable,
-                                        OF_defaultNorm, OF_defaultNormLog,
-                                        defaultPolicy, strategy);
-        assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
-                                                    countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
-                                                    prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
-                                                    cTableWksp, cTableWkspSize);
-            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
-            if (Offtype == set_compressed)
-                fseMetadata->lastCountSize = countSize;
-            op += countSize;
-            fseMetadata->ofType = (symbolEncodingType_e) Offtype;
-    }   }
-    /* build CTable for MatchLengths */
-    {   U32 MLtype;
-        unsigned max = MaxML;
-        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize);   /* can't fail */
-        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
-        nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
-        MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
-                                        countWksp, max, mostFrequent, nbSeq,
-                                        MLFSELog, prevEntropy->matchlengthCTable,
-                                        ML_defaultNorm, ML_defaultNormLog,
-                                        ZSTD_defaultAllowed, strategy);
-        assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
-                                                    countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
-                                                    prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
-                                                    cTableWksp, cTableWkspSize);
-            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
-            if (MLtype == set_compressed)
-                fseMetadata->lastCountSize = countSize;
-            op += countSize;
-            fseMetadata->mlType = (symbolEncodingType_e) MLtype;
-    }   }
-    assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
-    return op-ostart;
-}
-
-
-/** ZSTD_buildSuperBlockEntropy() :
- *  Builds entropy for the super-block.
- *  @return : 0 on success or error code */
-static size_t
-ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
-                      const ZSTD_entropyCTables_t* prevEntropy,
-                            ZSTD_entropyCTables_t* nextEntropy,
-                      const ZSTD_CCtx_params* cctxParams,
-                            ZSTD_entropyCTablesMetadata_t* entropyMetadata,
-                            void* workspace, size_t wkspSize)
-{
-    size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
-    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
-    entropyMetadata->hufMetadata.hufDesSize =
-        ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
-                                            &prevEntropy->huf, &nextEntropy->huf,
-                                            &entropyMetadata->hufMetadata,
-                                            ZSTD_disableLiteralsCompression(cctxParams),
-                                            workspace, wkspSize);
-    FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
-    entropyMetadata->fseMetadata.fseTablesSize =
-        ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
-                                              &prevEntropy->fse, &nextEntropy->fse,
-                                              cctxParams,
-                                              &entropyMetadata->fseMetadata,
-                                              workspace, wkspSize);
-    FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
-    return 0;
-}
-
 /** ZSTD_compressSubBlock_literal() :
  *  Compresses literals section for a sub-block.
  *  When we have to write the Huffman table we will sometimes choose a header
@@ -304,7 +26,7 @@
  *  before we know the table size + compressed size, so we have a bound on the
  *  table size. If we guessed incorrectly, we fall back to uncompressed literals.
  *
- *  We write the header when writeEntropy=1 and set entropyWrriten=1 when we succeeded
+ *  We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded
  *  in writing the header, otherwise it is set to 0.
  *
  *  hufMetadata->hType has literals block type info.
@@ -643,8 +365,9 @@
                                                   void* workspace, size_t wkspSize,
                                                   int writeEntropy)
 {
-    size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
+    size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
     size_t cSeqSizeEstimate = 0;
+    if (nbSeq == 0) return sequencesSectionHeaderSize;
     cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
                                          nbSeq, fseTables->offcodeCTable, NULL,
                                          OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
@@ -830,7 +553,7 @@
                                unsigned lastBlock) {
     ZSTD_entropyCTablesMetadata_t entropyMetadata;
 
-    FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore,
+    FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
           &zc->blockState.prevCBlock->entropy,
           &zc->blockState.nextCBlock->entropy,
           &zc->appliedParams,
diff --git a/lib/compress/zstd_compress_superblock.h b/lib/compress/zstd_compress_superblock.h
index 07f4cb1..176f9b1 100644
--- a/lib/compress/zstd_compress_superblock.h
+++ b/lib/compress/zstd_compress_superblock.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/compress/zstd_cwksp.h b/lib/compress/zstd_cwksp.h
index d65170b..2656d26 100644
--- a/lib/compress/zstd_cwksp.h
+++ b/lib/compress/zstd_cwksp.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -35,6 +35,10 @@
 #define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
 #endif
 
+
+/* Set our tables and aligneds to align by 64 bytes */
+#define ZSTD_CWKSP_ALIGNMENT_BYTES 64
+
 /*-*************************************
 *  Structures
 ***************************************/
@@ -117,10 +121,11 @@
  * - Tables: these are any of several different datastructures (hash tables,
  *   chain tables, binary trees) that all respect a common format: they are
  *   uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
- *   Their sizes depend on the cparams.
+ *   Their sizes depend on the cparams. These tables are 64-byte aligned.
  *
  * - Aligned: these buffers are used for various purposes that require 4 byte
- *   alignment, but don't require any initialization before they're used.
+ *   alignment, but don't require any initialization before they're used. These
+ *   buffers are each aligned to 64 bytes.
  *
  * - Buffers: these buffers are used for various purposes that don't require
  *   any alignment or initialization before they're used. This means they can
@@ -133,8 +138,7 @@
  *
  * 1. Objects
  * 2. Buffers
- * 3. Aligned
- * 4. Tables
+ * 3. Aligned/Tables
  *
  * Attempts to reserve objects of different types out of order will fail.
  */
@@ -187,6 +191,8 @@
  * Since tables aren't currently redzoned, you don't need to call through this
  * to figure out how much space you need for the matchState tables. Everything
  * else is though.
+ *
+ * Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size().
  */
 MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
     if (size == 0)
@@ -198,30 +204,110 @@
 #endif
 }
 
-MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
+/**
+ * Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes.
+ * Used to determine the number of bytes required for a given "aligned".
+ */
+MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
+    return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES));
+}
+
+/**
+ * Returns the amount of additional space the cwksp must allocate
+ * for internal purposes (currently only alignment).
+ */
+MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
+    /* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes
+     * to align the beginning of tables section, as well as another n_2=[0, 63] bytes
+     * to align the beginning of the aligned section.
+     *
+     * n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and
+     * aligneds being sized in multiples of 64 bytes.
+     */
+    size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES;
+    return slackSpace;
+}
+
+
+/**
+ * Return the number of additional bytes required to align a pointer to the given number of bytes.
+ * alignBytes must be a power of two.
+ */
+MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignBytes) {
+    size_t const alignBytesMask = alignBytes - 1;
+    size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
+    assert((alignBytes & alignBytesMask) == 0);
+    assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES);
+    return bytes;
+}
+
+/**
+ * Internal function. Do not use directly.
+ * Reserves the given number of bytes within the aligned/buffer segment of the wksp, which
+ * counts from the end of the wksp. (as opposed to the object/table segment)
+ *
+ * Returns a pointer to the beginning of that space.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes) {
+    void* const alloc = (BYTE*)ws->allocStart - bytes;
+    void* const bottom = ws->tableEnd;
+    DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
+        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    assert(alloc >= bottom);
+    if (alloc < bottom) {
+        DEBUGLOG(4, "cwksp: alloc failed!");
+        ws->allocFailed = 1;
+        return NULL;
+    }
+    if (alloc < ws->tableValidEnd) {
+        ws->tableValidEnd = alloc;
+    }
+    ws->allocStart = alloc;
+    return alloc;
+}
+
+/**
+ * Moves the cwksp to the next phase, and does any necessary allocations.
+ * Returns 0 on success, or a zstd error code.
+ */
+MEM_STATIC size_t ZSTD_cwksp_internal_advance_phase(
         ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
     assert(phase >= ws->phase);
     if (phase > ws->phase) {
+        /* Going from allocating objects to allocating buffers */
         if (ws->phase < ZSTD_cwksp_alloc_buffers &&
                 phase >= ZSTD_cwksp_alloc_buffers) {
             ws->tableValidEnd = ws->objectEnd;
         }
+
+        /* Going from allocating buffers to allocating aligneds/tables */
         if (ws->phase < ZSTD_cwksp_alloc_aligned &&
                 phase >= ZSTD_cwksp_alloc_aligned) {
-            /* If unaligned allocations down from a too-large top have left us
-             * unaligned, we need to realign our alloc ptr. Technically, this
-             * can consume space that is unaccounted for in the neededSpace
-             * calculation. However, I believe this can only happen when the
-             * workspace is too large, and specifically when it is too large
-             * by a larger margin than the space that will be consumed. */
-            /* TODO: cleaner, compiler warning friendly way to do this??? */
-            ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
-            if (ws->allocStart < ws->tableValidEnd) {
-                ws->tableValidEnd = ws->allocStart;
+            {   /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */
+                size_t const bytesToAlign =
+                    ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES);
+                DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign);
+                ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */
+                RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign),
+                                memory_allocation, "aligned phase - alignment initial allocation failed!");
+            }
+            {   /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
+                void* const alloc = ws->objectEnd;
+                size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
+                void* const end = (BYTE*)alloc + bytesToAlign;
+                DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
+                RETURN_ERROR_IF(end > ws->workspaceEnd, memory_allocation,
+                                "table phase - alignment initial allocation failed!");
+                ws->objectEnd = end;
+                ws->tableEnd = end;
+                ws->tableValidEnd = end;
             }
         }
         ws->phase = phase;
+        ZSTD_cwksp_assert_internal_consistency(ws);
     }
+    return 0;
 }
 
 /**
@@ -237,38 +323,25 @@
 MEM_STATIC void* ZSTD_cwksp_reserve_internal(
         ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
     void* alloc;
-    void* bottom = ws->tableEnd;
-    ZSTD_cwksp_internal_advance_phase(ws, phase);
-    alloc = (BYTE *)ws->allocStart - bytes;
-
-    if (bytes == 0)
+    if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase)) || bytes == 0) {
         return NULL;
+    }
 
 #if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
     /* over-reserve space */
-    alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+    bytes += 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
 #endif
 
-    DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
-        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
-    ZSTD_cwksp_assert_internal_consistency(ws);
-    assert(alloc >= bottom);
-    if (alloc < bottom) {
-        DEBUGLOG(4, "cwksp: alloc failed!");
-        ws->allocFailed = 1;
-        return NULL;
-    }
-    if (alloc < ws->tableValidEnd) {
-        ws->tableValidEnd = alloc;
-    }
-    ws->allocStart = alloc;
+    alloc = ZSTD_cwksp_reserve_internal_buffer_space(ws, bytes);
 
 #if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
     /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
      * either size. */
-    alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
-    if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
-        __asan_unpoison_memory_region(alloc, bytes);
+    if (alloc) {
+        alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+        if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
+            __asan_unpoison_memory_region(alloc, bytes);
+        }
     }
 #endif
 
@@ -283,28 +356,36 @@
 }
 
 /**
- * Reserves and returns memory sized on and aligned on sizeof(unsigned).
+ * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
  */
 MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
-    assert((bytes & (sizeof(U32)-1)) == 0);
-    return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned);
+    void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES),
+                                            ZSTD_cwksp_alloc_aligned);
+    assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
+    return ptr;
 }
 
 /**
- * Aligned on sizeof(unsigned). These buffers have the special property that
+ * Aligned on 64 bytes. These buffers have the special property that
  * their values remain constrained, allowing us to re-use them without
  * memset()-ing them.
  */
 MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
     const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
-    void* alloc = ws->tableEnd;
-    void* end = (BYTE *)alloc + bytes;
-    void* top = ws->allocStart;
+    void* alloc;
+    void* end;
+    void* top;
+
+    if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
+        return NULL;
+    }
+    alloc = ws->tableEnd;
+    end = (BYTE *)alloc + bytes;
+    top = ws->allocStart;
 
     DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
         alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
     assert((bytes & (sizeof(U32)-1)) == 0);
-    ZSTD_cwksp_internal_advance_phase(ws, phase);
     ZSTD_cwksp_assert_internal_consistency(ws);
     assert(end <= top);
     if (end > top) {
@@ -320,6 +401,8 @@
     }
 #endif
 
+    assert((bytes & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
+    assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
     return alloc;
 }
 
@@ -503,7 +586,7 @@
 
 /**
  * Moves the management of a workspace from one cwksp to another. The src cwksp
- * is left in an invalid state (src must be re-init()'ed before its used again).
+ * is left in an invalid state (src must be re-init()'ed before it's used again).
  */
 MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
     *dst = *src;
@@ -527,6 +610,24 @@
 *  Functions Checking Free Space
 ***************************************/
 
+/* ZSTD_cwksp_estimated_space_within_bounds() :
+ * Returns whether the estimated space needed for a wksp is within an acceptable limit of the
+ * actual amount of space used.
+ */
+MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws,
+                                                        size_t const estimatedSpace, int resizedWorkspace) {
+    if (resizedWorkspace) {
+        /* Resized/newly allocated wksp should have exact bounds */
+        return ZSTD_cwksp_used(ws) == estimatedSpace;
+    } else {
+        /* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes
+         * than estimatedSpace. See the comments in zstd_cwksp.h for details.
+         */
+        return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63);
+    }
+}
+
+
 MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
     return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
 }
diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c
index ef12a52..d0d3a78 100644
--- a/lib/compress/zstd_double_fast.c
+++ b/lib/compress/zstd_double_fast.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -409,7 +409,7 @@
         hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */
 
         if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
-            & (repIndex > dictStartIndex))
+            & (offset_1 < curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
             const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
@@ -477,7 +477,7 @@
                 U32 const repIndex2 = current2 - offset_2;
                 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                 if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)   /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
-                    & (repIndex2 > dictStartIndex))
+                    & (offset_2 < current2 - dictStartIndex))
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
diff --git a/lib/compress/zstd_double_fast.h b/lib/compress/zstd_double_fast.h
index 14d944d..e16b7b0 100644
--- a/lib/compress/zstd_double_fast.h
+++ b/lib/compress/zstd_double_fast.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c
index db7ce83..4edc04d 100644
--- a/lib/compress/zstd_fast.c
+++ b/lib/compress/zstd_fast.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -242,7 +242,7 @@
     assert(endIndex - prefixStartIndex <= maxDistance);
     (void)maxDistance; (void)endIndex;   /* these variables are not used when assert() is disabled */
 
-    /* ensure there will be no no underflow
+    /* ensure there will be no underflow
      * when translating a dict index into a local index */
     assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
 
@@ -416,9 +416,9 @@
         const BYTE* const repMatch = repBase + repIndex;
         hashTable[h] = curr;   /* update hash table */
         DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
-        assert(offset_1 <= curr +1);   /* check repIndex */
 
-        if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
+        if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
+             & (offset_1 < curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
@@ -453,7 +453,7 @@
                 U32 const current2 = (U32)(ip-base);
                 U32 const repIndex2 = current2 - offset_2;
                 const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
-                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex))  /* intentional overflow */
+                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 < curr - dictStartIndex))  /* intentional overflow */
                    && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
diff --git a/lib/compress/zstd_fast.h b/lib/compress/zstd_fast.h
index cf6aaa8..0d4a0c1 100644
--- a/lib/compress/zstd_fast.h
+++ b/lib/compress/zstd_fast.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c
index 49ec1b0..3d523e8 100644
--- a/lib/compress/zstd_lazy.c
+++ b/lib/compress/zstd_lazy.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -438,43 +438,9 @@
     }
 }
 
-
-
-/* *********************************
-*  Hash Chain
+/***********************************
+* Dedicated dict search
 ***********************************/
-#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & (mask)]
-
-/* Update chains up to ip (excluded)
-   Assumption : always within prefix (i.e. not within extDict) */
-FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
-                        ZSTD_matchState_t* ms,
-                        const ZSTD_compressionParameters* const cParams,
-                        const BYTE* ip, U32 const mls)
-{
-    U32* const hashTable  = ms->hashTable;
-    const U32 hashLog = cParams->hashLog;
-    U32* const chainTable = ms->chainTable;
-    const U32 chainMask = (1 << cParams->chainLog) - 1;
-    const BYTE* const base = ms->window.base;
-    const U32 target = (U32)(ip - base);
-    U32 idx = ms->nextToUpdate;
-
-    while(idx < target) { /* catch up */
-        size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
-        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
-        hashTable[h] = idx;
-        idx++;
-    }
-
-    ms->nextToUpdate = target;
-    return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
-}
-
-U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
-    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
-}
 
 void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
 {
@@ -500,11 +466,10 @@
     U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
     U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
     U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
-
     U32 hashIdx;
 
     assert(ms->cParams.chainLog <= 24);
-    assert(ms->cParams.hashLog >= ms->cParams.chainLog);
+    assert(ms->cParams.hashLog > ms->cParams.chainLog);
     assert(idx != 0);
     assert(tmpMinChain <= minChain);
 
@@ -535,7 +500,7 @@
             if (count == cacheSize) {
                 for (count = 0; count < chainLimit;) {
                     if (i < minChain) {
-                        if (!i || countBeyondMinChain++ > cacheSize) {
+                        if (!i || ++countBeyondMinChain > cacheSize) {
                             /* only allow pulling `cacheSize` number of entries
                              * into the cache or chainTable beyond `minChain`,
                              * to replace the entries pulled out of the
@@ -591,6 +556,139 @@
     ms->nextToUpdate = target;
 }
 
+/* Returns the longest match length found in the dedicated dict search structure.
+ * If none are longer than the argument ml, then ml will be returned.
+ */
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nbAttempts,
+                                            const ZSTD_matchState_t* const dms,
+                                            const BYTE* const ip, const BYTE* const iLimit,
+                                            const BYTE* const prefixStart, const U32 curr,
+                                            const U32 dictLimit, const size_t ddsIdx) {
+    const U32 ddsLowestIndex  = dms->window.dictLimit;
+    const BYTE* const ddsBase = dms->window.base;
+    const BYTE* const ddsEnd  = dms->window.nextSrc;
+    const U32 ddsSize         = (U32)(ddsEnd - ddsBase);
+    const U32 ddsIndexDelta   = dictLimit - ddsSize;
+    const U32 bucketSize      = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
+    const U32 bucketLimit     = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
+    U32 ddsAttempt;
+    U32 matchIndex;
+
+    for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
+        PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
+    }
+
+    {
+        U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
+        U32 const chainIndex = chainPackedPointer >> 8;
+
+        PREFETCH_L1(&dms->chainTable[chainIndex]);
+    }
+
+    for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
+        size_t currentMl=0;
+        const BYTE* match;
+        matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
+        match = ddsBase + matchIndex;
+
+        if (!matchIndex) {
+            return ml;
+        }
+
+        /* guaranteed by table construction */
+        (void)ddsLowestIndex;
+        assert(matchIndex >= ddsLowestIndex);
+        assert(match+4 <= ddsEnd);
+        if (MEM_read32(match) == MEM_read32(ip)) {
+            /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+            currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
+        }
+
+        /* save best solution */
+        if (currentMl > ml) {
+            ml = currentMl;
+            *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
+            if (ip+currentMl == iLimit) {
+                /* best possible, avoids read overflow on next attempt */
+                return ml;
+            }
+        }
+    }
+
+    {
+        U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
+        U32 chainIndex = chainPackedPointer >> 8;
+        U32 const chainLength = chainPackedPointer & 0xFF;
+        U32 const chainAttempts = nbAttempts - ddsAttempt;
+        U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
+        U32 chainAttempt;
+
+        for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
+            PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
+        }
+
+        for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
+            size_t currentMl=0;
+            const BYTE* match;
+            matchIndex = dms->chainTable[chainIndex];
+            match = ddsBase + matchIndex;
+
+            /* guaranteed by table construction */
+            assert(matchIndex >= ddsLowestIndex);
+            assert(match+4 <= ddsEnd);
+            if (MEM_read32(match) == MEM_read32(ip)) {
+                /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
+            }
+
+            /* save best solution */
+            if (currentMl > ml) {
+                ml = currentMl;
+                *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
+                if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+            }
+        }
+    }
+    return ml;
+}
+
+
+/* *********************************
+*  Hash Chain
+***********************************/
+#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & (mask)]
+
+/* Update chains up to ip (excluded)
+   Assumption : always within prefix (i.e. not within extDict) */
+FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
+                        ZSTD_matchState_t* ms,
+                        const ZSTD_compressionParameters* const cParams,
+                        const BYTE* ip, U32 const mls)
+{
+    U32* const hashTable  = ms->hashTable;
+    const U32 hashLog = cParams->hashLog;
+    U32* const chainTable = ms->chainTable;
+    const U32 chainMask = (1 << cParams->chainLog) - 1;
+    const BYTE* const base = ms->window.base;
+    const U32 target = (U32)(ip - base);
+    U32 idx = ms->nextToUpdate;
+
+    while(idx < target) { /* catch up */
+        size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
+        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
+        hashTable[h] = idx;
+        idx++;
+    }
+
+    ms->nextToUpdate = target;
+    return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
+}
+
+U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
+}
 
 /* inlining is important to hardwire a hot branch (template emulation) */
 FORCE_INLINE_TEMPLATE
@@ -661,90 +759,8 @@
     }
 
     if (dictMode == ZSTD_dedicatedDictSearch) {
-        const U32 ddsLowestIndex  = dms->window.dictLimit;
-        const BYTE* const ddsBase = dms->window.base;
-        const BYTE* const ddsEnd  = dms->window.nextSrc;
-        const U32 ddsSize         = (U32)(ddsEnd - ddsBase);
-        const U32 ddsIndexDelta   = dictLimit - ddsSize;
-        const U32 bucketSize      = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
-        const U32 bucketLimit     = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
-        U32 ddsAttempt;
-
-        for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
-            PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
-        }
-
-        {
-            U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
-            U32 const chainIndex = chainPackedPointer >> 8;
-
-            PREFETCH_L1(&dms->chainTable[chainIndex]);
-        }
-
-        for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
-            size_t currentMl=0;
-            const BYTE* match;
-            matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
-            match = ddsBase + matchIndex;
-
-            if (!matchIndex) {
-                return ml;
-            }
-
-            /* guaranteed by table construction */
-            (void)ddsLowestIndex;
-            assert(matchIndex >= ddsLowestIndex);
-            assert(match+4 <= ddsEnd);
-            if (MEM_read32(match) == MEM_read32(ip)) {
-                /* assumption : matchIndex <= dictLimit-4 (by table construction) */
-                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
-            }
-
-            /* save best solution */
-            if (currentMl > ml) {
-                ml = currentMl;
-                *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
-                if (ip+currentMl == iLimit) {
-                    /* best possible, avoids read overflow on next attempt */
-                    return ml;
-                }
-            }
-        }
-
-        {
-            U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
-            U32 chainIndex = chainPackedPointer >> 8;
-            U32 const chainLength = chainPackedPointer & 0xFF;
-            U32 const chainAttempts = nbAttempts - ddsAttempt;
-            U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
-            U32 chainAttempt;
-
-            for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
-                PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
-            }
-
-            for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
-                size_t currentMl=0;
-                const BYTE* match;
-                matchIndex = dms->chainTable[chainIndex];
-                match = ddsBase + matchIndex;
-
-                /* guaranteed by table construction */
-                assert(matchIndex >= ddsLowestIndex);
-                assert(match+4 <= ddsEnd);
-                if (MEM_read32(match) == MEM_read32(ip)) {
-                    /* assumption : matchIndex <= dictLimit-4 (by table construction) */
-                    currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
-                }
-
-                /* save best solution */
-                if (currentMl > ml) {
-                    ml = currentMl;
-                    *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
-                    if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
-                }
-            }
-        }
+        ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts, dms,
+                                                  ip, iLimit, prefixStart, curr, dictLimit, ddsIdx);
     } else if (dictMode == ZSTD_dictMatchState) {
         const U32* const dmsChainTable = dms->chainTable;
         const U32 dmsChainSize         = (1 << dms->cParams.chainLog);
@@ -845,11 +861,657 @@
     }
 }
 
+/* *********************************
+* (SIMD) Row-based matchfinder
+***********************************/
+/* Constants for row-based hash */
+#define ZSTD_ROW_HASH_TAG_OFFSET 1                               /* byte offset of hashes in the match state's tagTable from the beginning of a row */
+#define ZSTD_ROW_HASH_TAG_BITS 8                                 /* nb bits to use for the tag */
+#define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
+
+#define ZSTD_ROW_HASH_CACHE_MASK (ZSTD_ROW_HASH_CACHE_SIZE - 1)
+
+typedef U32 ZSTD_VecMask;   /* Clarifies when we are interacting with a U32 representing a mask of matches */
+
+#if !defined(ZSTD_NO_INTRINSICS) && defined(__SSE2__) /* SIMD SSE version */
+
+#include <emmintrin.h>
+typedef __m128i ZSTD_Vec128;
+
+/* Returns a 128-bit container with 128-bits from src */
+static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) {
+  return _mm_loadu_si128((ZSTD_Vec128 const*)src);
+}
+
+/* Returns a ZSTD_Vec128 with the byte "val" packed 16 times */
+static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) {
+  return _mm_set1_epi8((char)val);
+}
+
+/* Do byte-by-byte comparison result of x and y. Then collapse 128-bit resultant mask
+ * into a 32-bit mask that is the MSB of each byte.
+ * */
+static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) {
+  return (ZSTD_VecMask)_mm_movemask_epi8(_mm_cmpeq_epi8(x, y));
+}
+
+typedef struct {
+  __m128i fst;
+  __m128i snd;
+} ZSTD_Vec256;
+
+static ZSTD_Vec256 ZSTD_Vec256_read(const void* const ptr) {
+  ZSTD_Vec256 v;
+  v.fst = ZSTD_Vec128_read(ptr);
+  v.snd = ZSTD_Vec128_read((ZSTD_Vec128 const*)ptr + 1);
+  return v;
+}
+
+static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) {
+  ZSTD_Vec256 v;
+  v.fst = ZSTD_Vec128_set8(val);
+  v.snd = ZSTD_Vec128_set8(val);
+  return v;
+}
+
+static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) {
+  ZSTD_VecMask fstMask;
+  ZSTD_VecMask sndMask;
+  fstMask = ZSTD_Vec128_cmpMask8(x.fst, y.fst);
+  sndMask = ZSTD_Vec128_cmpMask8(x.snd, y.snd);
+  return fstMask | (sndMask << 16);
+}
+
+#elif !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) /* SIMD ARM NEON Version */
+
+#include <arm_neon.h>
+typedef uint8x16_t ZSTD_Vec128;
+
+static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) {
+  return vld1q_u8((const BYTE* const)src);
+}
+
+static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) {
+  return vdupq_n_u8(val);
+}
+
+/* Mimics '_mm_movemask_epi8()' from SSE */
+static U32 ZSTD_vmovmaskq_u8(ZSTD_Vec128 val) {
+    /* Shift out everything but the MSB bits in each byte */
+    uint16x8_t highBits = vreinterpretq_u16_u8(vshrq_n_u8(val, 7));
+    /* Merge the even lanes together with vsra (right shift and add) */
+    uint32x4_t paired16 = vreinterpretq_u32_u16(vsraq_n_u16(highBits, highBits, 7));
+    uint64x2_t paired32 = vreinterpretq_u64_u32(vsraq_n_u32(paired16, paired16, 14));
+    uint8x16_t paired64 = vreinterpretq_u8_u64(vsraq_n_u64(paired32, paired32, 28));
+    /* Extract the low 8 bits from each lane, merge */
+    return vgetq_lane_u8(paired64, 0) | ((U32)vgetq_lane_u8(paired64, 8) << 8);
+}
+
+static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) {
+  return (ZSTD_VecMask)ZSTD_vmovmaskq_u8(vceqq_u8(x, y));
+}
+
+typedef struct {
+    uint8x16_t fst;
+    uint8x16_t snd;
+} ZSTD_Vec256;
+
+static ZSTD_Vec256 ZSTD_Vec256_read(const void* const ptr) {
+  ZSTD_Vec256 v;
+  v.fst = ZSTD_Vec128_read(ptr);
+  v.snd = ZSTD_Vec128_read((ZSTD_Vec128 const*)ptr + 1);
+  return v;
+}
+
+static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) {
+  ZSTD_Vec256 v;
+  v.fst = ZSTD_Vec128_set8(val);
+  v.snd = ZSTD_Vec128_set8(val);
+  return v;
+}
+
+static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) {
+  ZSTD_VecMask fstMask;
+  ZSTD_VecMask sndMask;
+  fstMask = ZSTD_Vec128_cmpMask8(x.fst, y.fst);
+  sndMask = ZSTD_Vec128_cmpMask8(x.snd, y.snd);
+  return fstMask | (sndMask << 16);
+}
+
+#else /* Scalar fallback version */
+
+#define VEC128_NB_SIZE_T (16 / sizeof(size_t))
+typedef struct {
+    size_t vec[VEC128_NB_SIZE_T];
+} ZSTD_Vec128;
+
+static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) {
+    ZSTD_Vec128 ret;
+    ZSTD_memcpy(ret.vec, src, VEC128_NB_SIZE_T*sizeof(size_t));
+    return ret;
+}
+
+static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) {
+    ZSTD_Vec128 ret = { {0} };
+    int startBit = sizeof(size_t) * 8 - 8;
+    for (;startBit >= 0; startBit -= 8) {
+        unsigned j = 0;
+        for (;j < VEC128_NB_SIZE_T; ++j) {
+            ret.vec[j] |= ((size_t)val << startBit);
+        }
+    }
+    return ret;
+}
+
+/* Compare x to y, byte by byte, generating a "matches" bitfield :
+ * bit n of the result is set when byte n (in memory order) of x equals byte n of y.
+ * Bytes are compared through a BYTE view of the storage rather than by shifting the
+ * size_t words, so that bit positions do not depend on host endianness and agree
+ * with the SIMD variants. */
+static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) {
+    const BYTE* const xBytes = (const BYTE*)x.vec;
+    const BYTE* const yBytes = (const BYTE*)y.vec;
+    ZSTD_VecMask res = 0;
+    unsigned pos = 0;
+    for (; pos < (unsigned)sizeof(x.vec); ++pos) {
+        if (xBytes[pos] == yBytes[pos]) {
+            res |= ((U32)1 << pos);
+        }
+    }
+    return res;
+}
+
+#define VEC256_NB_SIZE_T (2*VEC128_NB_SIZE_T)   /* parenthesized so the macro expands safely inside larger expressions */
+typedef struct {
+    size_t vec[VEC256_NB_SIZE_T];
+} ZSTD_Vec256;
+
+static ZSTD_Vec256 ZSTD_Vec256_read(const void* const src) {
+    ZSTD_Vec256 ret;
+    ZSTD_memcpy(ret.vec, src, VEC256_NB_SIZE_T*sizeof(size_t));
+    return ret;
+}
+
+static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) {
+    ZSTD_Vec256 ret = { {0} };
+    int startBit = sizeof(size_t) * 8 - 8;
+    for (;startBit >= 0; startBit -= 8) {
+        unsigned j = 0;
+        for (;j < VEC256_NB_SIZE_T; ++j) {
+            ret.vec[j] |= ((size_t)val << startBit);
+        }
+    }
+    return ret;
+}
+
+/* Compare x to y, byte by byte, generating a "matches" bitfield :
+ * bit n of the result is set when byte n (in memory order) of x equals byte n of y.
+ * Bytes are compared through a BYTE view of the storage rather than by shifting the
+ * size_t words, so that bit positions do not depend on host endianness and agree
+ * with the SIMD variants. */
+static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) {
+    const BYTE* const xBytes = (const BYTE*)x.vec;
+    const BYTE* const yBytes = (const BYTE*)y.vec;
+    ZSTD_VecMask res = 0;
+    unsigned pos = 0;
+    for (; pos < (unsigned)sizeof(x.vec); ++pos) {
+        if (xBytes[pos] == yBytes[pos]) {
+            res |= ((U32)1 << pos);
+        }
+    }
+    return res;
+}
+
+#endif /* !defined(ZSTD_NO_INTRINSICS) && defined(__SSE2__) */
+
+/* ZSTD_VecMask_next():
+ * Starting from the LSB, returns the idx of the next non-zero bit, i.e. the nb of trailing zeroes.
+ * val should be non-zero : __builtin_ctz(0) is undefined, and the other branches return 0 for it.
+ */
+static U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r=0;
+    return _BitScanForward(&r, val) ? (U32)r : 0;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)
+    return (U32)__builtin_ctz(val);
+#   else
+    /* Software ctz version: http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightMultLookup */
+    static const U32 multiplyDeBruijnBitPosition[32] =
+    {
+        0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+        31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+    };
+    return multiplyDeBruijnBitPosition[((U32)((val & (0U - (U32)val)) * 0x077CB531U)) >> 27];   /* val & -val isolates the lowest set bit, using unsigned arithmetic only */
+#   endif
+}
+
+/* ZSTD_VecMask_rotateRight():
+ * Rotates a bitfield of "totalBits" bits (16 or 32) to the right by "rotation" bits.
+ * "rotation" should be strictly less than totalBits : larger values are not handled (the shift counts would be out of range).
+ */
+FORCE_INLINE_TEMPLATE ZSTD_VecMask
+ZSTD_VecMask_rotateRight(ZSTD_VecMask mask, U32 const rotation, U32 const totalBits) {
+  if (rotation == 0)
+    return mask;
+  switch (totalBits) {
+    default:
+      assert(0);   /* unsupported totalBits; when assert() is disabled this falls through to the 16-bit case */
+    case 16:
+      return (mask >> rotation) | (U16)(mask << (16 - rotation));
+    case 32:
+      return (mask >> rotation) | (U32)(mask << (32 - rotation));
+  }
+}
+
+/* ZSTD_row_nextIndex():
+ * Decrements the "head" byte stored at *tagRow, wraps it with rowMask, writes it back and
+ * returns it : the insertion slot therefore cycles backwards through [0, {entries per row}).
+ */
+FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
+  U32 const head = (U32)(tagRow[0] - 1) & rowMask;
+  tagRow[0] = (BYTE)head;
+  return head;
+}
+
+/* ZSTD_isAligned():
+ * Checks that a pointer is aligned to "align" bytes which must be a power of 2.
+ */
+MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) {
+    assert((align & (align - 1)) == 0);
+    return (((size_t)ptr) & (align - 1)) == 0;
+}
+
+/* ZSTD_row_prefetch():
+ * Performs prefetching for the hashTable and tagTable at a given row.
+ */
+FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) {
+    PREFETCH_L1(hashTable + relRow);
+    if (rowLog == 5) {
+        PREFETCH_L1(hashTable + relRow + 16);
+    }
+    PREFETCH_L1(tagTable + relRow);
+    assert(rowLog == 4 || rowLog == 5);
+    assert(ZSTD_isAligned(hashTable + relRow, 64));                 /* prefetched hash row always 64-byte aligned */
+    assert(ZSTD_isAligned(tagTable + relRow, (size_t)1 << rowLog)); /* prefetched tagRow sits on a multiple of 32 or 64 bytes */
+}
+
+/* ZSTD_row_fillHashCache():
+ * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries,
+ * but not beyond iLimit.
+ */
+static void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
+                                   U32 const rowLog, U32 const mls,
+                                   U32 idx, const BYTE* const iLimit)
+{
+    U32 const* const hashTable = ms->hashTable;
+    U16 const* const tagTable = ms->tagTable;
+    U32 const hashLog = ms->rowHashLog;
+    U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1);
+    U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch);
+
+    for (; idx < lim; ++idx) {
+        U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+        U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+        ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
+        ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash;
+    }
+
+    DEBUGLOG(6, "ZSTD_row_fillHashCache(): [%u %u %u %u %u %u %u %u]", ms->hashCache[0], ms->hashCache[1],
+                                                     ms->hashCache[2], ms->hashCache[3], ms->hashCache[4],
+                                                     ms->hashCache[5], ms->hashCache[6], ms->hashCache[7]);
+}
+
+/* ZSTD_row_nextCachedHash():
+ * Returns the cached hash stored in the slot for idx, and replaces that slot with the hash computed at
+ * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the hashTable and tagTable rows of that new hash.
+ */
+FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
+                                                  U16 const* tagTable, BYTE const* base,
+                                                  U32 idx, U32 const hashLog,
+                                                  U32 const rowLog, U32 const mls)
+{
+    U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+    U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+    ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
+    {   U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK];
+        cache[idx & ZSTD_ROW_HASH_CACHE_MASK] = newHash;
+        return hash;
+    }
+}
+
+/* ZSTD_row_update_internal():
+ * Inserts every position from ms->nextToUpdate up to ip (excluded) into the hash table and tag table.
+ * Determines the relative row, and the position within the {16, 32} entry row to insert at.
+ */
+FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
+                                                    U32 const mls, U32 const rowLog,
+                                                    U32 const rowMask, U32 const useCache)
+{
+    U32* const hashTable = ms->hashTable;
+    U16* const tagTable = ms->tagTable;
+    U32 const hashLog = ms->rowHashLog;
+    const BYTE* const base = ms->window.base;
+    const U32 target = (U32)(ip - base);
+    U32 idx = ms->nextToUpdate;
+
+    DEBUGLOG(6, "ZSTD_row_update_internal(): nextToUpdate=%u, current=%u", idx, target);
+    for (; idx < target; ++idx) {
+        U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, idx, hashLog, rowLog, mls)
+                                  : (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+        U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+        U32* const row = hashTable + relRow;
+        BYTE* tagRow = (BYTE*)(tagTable + relRow);  /* Though tagTable is laid out as a table of U16, each tag is only 1 byte.
+                                                       Explicit cast allows us to get exact desired position within each row */
+        U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
+
+        assert(hash == ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls));
+        ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK;
+        row[pos] = idx;
+    }
+    ms->nextToUpdate = target;
+}
+
+/* ZSTD_row_update():
+ * External wrapper for ZSTD_row_update_internal(). Used for filling the hashtable during dictionary
+ * processing. Uses rowLog 4 when searchLog < 5 and rowLog 5 otherwise; the hash cache is bypassed.
+ */
+void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) {
+    const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5;
+    const U32 rowMask = (1u << rowLog) - 1;
+    const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */);
+
+    DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog);
+    ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* dont use cache */);
+}
+
+/* Returns a ZSTD_VecMask (U32) that has the nth bit set to 1 if the newly-computed "tag" matches
+ * the hash at the nth position in a row of the tagTable.
+ */
+FORCE_INLINE_TEMPLATE
+ZSTD_VecMask ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head, const U32 rowEntries) {
+    ZSTD_VecMask matches = 0;
+    if (rowEntries == 16) {
+        ZSTD_Vec128 hashes        = ZSTD_Vec128_read(tagRow + ZSTD_ROW_HASH_TAG_OFFSET);
+        ZSTD_Vec128 expandedTags  = ZSTD_Vec128_set8(tag);
+        matches                   = ZSTD_Vec128_cmpMask8(hashes, expandedTags);
+    } else if (rowEntries == 32) {
+        ZSTD_Vec256 hashes        = ZSTD_Vec256_read(tagRow + ZSTD_ROW_HASH_TAG_OFFSET);
+        ZSTD_Vec256 expandedTags  = ZSTD_Vec256_set8(tag);
+        matches                   = ZSTD_Vec256_cmpMask8(hashes, expandedTags);
+    } else {
+        assert(0);
+    }
+    /* Each row is a circular buffer beginning at the value of "head". So we must rotate the "matches" bitfield
+        to match up with the actual layout of the entries within the hashTable */
+    return ZSTD_VecMask_rotateRight(matches, head, rowEntries);
+}
+
+/* The high-level approach of the SIMD row based match finder is as follows:
+ * - Figure out where to insert the new entry:
+ *      - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag"
+ *      - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines
+ *        which row to insert into.
+ *      - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can
+ *        be considered as a circular buffer with a "head" index that resides in the tagTable.
+ *      - Also insert the "tag" into the equivalent row and position in the tagTable.
+ *          - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry.
+ *                  The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively,
+ *                  for alignment/performance reasons, leaving some bytes unused.
+ * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and
+ *   generate a bitfield that we can cycle through to check the collisions in the hash table.
+ * - Pick the longest match.
+ */
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_RowFindBestMatch_generic (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* const ip, const BYTE* const iLimit,
+                        size_t* offsetPtr,
+                        const U32 mls, const ZSTD_dictMode_e dictMode,
+                        const U32 rowLog)
+{
+    U32* const hashTable = ms->hashTable;
+    U16* const tagTable = ms->tagTable;
+    U32* const hashCache = ms->hashCache;
+    const U32 hashLog = ms->rowHashLog;
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const U32 dictLimit = ms->window.dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const U32 curr = (U32)(ip-base);
+    const U32 maxDistance = 1U << cParams->windowLog;
+    const U32 lowestValid = ms->window.lowLimit;
+    const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
+    const U32 isDictionary = (ms->loadedDictEnd != 0);
+    const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
+    const U32 rowEntries = (1U << rowLog);
+    const U32 rowMask = rowEntries - 1;
+    const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */
+    U32 nbAttempts = 1U << cappedSearchLog;
+    size_t ml=4-1;   /* best match length so far : a candidate must be strictly longer to replace it */
+
+    /* DMS/DDS variables that may be referenced later ; initialized so that no path can read an indeterminate value */
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    size_t ddsIdx = 0;
+    U32 ddsExtraAttempts = 0; /* cctx hash tables are limited in searches, but allow extra searches into DDS */
+    U32 dmsTag = 0;
+    U32* dmsRow = NULL;
+    BYTE* dmsTagRow = NULL;
+
+    if (dictMode == ZSTD_dedicatedDictSearch) {
+        const U32 ddsHashLog = dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
+        {   /* Prefetch DDS hashtable entry */
+            ddsIdx = ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG;
+            PREFETCH_L1(&dms->hashTable[ddsIdx]);
+        }
+        ddsExtraAttempts = cParams->searchLog > rowLog ? 1U << (cParams->searchLog - rowLog) : 0;
+    }
+
+    if (dictMode == ZSTD_dictMatchState) {
+        /* Prefetch DMS rows */
+        U32* const dmsHashTable = dms->hashTable;
+        U16* const dmsTagTable = dms->tagTable;
+        U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+        U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+        dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
+        dmsTagRow = (BYTE*)(dmsTagTable + dmsRelRow);
+        dmsRow = dmsHashTable + dmsRelRow;
+        ZSTD_row_prefetch(dmsHashTable, dmsTagTable, dmsRelRow, rowLog);
+    }
+
+    /* Update the hashTable and tagTable up to (but not including) ip */
+    ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
+    {   /* Get the hash for ip, compute the appropriate row */
+        U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls);
+        U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+        U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK;
+        U32* const row = hashTable + relRow;
+        BYTE* tagRow = (BYTE*)(tagTable + relRow);
+        U32 const head = *tagRow & rowMask;
+        U32 matchBuffer[32 /* maximum nb entries per row */];
+        size_t numMatches = 0;
+        size_t currMatch = 0;
+        ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, head, rowEntries);
+
+        /* Cycle through the matches and prefetch */
+        for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
+            U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
+            U32 const matchIndex = row[matchPos];
+            assert(numMatches < rowEntries);
+            if (matchIndex < lowLimit)
+                break;
+            if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
+                PREFETCH_L1(base + matchIndex);
+            } else {
+                PREFETCH_L1(dictBase + matchIndex);
+            }
+            matchBuffer[numMatches++] = matchIndex;
+        }
+
+        /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop
+           in ZSTD_row_update_internal() at the next search. */
+        {
+            U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
+            tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag;
+            row[pos] = ms->nextToUpdate++;
+        }
+
+        /* Return the longest match */
+        for (; currMatch < numMatches; ++currMatch) {
+            U32 const matchIndex = matchBuffer[currMatch];
+            size_t currentMl=0;
+            assert(matchIndex < curr);
+            assert(matchIndex >= lowLimit);
+
+            if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
+                const BYTE* const match = base + matchIndex;
+                assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
+                if (match[ml] == ip[ml])   /* potentially better */
+                    currentMl = ZSTD_count(ip, match, iLimit);
+            } else {
+                const BYTE* const match = dictBase + matchIndex;
+                assert(match+4 <= dictEnd);
+                if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                    currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
+            }
+
+            /* Save best solution */
+            if (currentMl > ml) {
+                ml = currentMl;
+                *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
+                if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+            }
+        }
+    }
+
+    if (dictMode == ZSTD_dedicatedDictSearch) {
+        ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts + ddsExtraAttempts, dms,
+                                                  ip, iLimit, prefixStart, curr, dictLimit, ddsIdx);
+    } else if (dictMode == ZSTD_dictMatchState) {
+        /* TODO: Measure and potentially add prefetching to DMS */
+        const U32 dmsLowestIndex       = dms->window.dictLimit;
+        const BYTE* const dmsBase      = dms->window.base;
+        const BYTE* const dmsEnd       = dms->window.nextSrc;
+        const U32 dmsSize              = (U32)(dmsEnd - dmsBase);
+        const U32 dmsIndexDelta        = dictLimit - dmsSize;
+
+        {   U32 const head = *dmsTagRow & rowMask;
+            U32 matchBuffer[32 /* maximum nb row entries */];
+            size_t numMatches = 0;
+            size_t currMatch = 0;
+            ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries);
+
+            for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
+                U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
+                U32 const matchIndex = dmsRow[matchPos];
+                if (matchIndex < dmsLowestIndex)
+                    break;
+                PREFETCH_L1(dmsBase + matchIndex);
+                matchBuffer[numMatches++] = matchIndex;
+            }
+
+            /* Return the longest match */
+            for (; currMatch < numMatches; ++currMatch) {
+                U32 const matchIndex = matchBuffer[currMatch];
+                size_t currentMl=0;
+                assert(matchIndex >= dmsLowestIndex);
+                assert(matchIndex < curr);
+
+                {   const BYTE* const match = dmsBase + matchIndex;
+                    assert(match+4 <= dmsEnd);
+                    if (MEM_read32(match) == MEM_read32(ip))
+                        currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
+                }
+
+                if (currentMl > ml) {
+                    ml = currentMl;
+                    *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
+                    if (ip+currentMl == iLimit) break;
+                }
+            }
+        }
+    }
+    return ml;
+}
+
+/* Inlining is important to hardwire a hot branch (template emulation) */
+FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        const ZSTD_dictMode_e dictMode, size_t* offsetPtr, const U32 rowLog)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, dictMode, rowLog);
+    case 5 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, dictMode, rowLog);
+    case 7 :
+    case 6 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, dictMode, rowLog);
+    }
+}
+
+FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_selectRowLog (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5);
+    switch(cappedSearchLog)
+    {
+    default :
+    case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_noDict, offsetPtr, 4);
+    case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_noDict, offsetPtr, 5);
+    }
+}
+
+FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_dictMatchState_selectRowLog(
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5);
+    switch(cappedSearchLog)
+    {
+    default :
+    case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dictMatchState, offsetPtr, 4);
+    case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dictMatchState, offsetPtr, 5);
+    }
+}
+
+FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_dedicatedDictSearch_selectRowLog(
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5);
+    switch(cappedSearchLog)
+    {
+    default :
+    case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dedicatedDictSearch, offsetPtr, 4);
+    case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dedicatedDictSearch, offsetPtr, 5);
+    }
+}
+
+FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_extDict_selectRowLog (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5);
+    switch(cappedSearchLog)
+    {
+    default :
+    case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_extDict, offsetPtr, 4);
+    case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_extDict, offsetPtr, 5);
+    }
+}
+
 
 /* *******************************
 *  Common parser - lazy strategy
 *********************************/
-typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
+typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searchMethod_e;
 
 FORCE_INLINE_TEMPLATE size_t
 ZSTD_compressBlock_lazy_generic(
@@ -863,10 +1525,11 @@
     const BYTE* ip = istart;
     const BYTE* anchor = istart;
     const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = iend - 8;
+    const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8;
     const BYTE* const base = ms->window.base;
     const U32 prefixLowestIndex = ms->window.dictLimit;
     const BYTE* const prefixLowest = base + prefixLowestIndex;
+    const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5;
 
     typedef size_t (*searchMax_f)(
                         ZSTD_matchState_t* ms,
@@ -878,26 +1541,30 @@
      * that should never occur (extDict modes go to the other implementation
      * below and there is no DDSS for binary tree search yet).
      */
-    const searchMax_f searchFuncs[4][2] = {
+    const searchMax_f searchFuncs[4][3] = {
         {
             ZSTD_HcFindBestMatch_selectMLS,
-            ZSTD_BtFindBestMatch_selectMLS
+            ZSTD_BtFindBestMatch_selectMLS,
+            ZSTD_RowFindBestMatch_selectRowLog
         },
         {
             NULL,
+            NULL,
             NULL
         },
         {
             ZSTD_HcFindBestMatch_dictMatchState_selectMLS,
-            ZSTD_BtFindBestMatch_dictMatchState_selectMLS
+            ZSTD_BtFindBestMatch_dictMatchState_selectMLS,
+            ZSTD_RowFindBestMatch_dictMatchState_selectRowLog
         },
         {
             ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS,
-            NULL
+            NULL,
+            ZSTD_RowFindBestMatch_dedicatedDictSearch_selectRowLog
         }
     };
 
-    searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree];
+    searchMax_f const searchMax = searchFuncs[dictMode][(int)searchMethod];
     U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
 
     const int isDMS = dictMode == ZSTD_dictMatchState;
@@ -915,9 +1582,7 @@
 
     assert(searchMax != NULL);
 
-    DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
-
-    /* init */
+    DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod);
     ip += (dictAndPrefixLength == 0);
     if (dictMode == ZSTD_noDict) {
         U32 const curr = (U32)(ip - base);
@@ -933,6 +1598,12 @@
         assert(offset_2 <= dictAndPrefixLength);
     }
 
+    if (searchMethod == search_rowHash) {
+        ZSTD_row_fillHashCache(ms, base, rowLog,
+                            MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
+                            ms->nextToUpdate, ilimit);
+    }
+
     /* Match Loop */
 #if defined(__GNUC__) && defined(__x86_64__)
     /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
@@ -1198,6 +1869,70 @@
     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
 }
 
+/* Row-based matchfinder */
+size_t ZSTD_compressBlock_lazy2_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_lazy_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_greedy_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_lazy_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_greedy_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
+}
+
+
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch);
+}
+
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
+}
+
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
+}
 
 FORCE_INLINE_TEMPLATE
 size_t ZSTD_compressBlock_lazy_extDict_generic(
@@ -1210,7 +1945,7 @@
     const BYTE* ip = istart;
     const BYTE* anchor = istart;
     const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = iend - 8;
+    const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8;
     const BYTE* const base = ms->window.base;
     const U32 dictLimit = ms->window.dictLimit;
     const BYTE* const prefixStart = base + dictLimit;
@@ -1218,18 +1953,28 @@
     const BYTE* const dictEnd  = dictBase + dictLimit;
     const BYTE* const dictStart  = dictBase + ms->window.lowLimit;
     const U32 windowLog = ms->cParams.windowLog;
+    const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5;
 
     typedef size_t (*searchMax_f)(
                         ZSTD_matchState_t* ms,
                         const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
-    searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
-
+    const searchMax_f searchFuncs[3] = {
+        ZSTD_HcFindBestMatch_extDict_selectMLS,
+        ZSTD_BtFindBestMatch_extDict_selectMLS,
+        ZSTD_RowFindBestMatch_extDict_selectRowLog
+    };
+    searchMax_f searchMax = searchFuncs[(int)searchMethod];
     U32 offset_1 = rep[0], offset_2 = rep[1];
 
-    DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
+    DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod);
 
     /* init */
     ip += (ip == prefixStart);
+    if (searchMethod == search_rowHash) {
+        ZSTD_row_fillHashCache(ms, base, rowLog,
+                               MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
+                               ms->nextToUpdate, ilimit);
+    }
 
     /* Match Loop */
 #if defined(__GNUC__) && defined(__x86_64__)
@@ -1249,7 +1994,8 @@
             const U32 repIndex = (U32)(curr+1 - offset_1);
             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
             const BYTE* const repMatch = repBase + repIndex;
-            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))   /* intentional overflow */
+            if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
+               & (offset_1 < curr+1 - windowLow) ) /* note: we are searching at curr+1 */
             if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
                 /* repcode detected we should take it */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1280,7 +2026,8 @@
                 const U32 repIndex = (U32)(curr - offset_1);
                 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                 const BYTE* const repMatch = repBase + repIndex;
-                if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
+                if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments  */
+                   & (offset_1 < curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
                 if (MEM_read32(ip) == MEM_read32(repMatch)) {
                     /* repcode detected */
                     const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1311,7 +2058,8 @@
                     const U32 repIndex = (U32)(curr - offset_1);
                     const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                     const BYTE* const repMatch = repBase + repIndex;
-                    if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
+                    if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments  */
+                       & (offset_1 < curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
                     if (MEM_read32(ip) == MEM_read32(repMatch)) {
                         /* repcode detected */
                         const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1357,7 +2105,8 @@
             const U32 repIndex = repCurrent - offset_2;
             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
             const BYTE* const repMatch = repBase + repIndex;
-            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
+            if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments  */
+               & (offset_2 < repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
             if (MEM_read32(ip) == MEM_read32(repMatch)) {
                 /* repcode detected we should take it */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1410,3 +2159,26 @@
 {
     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
 }
+
+size_t ZSTD_compressBlock_greedy_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
+}
+
+size_t ZSTD_compressBlock_lazy_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
+}
+
+size_t ZSTD_compressBlock_lazy2_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
+}
diff --git a/lib/compress/zstd_lazy.h b/lib/compress/zstd_lazy.h
index d0214d5..150f7b3 100644
--- a/lib/compress/zstd_lazy.h
+++ b/lib/compress/zstd_lazy.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -26,6 +26,7 @@
 #define ZSTD_LAZY_DDSS_BUCKET_LOG 2
 
 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
+void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
 
 void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
 
@@ -43,6 +44,15 @@
 size_t ZSTD_compressBlock_greedy(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
 
 size_t ZSTD_compressBlock_btlazy2_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -56,6 +66,15 @@
 size_t ZSTD_compressBlock_greedy_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
 
 size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -66,6 +85,15 @@
 size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
 
 size_t ZSTD_compressBlock_greedy_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -76,9 +104,19 @@
 size_t ZSTD_compressBlock_lazy2_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_btlazy2_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+
 
 #if defined (__cplusplus)
 }
diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c
index 3f3d7c4..fa4ebea 100644
--- a/lib/compress/zstd_ldm.c
+++ b/lib/compress/zstd_ldm.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -11,13 +11,126 @@
 #include "zstd_ldm.h"
 
 #include "../common/debug.h"
+#include "../common/xxhash.h"
 #include "zstd_fast.h"          /* ZSTD_fillHashTable() */
 #include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */
+#include "zstd_ldm_geartab.h"
 
 #define LDM_BUCKET_SIZE_LOG 3
 #define LDM_MIN_MATCH_LENGTH 64
 #define LDM_HASH_RLOG 7
-#define LDM_HASH_CHAR_OFFSET 10
+
+typedef struct {
+    U64 rolling;
+    U64 stopMask;
+} ldmRollingHashState_t;
+
+/** ZSTD_ldm_gear_init():
+ *
+ * Seeds state->rolling and derives state->stopMask from
+ * params->hashRateLog and params->minMatchLength. */
+static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params)
+{
+    unsigned maxBitsInMask = MIN(params->minMatchLength, 64);
+    unsigned hashRateLog = params->hashRateLog;
+
+    state->rolling = ~(U32)0;   /* seed : all-ones U32 constant, stored into the U64 rolling field */
+
+    /* The choice of the splitting criterion is subject to two conditions:
+     *   1. it has to trigger on average every 2^(hashRateLog) bytes;
+     *   2. ideally, it has to depend on a window of minMatchLength bytes.
+     *
+     * In the gear hash algorithm, bit n depends on the last n bytes;
+     * so in order to obtain a good quality splitting criterion it is
+     * preferable to use bits with high weight.
+     *
+     * To match condition 1 we use a mask with hashRateLog bits set
+     * and, because of the previous remark, we make sure these bits
+     * have the highest possible weight while still respecting
+     * condition 2.
+     */
+    if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) {
+        state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog);
+    } else {
+        /* In this degenerate case we simply honor the hash rate (the mask is 0 when hashRateLog == 0). */
+        state->stopMask = ((U64)1 << hashRateLog) - 1;
+    }
+}
+
+/** ZSTD_ldm_gear_reset()
+ * Feeds [data, data + minMatchLength) into a local copy of the rolling hash without registering any
+ * splits; meant for skipping over data, either at the beginning of a block, or skipping sections.
+ * NOTE(review): the result is never stored back into state->rolling, so *state is unchanged - confirm intent.
+ */
+static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state,
+                                BYTE const* data, size_t minMatchLength)
+{
+    U64 hash = state->rolling;
+    size_t n = 0;
+
+#define GEAR_ITER_ONCE() do {                                  \
+        hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
+        n += 1;                                                \
+    } while (0)
+    while (n + 3 < minMatchLength) {
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+    }
+    while (n < minMatchLength) {
+        GEAR_ITER_ONCE();
+    }
+#undef GEAR_ITER_ONCE
+}
+
+/** ZSTD_ldm_gear_feed():
+ *
+ * Rolls the hash over the first size bytes following the data pointer and
+ * stores into splits, at index *numSplits, the count of bytes consumed each
+ * time (hash & stopMask) == 0. Stops when all the data has been processed
+ * or as soon as *numSplits reaches LDM_BATCH_SIZE; the final hash is saved
+ * back into state->rolling.
+ * Precondition: The splits array must not be full.
+ * Returns: The number of bytes processed. */
+static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state,
+                                 BYTE const* data, size_t size,
+                                 size_t* splits, unsigned* numSplits)
+{
+    size_t n;
+    U64 hash, mask;
+
+    hash = state->rolling;
+    mask = state->stopMask;
+    n = 0;
+
+#define GEAR_ITER_ONCE() do { \
+        hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
+        n += 1; \
+        if (UNLIKELY((hash & mask) == 0)) { \
+            splits[*numSplits] = n; \
+            *numSplits += 1; \
+            if (*numSplits == LDM_BATCH_SIZE) \
+                goto done; \
+        } \
+    } while (0)
+
+    while (n + 3 < size) {
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+    }
+    while (n < size) {
+        GEAR_ITER_ONCE();
+    }
+
+#undef GEAR_ITER_ONCE
+
+done:
+    state->rolling = hash;
+    return n;
+}
 
 void ZSTD_ldm_adjustParameters(ldmParams_t* params,
                                ZSTD_compressionParameters const* cParams)
@@ -54,41 +167,6 @@
     return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
 }
 
-/** ZSTD_ldm_getSmallHash() :
- *  numBits should be <= 32
- *  If numBits==0, returns 0.
- *  @return : the most significant numBits of value. */
-static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
-{
-    assert(numBits <= 32);
-    return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
-}
-
-/** ZSTD_ldm_getChecksum() :
- *  numBitsToDiscard should be <= 32
- *  @return : the next most significant 32 bits after numBitsToDiscard */
-static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
-{
-    assert(numBitsToDiscard <= 32);
-    return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
-}
-
-/** ZSTD_ldm_getTag() ;
- *  Given the hash, returns the most significant numTagBits bits
- *  after (32 + hbits) bits.
- *
- *  If there are not enough bits remaining, return the last
- *  numTagBits bits. */
-static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
-{
-    assert(numTagBits < 32 && hbits <= 32);
-    if (32 - hbits < numTagBits) {
-        return hash & (((U32)1 << numTagBits) - 1);
-    } else {
-        return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
-    }
-}
-
 /** ZSTD_ldm_getBucket() :
  *  Returns a pointer to the start of the bucket associated with hash. */
 static ldmEntry_t* ZSTD_ldm_getBucket(
@@ -103,38 +181,12 @@
                                  size_t const hash, const ldmEntry_t entry,
                                  ldmParams_t const ldmParams)
 {
-    BYTE* const bucketOffsets = ldmState->bucketOffsets;
-    *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
-    bucketOffsets[hash]++;
-    bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
-}
+    BYTE* const pOffset = ldmState->bucketOffsets + hash;
+    unsigned const offset = *pOffset;
 
-/** ZSTD_ldm_makeEntryAndInsertByTag() :
- *
- *  Gets the small hash, checksum, and tag from the rollingHash.
- *
- *  If the tag matches (1 << ldmParams.hashRateLog)-1, then
- *  creates an ldmEntry from the offset, and inserts it into the hash table.
- *
- *  hBits is the length of the small hash, which is the most significant hBits
- *  of rollingHash. The checksum is the next 32 most significant bits, followed
- *  by ldmParams.hashRateLog bits that make up the tag. */
-static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
-                                             U64 const rollingHash,
-                                             U32 const hBits,
-                                             U32 const offset,
-                                             ldmParams_t const ldmParams)
-{
-    U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
-    U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
-    if (tag == tagMask) {
-        U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
-        U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
-        ldmEntry_t entry;
-        entry.offset = offset;
-        entry.checksum = checksum;
-        ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
-    }
+    *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry;
+    *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1));
+
 }
 
 /** ZSTD_ldm_countBackwardsMatch() :
@@ -212,43 +264,42 @@
     return 0;
 }
 
-/** ZSTD_ldm_fillLdmHashTable() :
- *
- *  Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
- *  lastHash is the rolling hash that corresponds to lastHashed.
- *
- *  Returns the rolling hash corresponding to position iend-1. */
-static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
-                                     U64 lastHash, const BYTE* lastHashed,
-                                     const BYTE* iend, const BYTE* base,
-                                     U32 hBits, ldmParams_t const ldmParams)
-{
-    U64 rollingHash = lastHash;
-    const BYTE* cur = lastHashed + 1;
-
-    while (cur < iend) {
-        rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
-                                              cur[ldmParams.minMatchLength-1],
-                                              state->hashPower);
-        ZSTD_ldm_makeEntryAndInsertByTag(state,
-                                         rollingHash, hBits,
-                                         (U32)(cur - base), ldmParams);
-        ++cur;
-    }
-    return rollingHash;
-}
-
 void ZSTD_ldm_fillHashTable(
-            ldmState_t* state, const BYTE* ip,
+            ldmState_t* ldmState, const BYTE* ip,
             const BYTE* iend, ldmParams_t const* params)
 {
+    U32 const minMatchLength = params->minMatchLength;
+    U32 const hBits = params->hashLog - params->bucketSizeLog;
+    BYTE const* const base = ldmState->window.base;
+    BYTE const* const istart = ip;
+    ldmRollingHashState_t hashState;
+    size_t* const splits = ldmState->splitIndices;
+    unsigned numSplits;
+
     DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
-    if ((size_t)(iend - ip) >= params->minMatchLength) {
-        U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
-        ZSTD_ldm_fillLdmHashTable(
-            state, startingHash, ip, iend - params->minMatchLength, state->window.base,
-            params->hashLog - params->bucketSizeLog,
-            *params);
+
+    ZSTD_ldm_gear_init(&hashState, params);
+    while (ip < iend) {
+        size_t hashed;
+        unsigned n;
+
+        numSplits = 0;
+        hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
+
+        for (n = 0; n < numSplits; n++) {
+            if (ip + splits[n] >= istart + minMatchLength) {
+                BYTE const* const split = ip + splits[n] - minMatchLength;
+                U64 const xxhash = XXH64(split, minMatchLength, 0);
+                U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
+                ldmEntry_t entry;
+
+                entry.offset = (U32)(split - base);
+                entry.checksum = (U32)(xxhash >> 32);
+                ZSTD_ldm_insertEntry(ldmState, hash, entry, *params);
+            }
+        }
+
+        ip += hashed;
     }
 }
 
@@ -274,11 +325,8 @@
     /* LDM parameters */
     int const extDict = ZSTD_window_hasExtDict(ldmState->window);
     U32 const minMatchLength = params->minMatchLength;
-    U64 const hashPower = ldmState->hashPower;
+    U32 const entsPerBucket = 1U << params->bucketSizeLog;
     U32 const hBits = params->hashLog - params->bucketSizeLog;
-    U32 const ldmBucketSize = 1U << params->bucketSizeLog;
-    U32 const hashRateLog = params->hashRateLog;
-    U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
     /* Prefix and extDict parameters */
     U32 const dictLimit = ldmState->window.dictLimit;
     U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
@@ -290,45 +338,69 @@
     /* Input bounds */
     BYTE const* const istart = (BYTE const*)src;
     BYTE const* const iend = istart + srcSize;
-    BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
+    BYTE const* const ilimit = iend - HASH_READ_SIZE;
     /* Input positions */
     BYTE const* anchor = istart;
     BYTE const* ip = istart;
-    /* Rolling hash */
-    BYTE const* lastHashed = NULL;
-    U64 rollingHash = 0;
+    /* Rolling hash state */
+    ldmRollingHashState_t hashState;
+    /* Arrays for staged-processing */
+    size_t* const splits = ldmState->splitIndices;
+    ldmMatchCandidate_t* const candidates = ldmState->matchCandidates;
+    unsigned numSplits;
 
-    while (ip <= ilimit) {
-        size_t mLength;
-        U32 const curr = (U32)(ip - base);
-        size_t forwardMatchLength = 0, backwardMatchLength = 0;
-        ldmEntry_t* bestEntry = NULL;
-        if (ip != istart) {
-            rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
-                                                  lastHashed[minMatchLength],
-                                                  hashPower);
-        } else {
-            rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
-        }
-        lastHashed = ip;
+    if (srcSize < minMatchLength)
+        return iend - anchor;
 
-        /* Do not insert and do not look for a match */
-        if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
-           ip++;
-           continue;
+    /* Initialize the rolling hash state with the first minMatchLength bytes */
+    ZSTD_ldm_gear_init(&hashState, params);
+    ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength);
+    ip += minMatchLength;
+
+    while (ip < ilimit) {
+        size_t hashed;
+        unsigned n;
+
+        numSplits = 0;
+        hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip,
+                                    splits, &numSplits);
+
+        for (n = 0; n < numSplits; n++) {
+            BYTE const* const split = ip + splits[n] - minMatchLength;
+            U64 const xxhash = XXH64(split, minMatchLength, 0);
+            U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
+
+            candidates[n].split = split;
+            candidates[n].hash = hash;
+            candidates[n].checksum = (U32)(xxhash >> 32);
+            candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params);
+            PREFETCH_L1(candidates[n].bucket);
         }
 
-        /* Get the best entry and compute the match lengths */
-        {
-            ldmEntry_t* const bucket =
-                ZSTD_ldm_getBucket(ldmState,
-                                   ZSTD_ldm_getSmallHash(rollingHash, hBits),
-                                   *params);
-            ldmEntry_t* cur;
-            size_t bestMatchLength = 0;
-            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
+        for (n = 0; n < numSplits; n++) {
+            size_t forwardMatchLength = 0, backwardMatchLength = 0,
+                   bestMatchLength = 0, mLength;
+            U32 offset;
+            BYTE const* const split = candidates[n].split;
+            U32 const checksum = candidates[n].checksum;
+            U32 const hash = candidates[n].hash;
+            ldmEntry_t* const bucket = candidates[n].bucket;
+            ldmEntry_t const* cur;
+            ldmEntry_t const* bestEntry = NULL;
+            ldmEntry_t newEntry;
 
-            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
+            newEntry.offset = (U32)(split - base);
+            newEntry.checksum = checksum;
+
+            /* If a split point would generate a sequence overlapping with
+             * the previous one, we merely register it in the hash table and
+             * move on */
+            if (split < anchor) {
+                ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
+                continue;
+            }
+
+            for (cur = bucket; cur < bucket + entsPerBucket; cur++) {
                 size_t curForwardMatchLength, curBackwardMatchLength,
                        curTotalMatchLength;
                 if (cur->checksum != checksum || cur->offset <= lowestIndex) {
@@ -342,31 +414,23 @@
                         cur->offset < dictLimit ? dictEnd : iend;
                     BYTE const* const lowMatchPtr =
                         cur->offset < dictLimit ? dictStart : lowPrefixPtr;
-
-                    curForwardMatchLength = ZSTD_count_2segments(
-                                                ip, pMatch, iend,
-                                                matchEnd, lowPrefixPtr);
+                    curForwardMatchLength =
+                        ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr);
                     if (curForwardMatchLength < minMatchLength) {
                         continue;
                     }
-                    curBackwardMatchLength =
-                        ZSTD_ldm_countBackwardsMatch_2segments(ip, anchor,
-                                                               pMatch, lowMatchPtr,
-                                                               dictStart, dictEnd);
-                    curTotalMatchLength = curForwardMatchLength +
-                                          curBackwardMatchLength;
+                    curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments(
+                            split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd);
                 } else { /* !extDict */
                     BYTE const* const pMatch = base + cur->offset;
-                    curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
+                    curForwardMatchLength = ZSTD_count(split, pMatch, iend);
                     if (curForwardMatchLength < minMatchLength) {
                         continue;
                     }
                     curBackwardMatchLength =
-                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
-                                                     lowPrefixPtr);
-                    curTotalMatchLength = curForwardMatchLength +
-                                          curBackwardMatchLength;
+                        ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr);
                 }
+                curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength;
 
                 if (curTotalMatchLength > bestMatchLength) {
                     bestMatchLength = curTotalMatchLength;
@@ -375,57 +439,54 @@
                     bestEntry = cur;
                 }
             }
-        }
 
-        /* No match found -- continue searching */
-        if (bestEntry == NULL) {
-            ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
-                                             hBits, curr,
-                                             *params);
-            ip++;
-            continue;
-        }
+            /* No match found -- insert an entry into the hash table
+             * and process the next candidate match */
+            if (bestEntry == NULL) {
+                ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
+                continue;
+            }
 
-        /* Match found */
-        mLength = forwardMatchLength + backwardMatchLength;
-        ip -= backwardMatchLength;
+            /* Match found */
+            offset = (U32)(split - base) - bestEntry->offset;
+            mLength = forwardMatchLength + backwardMatchLength;
+            {
+                rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
 
-        {
-            /* Store the sequence:
-             * ip = curr - backwardMatchLength
-             * The match is at (bestEntry->offset - backwardMatchLength)
+                /* Out of sequence storage */
+                if (rawSeqStore->size == rawSeqStore->capacity)
+                    return ERROR(dstSize_tooSmall);
+                seq->litLength = (U32)(split - backwardMatchLength - anchor);
+                seq->matchLength = (U32)mLength;
+                seq->offset = offset;
+                rawSeqStore->size++;
+            }
+
+            /* Insert the current entry into the hash table --- it must be
+             * done after the previous block to avoid clobbering bestEntry */
+            ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
+
+            anchor = split + forwardMatchLength;
+
+            /* If we find a match that ends after the data that we've hashed
+             * then we have a repeating, overlapping, pattern. E.g. all zeros.
+             * If one repetition of the pattern matches our `stopMask` then all
+             * repetitions will. We don't need to insert them all into out table,
+             * only the first one. So skip over overlapping matches.
+             * This is a major speed boost (20x) for compressing a single byte
+             * repeated, when that byte ends up in the table.
              */
-            U32 const matchIndex = bestEntry->offset;
-            U32 const offset = curr - matchIndex;
-            rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
-
-            /* Out of sequence storage */
-            if (rawSeqStore->size == rawSeqStore->capacity)
-                return ERROR(dstSize_tooSmall);
-            seq->litLength = (U32)(ip - anchor);
-            seq->matchLength = (U32)mLength;
-            seq->offset = offset;
-            rawSeqStore->size++;
+            if (anchor > ip + hashed) {
+                ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
+                /* Continue the outer loop at anchor (ip + hashed == anchor). */
+                ip = anchor - hashed;
+                break;
+            }
         }
 
-        /* Insert the current entry into the hash table */
-        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
-                                         (U32)(lastHashed - base),
-                                         *params);
-
-        assert(ip + backwardMatchLength == lastHashed);
-
-        /* Fill the hash table from lastHashed+1 to ip+mLength*/
-        /* Heuristic: don't need to fill the entire table at end of block */
-        if (ip + mLength <= ilimit) {
-            rollingHash = ZSTD_ldm_fillLdmHashTable(
-                              ldmState, rollingHash, lastHashed,
-                              ip + mLength, base, hBits, *params);
-            lastHashed = ip + mLength - 1;
-        }
-        ip += mLength;
-        anchor = ip;
+        ip += hashed;
     }
+
     return iend - anchor;
 }
 
@@ -474,7 +535,7 @@
 
         assert(chunkStart < iend);
         /* 1. Perform overflow correction if necessary. */
-        if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
+        if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) {
             U32 const ldmHSize = 1U << params->hashLog;
             U32 const correction = ZSTD_window_correctOverflow(
                 &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
@@ -596,12 +657,13 @@
 
 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
     ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+    ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
     void const* src, size_t srcSize)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     unsigned const minMatch = cParams->minMatch;
     ZSTD_blockCompressor const blockCompressor =
-        ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
+        ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms));
     /* Input bounds */
     BYTE const* const istart = (BYTE const*)src;
     BYTE const* const iend = istart + srcSize;
@@ -620,7 +682,7 @@
 
     assert(rawSeqStore->pos <= rawSeqStore->size);
     assert(rawSeqStore->size <= rawSeqStore->capacity);
-    /* Loop through each sequence and apply the block compressor to the lits */
+    /* Loop through each sequence and apply the block compressor to the literals */
     while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
         /* maybeSplitSequence updates rawSeqStore->pos */
         rawSeq const sequence = maybeSplitSequence(rawSeqStore,
diff --git a/lib/compress/zstd_ldm.h b/lib/compress/zstd_ldm.h
index 6561024..393466f 100644
--- a/lib/compress/zstd_ldm.h
+++ b/lib/compress/zstd_ldm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -66,6 +66,7 @@
  */
 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
             ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+            ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
             void const* src, size_t srcSize);
 
 /**
@@ -73,7 +74,7 @@
  *
  * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
  * Avoids emitting matches less than `minMatch` bytes.
- * Must be called for data with is not passed to ZSTD_ldm_blockCompress().
+ * Must be called for data that is not passed to ZSTD_ldm_blockCompress().
  */
 void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
     U32 const minMatch);
diff --git a/lib/compress/zstd_ldm_geartab.h b/lib/compress/zstd_ldm_geartab.h
new file mode 100644
index 0000000..e5c24d8
--- /dev/null
+++ b/lib/compress/zstd_ldm_geartab.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_LDM_GEARTAB_H
+#define ZSTD_LDM_GEARTAB_H
+
+static U64 ZSTD_ldm_gearTab[256] = {
+    0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc,
+    0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05,
+    0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e,
+    0x9c8528f65badeaca, 0x86563706e2097529, 0x2902475fa375d889,
+    0xafb32a9739a5ebe6, 0xce2714da3883e639, 0x21eaf821722e69e,
+    0x37b628620b628,    0x49a8d455d88caf5,  0x8556d711e6958140,
+    0x4f7ae74fc605c1f,  0x829f0c3468bd3a20, 0x4ffdc885c625179e,
+    0x8473de048a3daf1b, 0x51008822b05646b2, 0x69d75d12b2d1cc5f,
+    0x8c9d4a19159154bc, 0xc3cc10f4abbd4003, 0xd06ddc1cecb97391,
+    0xbe48e6e7ed80302e, 0x3481db31cee03547, 0xacc3f67cdaa1d210,
+    0x65cb771d8c7f96cc, 0x8eb27177055723dd, 0xc789950d44cd94be,
+    0x934feadc3700b12b, 0x5e485f11edbdf182, 0x1e2e2a46fd64767a,
+    0x2969ca71d82efa7c, 0x9d46e9935ebbba2e, 0xe056b67e05e6822b,
+    0x94d73f55739d03a0, 0xcd7010bdb69b5a03, 0x455ef9fcd79b82f4,
+    0x869cb54a8749c161, 0x38d1a4fa6185d225, 0xb475166f94bbe9bb,
+    0xa4143548720959f1, 0x7aed4780ba6b26ba, 0xd0ce264439e02312,
+    0x84366d746078d508, 0xa8ce973c72ed17be, 0x21c323a29a430b01,
+    0x9962d617e3af80ee, 0xab0ce91d9c8cf75b, 0x530e8ee6d19a4dbc,
+    0x2ef68c0cf53f5d72, 0xc03a681640a85506, 0x496e4e9f9c310967,
+    0x78580472b59b14a0, 0x273824c23b388577, 0x66bf923ad45cb553,
+    0x47ae1a5a2492ba86, 0x35e304569e229659, 0x4765182a46870b6f,
+    0x6cbab625e9099412, 0xddac9a2e598522c1, 0x7172086e666624f2,
+    0xdf5003ca503b7837, 0x88c0c1db78563d09, 0x58d51865acfc289d,
+    0x177671aec65224f1, 0xfb79d8a241e967d7, 0x2be1e101cad9a49a,
+    0x6625682f6e29186b, 0x399553457ac06e50, 0x35dffb4c23abb74,
+    0x429db2591f54aade, 0xc52802a8037d1009, 0x6acb27381f0b25f3,
+    0xf45e2551ee4f823b, 0x8b0ea2d99580c2f7, 0x3bed519cbcb4e1e1,
+    0xff452823dbb010a,  0x9d42ed614f3dd267, 0x5b9313c06257c57b,
+    0xa114b8008b5e1442, 0xc1fe311c11c13d4b, 0x66e8763ea34c5568,
+    0x8b982af1c262f05d, 0xee8876faaa75fbb7, 0x8a62a4d0d172bb2a,
+    0xc13d94a3b7449a97, 0x6dbbba9dc15d037c, 0xc786101f1d92e0f1,
+    0xd78681a907a0b79b, 0xf61aaf2962c9abb9, 0x2cfd16fcd3cb7ad9,
+    0x868c5b6744624d21, 0x25e650899c74ddd7, 0xba042af4a7c37463,
+    0x4eb1a539465a3eca, 0xbe09dbf03b05d5ca, 0x774e5a362b5472ba,
+    0x47a1221229d183cd, 0x504b0ca18ef5a2df, 0xdffbdfbde2456eb9,
+    0x46cd2b2fbee34634, 0xf2aef8fe819d98c3, 0x357f5276d4599d61,
+    0x24a5483879c453e3, 0x88026889192b4b9,  0x28da96671782dbec,
+    0x4ef37c40588e9aaa, 0x8837b90651bc9fb3, 0xc164f741d3f0e5d6,
+    0xbc135a0a704b70ba, 0x69cd868f7622ada,  0xbc37ba89e0b9c0ab,
+    0x47c14a01323552f6, 0x4f00794bacee98bb, 0x7107de7d637a69d5,
+    0x88af793bb6f2255e, 0xf3c6466b8799b598, 0xc288c616aa7f3b59,
+    0x81ca63cf42fca3fd, 0x88d85ace36a2674b, 0xd056bd3792389e7,
+    0xe55c396c4e9dd32d, 0xbefb504571e6c0a6, 0x96ab32115e91e8cc,
+    0xbf8acb18de8f38d1, 0x66dae58801672606, 0x833b6017872317fb,
+    0xb87c16f2d1c92864, 0xdb766a74e58b669c, 0x89659f85c61417be,
+    0xc8daad856011ea0c, 0x76a4b565b6fe7eae, 0xa469d085f6237312,
+    0xaaf0365683a3e96c, 0x4dbb746f8424f7b8, 0x638755af4e4acc1,
+    0x3d7807f5bde64486, 0x17be6d8f5bbb7639, 0x903f0cd44dc35dc,
+    0x67b672eafdf1196c, 0xa676ff93ed4c82f1, 0x521d1004c5053d9d,
+    0x37ba9ad09ccc9202, 0x84e54d297aacfb51, 0xa0b4b776a143445,
+    0x820d471e20b348e,  0x1874383cb83d46dc, 0x97edeec7a1efe11c,
+    0xb330e50b1bdc42aa, 0x1dd91955ce70e032, 0xa514cdb88f2939d5,
+    0x2791233fd90db9d3, 0x7b670a4cc50f7a9b, 0x77c07d2a05c6dfa5,
+    0xe3778b6646d0a6fa, 0xb39c8eda47b56749, 0x933ed448addbef28,
+    0xaf846af6ab7d0bf4, 0xe5af208eb666e49,  0x5e6622f73534cd6a,
+    0x297daeca42ef5b6e, 0x862daef3d35539a6, 0xe68722498f8e1ea9,
+    0x981c53093dc0d572, 0xfa09b0bfbf86fbf5, 0x30b1e96166219f15,
+    0x70e7d466bdc4fb83, 0x5a66736e35f2a8e9, 0xcddb59d2b7c1baef,
+    0xd6c7d247d26d8996, 0xea4e39eac8de1ba3, 0x539c8bb19fa3aff2,
+    0x9f90e4c5fd508d8,  0xa34e5956fbaf3385, 0x2e2f8e151d3ef375,
+    0x173691e9b83faec1, 0xb85a8d56bf016379, 0x8382381267408ae3,
+    0xb90f901bbdc0096d, 0x7c6ad32933bcec65, 0x76bb5e2f2c8ad595,
+    0x390f851a6cf46d28, 0xc3e6064da1c2da72, 0xc52a0c101cfa5389,
+    0xd78eaf84a3fbc530, 0x3781b9e2288b997e, 0x73c2f6dea83d05c4,
+    0x4228e364c5b5ed7,  0x9d7a3edf0da43911, 0x8edcfeda24686756,
+    0x5e7667a7b7a9b3a1, 0x4c4f389fa143791d, 0xb08bc1023da7cddc,
+    0x7ab4be3ae529b1cc, 0x754e6132dbe74ff9, 0x71635442a839df45,
+    0x2f6fb1643fbe52de, 0x961e0a42cf7a8177, 0xf3b45d83d89ef2ea,
+    0xee3de4cf4a6e3e9b, 0xcd6848542c3295e7, 0xe4cee1664c78662f,
+    0x9947548b474c68c4, 0x25d73777a5ed8b0b, 0xc915b1d636b7fc,
+    0x21c2ba75d9b0d2da, 0x5f6b5dcf608a64a1, 0xdcf333255ff9570c,
+    0x633b922418ced4ee, 0xc136dde0b004b34a, 0x58cc83b05d4b2f5a,
+    0x5eb424dda28e42d2, 0x62df47369739cd98, 0xb4e0b42485e4ce17,
+    0x16e1f0c1f9a8d1e7, 0x8ec3916707560ebf, 0x62ba6e2df2cc9db3,
+    0xcbf9f4ff77d83a16, 0x78d9d7d07d2bbcc4, 0xef554ce1e02c41f4,
+    0x8d7581127eccf94d, 0xa9b53336cb3c8a05, 0x38c42c0bf45c4f91,
+    0x640893cdf4488863, 0x80ec34bc575ea568, 0x39f324f5b48eaa40,
+    0xe9d9ed1f8eff527f, 0x9224fc058cc5a214, 0xbaba00b04cfe7741,
+    0x309a9f120fcf52af, 0xa558f3ec65626212, 0x424bec8b7adabe2f,
+    0x41622513a6aea433, 0xb88da2d5324ca798, 0xd287733b245528a4,
+    0x9a44697e6d68aec3, 0x7b1093be2f49bb28, 0x50bbec632e3d8aad,
+    0x6cd90723e1ea8283, 0x897b9e7431b02bf3, 0x219efdcb338a7047,
+    0x3b0311f0a27c0656, 0xdb17bf91c0db96e7, 0x8cd4fd6b4e85a5b2,
+    0xfab071054ba6409d, 0x40d6fe831fa9dfd9, 0xaf358debad7d791e,
+    0xeb8d0e25a65e3e58, 0xbbcbd3df14e08580, 0xcf751f27ecdab2b,
+    0x2b4da14f2613d8f4
+};
+
+#endif /* ZSTD_LDM_GEARTAB_H */
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index e55c459..402a7e5 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h
index 9aba8a9..627255f 100644
--- a/lib/compress/zstd_opt.h
+++ b/lib/compress/zstd_opt.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 50454a5..22aa3e1 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -472,8 +472,6 @@
         ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
         assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
         assert(params.ldmParams.hashRateLog < 32);
-        serialState->ldmState.hashPower =
-                ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
     } else {
         ZSTD_memset(&params.ldmParams, 0, sizeof(params.ldmParams));
     }
@@ -486,10 +484,10 @@
         size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
         unsigned const bucketLog =
             params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
-        size_t const bucketSize = (size_t)1 << bucketLog;
         unsigned const prevBucketLog =
             serialState->params.ldmParams.hashLog -
             serialState->params.ldmParams.bucketSizeLog;
+        size_t const numBuckets = (size_t)1 << bucketLog;
         /* Size the seq pool tables */
         ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
         /* Reset the window */
@@ -501,20 +499,20 @@
         }
         if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
             ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
-            serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(bucketSize, cMem);
+            serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(numBuckets, cMem);
         }
         if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
             return 1;
         /* Zero the tables */
         ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize);
-        ZSTD_memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
+        ZSTD_memset(serialState->ldmState.bucketOffsets, 0, numBuckets);
 
         /* Update window state and fill hash table with dict */
         serialState->ldmState.loadedDictEnd = 0;
         if (dictSize > 0) {
             if (dictContentType == ZSTD_dct_rawContent) {
                 BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
-                ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
+                ZSTD_window_update(&serialState->ldmState.window, dict, dictSize, /* forceNonContiguous */ 0);
                 ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
                 serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
             } else {
@@ -571,7 +569,7 @@
             assert(seqStore.seq != NULL && seqStore.pos == 0 &&
                    seqStore.size == 0 && seqStore.capacity > 0);
             assert(src.size <= serialState->params.jobSize);
-            ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
+            ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0);
             error = ZSTD_ldm_generateSequences(
                 &serialState->ldmState, &seqStore,
                 &serialState->params.ldmParams, src.start, src.size);
@@ -683,6 +681,8 @@
     if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
     /* Don't run LDM for the chunks, since we handle it externally */
     jobParams.ldmParams.enableLdm = 0;
+    /* Correct nbWorkers to 0. */
+    jobParams.nbWorkers = 0;
 
 
     /* init */
@@ -695,6 +695,10 @@
         {   size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
             if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
         }
+        if (!job->firstJob) {
+            size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0);
+            if (ZSTD_isError(err)) JOB_ERROR(err);
+        }
         {   size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
                                         job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
                                         ZSTD_dtlm_fast,
@@ -750,6 +754,13 @@
             if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
             lastCBlockSize = cSize;
     }   }
+    if (!job->firstJob) {
+        /* Double check that we don't have an ext-dict, because then our
+         * repcode invalidation doesn't work.
+         */
+        assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
+    }
+    ZSTD_CCtx_trace(cctx, 0);
 
 _endJob:
     ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize);
@@ -1239,9 +1250,8 @@
 
     if (params.rsyncable) {
         /* Aim for the targetsectionSize as the average job size. */
-        U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
-        U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20;
-        assert(jobSizeMB >= 1);
+        U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);
+        U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);
         DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
         mtctx->rsync.hash = 0;
         mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h
index 0a9e551..2fee2ec 100644
--- a/lib/compress/zstdmt_compress.h
+++ b/lib/compress/zstdmt_compress.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -32,11 +32,11 @@
 
 
 /* ===   Constants   === */
-#ifndef ZSTDMT_NBWORKERS_MAX
-#  define ZSTDMT_NBWORKERS_MAX 200
+#ifndef ZSTDMT_NBWORKERS_MAX /* a different value can be selected at compile time */
+#  define ZSTDMT_NBWORKERS_MAX ((sizeof(void*)==4) /*32-bit*/ ? 64 : 256)
 #endif
-#ifndef ZSTDMT_JOBSIZE_MIN
-#  define ZSTDMT_JOBSIZE_MIN (1 MB)
+#ifndef ZSTDMT_JOBSIZE_MIN   /* a different value can be selected at compile time */
+#  define ZSTDMT_JOBSIZE_MIN (512 KB)
 #endif
 #define ZSTDMT_JOBLOG_MAX   (MEM_32bits() ? 29 : 30)
 #define ZSTDMT_JOBSIZE_MAX  (MEM_32bits() ? (512 MB) : (1024 MB))
diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c
index 1418206..b93c9a0 100644
--- a/lib/decompress/huf_decompress.c
+++ b/lib/decompress/huf_decompress.c
@@ -1,7 +1,7 @@
 /* ******************************************************************
  * huff0 huffman decoder,
  * part of Finite State Entropy library
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -528,13 +528,15 @@
 static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
                            const U32* rankValOrigin, const int minWeight,
                            const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
-                           U32 nbBitsBaseline, U16 baseSeq)
+                           U32 nbBitsBaseline, U16 baseSeq, U32* wksp, size_t wkspSize)
 {
     HUF_DEltX2 DElt;
-    U32 rankVal[HUF_TABLELOG_MAX + 1];
+    U32* rankVal = wksp;
 
+    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+    (void)wkspSize;
     /* get pre-calculated rankVal */
-    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
 
     /* fill skipped values */
     if (minWeight>1) {
@@ -569,14 +571,18 @@
 static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
                            const sortedSymbol_t* sortedList, const U32 sortedListSize,
                            const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
-                           const U32 nbBitsBaseline)
+                           const U32 nbBitsBaseline, U32* wksp, size_t wkspSize)
 {
-    U32 rankVal[HUF_TABLELOG_MAX + 1];
+    U32* rankVal = wksp;
     const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
     const U32 minBits  = nbBitsBaseline - maxWeight;
     U32 s;
 
-    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+    wksp += HUF_TABLELOG_MAX + 1;
+    wkspSize -= HUF_TABLELOG_MAX + 1;
+
+    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
 
     /* fill DTable */
     for (s=0; s<sortedListSize; s++) {
@@ -594,7 +600,7 @@
             HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
                            rankValOrigin[nbBits], minWeight,
                            sortedList+sortedRank, sortedListSize-sortedRank,
-                           nbBitsBaseline, symbol);
+                           nbBitsBaseline, symbol, wksp, wkspSize);
         } else {
             HUF_DEltX2 DElt;
             MEM_writeLE16(&(DElt.sequence), symbol);
@@ -608,6 +614,15 @@
     }
 }
 
+typedef struct {   /* layout of the caller-provided workSpace used by HUF_readDTableX2_wksp() */
+    rankValCol_t rankVal[HUF_TABLELOG_MAX];   /* one column of rank values per number of consumed bits */
+    U32 rankStats[HUF_TABLELOG_MAX + 1];   /* number of symbols per weight, filled by HUF_readStats_wksp() */
+    U32 rankStart0[HUF_TABLELOG_MAX + 2];   /* start index of each weight in sortedSymbol; used through rankStart = rankStart0 + 1 */
+    sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];   /* symbols sorted by weight */
+    BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];   /* weight of each symbol, filled by HUF_readStats_wksp() */
+    U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];   /* scratch space handed to HUF_readStats_wksp() and HUF_fillDTableX2() */
+} HUF_ReadDTableX2_Workspace;
+
 size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
                        const void* src, size_t srcSize,
                              void* workSpace, size_t wkspSize)
@@ -620,47 +635,32 @@
     HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
     U32 *rankStart;
 
-    rankValCol_t* rankVal;
-    U32* rankStats;
-    U32* rankStart0;
-    sortedSymbol_t* sortedSymbol;
-    BYTE* weightList;
-    size_t spaceUsed32 = 0;
+    HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
 
-    rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
-    spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
-    rankStats = (U32 *)workSpace + spaceUsed32;
-    spaceUsed32 += HUF_TABLELOG_MAX + 1;
-    rankStart0 = (U32 *)workSpace + spaceUsed32;
-    spaceUsed32 += HUF_TABLELOG_MAX + 2;
-    sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
-    spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
-    weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
-    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
+    if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
 
-    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
-
-    rankStart = rankStart0 + 1;
-    ZSTD_memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
+    rankStart = wksp->rankStart0 + 1;
+    ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
+    ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
 
     DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
     if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
     /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
 
-    iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), /* bmi2 */ 0);
     if (HUF_isError(iSize)) return iSize;
 
     /* check result */
     if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
 
     /* find maxWeight */
-    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+    for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
 
     /* Get start index of each weight */
     {   U32 w, nextRankStart = 0;
         for (w=1; w<maxW+1; w++) {
             U32 curr = nextRankStart;
-            nextRankStart += rankStats[w];
+            nextRankStart += wksp->rankStats[w];
             rankStart[w] = curr;
         }
         rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
@@ -670,37 +670,38 @@
     /* sort symbols by weight */
     {   U32 s;
         for (s=0; s<nbSymbols; s++) {
-            U32 const w = weightList[s];
+            U32 const w = wksp->weightList[s];
             U32 const r = rankStart[w]++;
-            sortedSymbol[r].symbol = (BYTE)s;
-            sortedSymbol[r].weight = (BYTE)w;
+            wksp->sortedSymbol[r].symbol = (BYTE)s;
+            wksp->sortedSymbol[r].weight = (BYTE)w;
         }
         rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
     }
 
     /* Build rankVal */
-    {   U32* const rankVal0 = rankVal[0];
+    {   U32* const rankVal0 = wksp->rankVal[0];
         {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
             U32 nextRankVal = 0;
             U32 w;
             for (w=1; w<maxW+1; w++) {
                 U32 curr = nextRankVal;
-                nextRankVal += rankStats[w] << (w+rescale);
+                nextRankVal += wksp->rankStats[w] << (w+rescale);
                 rankVal0[w] = curr;
         }   }
         {   U32 const minBits = tableLog+1 - maxW;
             U32 consumed;
             for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
-                U32* const rankValPtr = rankVal[consumed];
+                U32* const rankValPtr = wksp->rankVal[consumed];
                 U32 w;
                 for (w = 1; w < maxW+1; w++) {
                     rankValPtr[w] = rankVal0[w] >> consumed;
     }   }   }   }
 
     HUF_fillDTableX2(dt, maxTableLog,
-                   sortedSymbol, sizeOfSort,
-                   rankStart0, rankVal, maxW,
-                   tableLog+1);
+                   wksp->sortedSymbol, sizeOfSort,
+                   wksp->rankStart0, wksp->rankVal, maxW,
+                   tableLog+1,
+                   wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32));
 
     dtd.tableLog = (BYTE)maxTableLog;
     dtd.tableType = 1;
@@ -1225,7 +1226,7 @@
     HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
     return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
 }
-#endif 
+#endif
 
 #ifndef HUF_FORCE_DECOMPRESS_X1
 size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
diff --git a/lib/decompress/zstd_ddict.c b/lib/decompress/zstd_ddict.c
index f5cc23b..ce33547 100644
--- a/lib/decompress/zstd_ddict.c
+++ b/lib/decompress/zstd_ddict.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/decompress/zstd_ddict.h b/lib/decompress/zstd_ddict.h
index 8906a71..bd03268 100644
--- a/lib/decompress/zstd_ddict.h
+++ b/lib/decompress/zstd_ddict.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index 21f846b..910bc03 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -62,6 +62,7 @@
 #include "../common/fse.h"
 #define HUF_STATIC_LINKING_ONLY
 #include "../common/huf.h"
+#include "../common/xxhash.h" /* XXH64_reset, XXH64_update, XXH64_digest, XXH64 */
 #include "../common/zstd_internal.h"  /* blockProperties_t */
 #include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
 #include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
@@ -72,6 +73,144 @@
 #endif
 
 
+
+/*************************************
+ * Multiple DDicts Hashset internals *
+ *************************************/
+
+#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4
+#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3   /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float.
+                                                     * Currently, that means a 0.75 load factor.
+                                                     * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded
+                                                     * the load factor of the ddict hash set.
+                                                     */
+
+#define DDICT_HASHSET_TABLE_BASE_SIZE 64
+#define DDICT_HASHSET_RESIZE_FACTOR 2
+
+/* Hash function to determine starting position of dict insertion within the table
+ * Returns an index within [0, hashSet->ddictPtrTableSize)
+ */
+static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) {
+    const U64 hash = XXH64(&dictID, sizeof(U32), 0);
+    /* DDict ptr table size is a power of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */
+    return hash & (hashSet->ddictPtrTableSize - 1);
+}
+
+/* Adds DDict to a hashset without resizing it, using linear probing from the hashed index.
+ * If inserting a DDict with a dictID that already exists in the set, replaces the one in the set.
+ * Returns 0 if successful, or a zstd error code if the set is already full.
+ */
+static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) {
+    const U32 dictID = ZSTD_getDictID_fromDDict(ddict);
+    size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
+    const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
+    RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!");
+    DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
+    while (hashSet->ddictPtrTable[idx] != NULL) {
+        /* Replace existing ddict if inserting ddict with same dictID */
+        if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) {
+            DEBUGLOG(4, "DictID already exists, replacing rather than adding");
+            hashSet->ddictPtrTable[idx] = ddict;
+            return 0;
+        }
+        idx++;                  /* Linear probing: try the next slot */
+        idx &= idxRangeMask;    /* Wrap AFTER incrementing, so idx never equals the table size */
+    }
+    DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
+    hashSet->ddictPtrTable[idx] = ddict;
+    hashSet->ddictPtrCount++;
+    return 0;
+}
+
+/* Expands hash table by factor of DDICT_HASHSET_RESIZE_FACTOR and
+ * rehashes all values, allocates new table, frees old table.
+ * Returns 0 on success, otherwise a zstd error code.
+ */
+static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
+    size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR;
+    const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem);
+    const ZSTD_DDict** oldTable = hashSet->ddictPtrTable;
+    size_t oldTableSize = hashSet->ddictPtrTableSize;
+    size_t i;
+
+    DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize);
+    RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!");
+    hashSet->ddictPtrTable = newTable;
+    hashSet->ddictPtrTableSize = newTableSize;
+    hashSet->ddictPtrCount = 0;
+    for (i = 0; i < oldTableSize; ++i) {
+        if (oldTable[i] != NULL) {
+            FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), "");
+        }
+    }
+    ZSTD_customFree((void*)oldTable, customMem);
+    DEBUGLOG(4, "Finished re-hash");
+    return 0;
+}
+
+/* Fetches a DDict with the given dictID, following the same probe sequence as insertion.
+ * Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL.
+ */
+static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) {
+    size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
+    const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
+    DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
+    for (;;) {
+        const ZSTD_DDict* const entry = hashSet->ddictPtrTable[idx];
+        if (entry == NULL || ZSTD_getDictID_fromDDict(entry) == dictID) {
+            /* An empty slot ends the probe sequence: the dictID is not in the set */
+            break;
+        } else {
+            idx++;                  /* Linear probing: try the next slot */
+            idx &= idxRangeMask;    /* Goes to start of table when we reach the end */
+        }
+    }
+    DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
+    return hashSet->ddictPtrTable[idx];
+}
+
+/* Allocates and returns a ddict hash set whose ZSTD_DDict* table has all values set to NULL.
+ * Returns NULL if allocation failed (nothing is leaked in that case). */
+static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) {
+    ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem);
+    DEBUGLOG(4, "Allocating new hash set");
+    if (!ret) return NULL;   /* must be checked before ret is dereferenced below */
+    ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem);
+    if (!ret->ddictPtrTable) {
+        ZSTD_customFree(ret, customMem);   /* release the set itself when its table cannot be allocated */
+        return NULL;
+    }
+    ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE;
+    ret->ddictPtrCount = 0;
+    return ret;
+}
+
+/* Releases a ddict hash set: first its table of ZSTD_DDict*, then the set itself.
+ * A NULL hashSet is accepted and ignored.
+ * Note: the ZSTD_DDict objects referenced by the table are NOT freed.
+ */
+static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
+    DEBUGLOG(4, "Freeing ddict hash set");
+    if (hashSet == NULL) return;   /* nothing to release */
+    if (hashSet->ddictPtrTable != NULL) {
+        ZSTD_customFree((void*)hashSet->ddictPtrTable, customMem);
+    }
+    ZSTD_customFree(hashSet, customMem);
+}
+
+/* Adds a DDict into the ZSTD_DDictHashSet, expanding the hash set first when its load factor
+ * (count / table size) has reached SIZE_MULT / COUNT_MULT. Returns 0 on success, or a ZSTD error.
+ */
+static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, ZSTD_customMem customMem) {
+    DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize);
+    if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT / (hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT) != 0) {
+        FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), "");
+    }
+    FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), "");
+    return 0;
+}
+
 /*-*************************************************************
 *   Context management
 ***************************************************************/
@@ -101,6 +240,7 @@
     dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
     dctx->outBufferMode = ZSTD_bm_buffered;
     dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
+    dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
 }
 
 static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
@@ -120,8 +260,8 @@
     dctx->noForwardProgress = 0;
     dctx->oversizedDuration = 0;
     dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
+    dctx->ddictSet = NULL;
     ZSTD_DCtx_resetParameters(dctx);
-    dctx->validateChecksum = 1;
 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
     dctx->dictContentEndForFuzzing = NULL;
 #endif
@@ -178,6 +318,10 @@
         if (dctx->legacyContext)
             ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion);
 #endif
+        if (dctx->ddictSet) {
+            ZSTD_freeDDictHashSet(dctx->ddictSet, cMem);
+            dctx->ddictSet = NULL;
+        }
         ZSTD_customFree(dctx, cMem);
         return 0;
     }
@@ -190,6 +334,29 @@
     ZSTD_memcpy(dstDCtx, srcDCtx, toCopy);  /* no need to copy workspace */
 }
 
+/* Re-selects the DDict to use for the frame whose parameters were just decoded into
+ * dctx->fParams: the ddict hash set is searched for the frame's dictID and, when a
+ * matching ZSTD_DDict is referenced there, it becomes the dctx's active ddict.
+ *
+ * When no DDict is currently set, or none matches, ZSTD_DCtx::ddict remains as-is.
+ *
+ * ZSTD_d_refMultipleDDicts must be enabled (and a ddictSet present) to call this function.
+ */
+static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) {
+    const ZSTD_DDict* frameDDict;
+    assert(dctx->refMultipleDDicts && dctx->ddictSet);
+    DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame");
+    if (dctx->ddict == NULL) return;   /* only an already-set ddict gets replaced */
+    frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID);
+    if (frameDDict == NULL) return;    /* requested dictID is not in the set */
+    DEBUGLOG(4, "DDict found!");
+    /* Swap in the frame's DDict */
+    ZSTD_clearDict(dctx);
+    dctx->dictID = dctx->fParams.dictID;
+    dctx->ddict = frameDDict;
+    dctx->dictUses = ZSTD_use_indefinitely;
+}
+
 
 /*-*************************************************************
  *   Frame header decoding
@@ -441,12 +608,19 @@
 
 /** ZSTD_decodeFrameHeader() :
  * `headerSize` must be the size provided by ZSTD_frameHeaderSize().
+ * If multiple DDict references are enabled, also will choose the correct DDict to use.
  * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
 static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
 {
     size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
     if (ZSTD_isError(result)) return result;    /* invalid header */
     RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small");
+
+    /* Reference DDict requested by frame if dctx references multiple ddicts */
+    if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) {
+        ZSTD_DCtx_selectFrameDDict(dctx);
+    }
+
 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
     /* Skip the dictID check in fuzzing mode, because it makes the search
      * harder.
@@ -456,6 +630,7 @@
 #endif
     dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0;
     if (dctx->validateChecksum) XXH64_reset(&dctx->xxhState, 0);
+    dctx->processedCSize += headerSize;
     return 0;
 }
 
@@ -578,7 +753,7 @@
 size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
 {
     DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize);
-    ZSTD_checkContinuity(dctx, blockStart);
+    ZSTD_checkContinuity(dctx, blockStart, blockSize);
     dctx->previousDstEnd = (const char*)blockStart + blockSize;
     return blockSize;
 }
@@ -610,6 +785,32 @@
     return regenSize;
 }
 
+/* Reports the end of a decompression to the trace hook; does nothing unless ZSTD_TRACE is enabled. */
+static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming)
+{
+#if ZSTD_TRACE
+    ZSTD_Trace trace;
+    if (!dctx->traceCtx || ZSTD_trace_decompress_end == NULL) return;
+    ZSTD_memset(&trace, 0, sizeof(trace));
+    trace.version = ZSTD_VERSION_NUMBER;
+    trace.streaming = streaming;
+    trace.uncompressedSize = (size_t)uncompressedSize;
+    trace.compressedSize = (size_t)compressedSize;
+    trace.dctx = dctx;
+    if (dctx->ddict != NULL) {
+        trace.dictionaryID = ZSTD_getDictID_fromDDict(dctx->ddict);
+        trace.dictionarySize = ZSTD_DDict_dictSize(dctx->ddict);
+        trace.dictionaryIsCold = dctx->ddictIsCold;
+    }
+    ZSTD_trace_decompress_end(dctx->traceCtx, &trace);
+#else
+    (void)dctx;
+    (void)uncompressedSize;
+    (void)compressedSize;
+    (void)streaming;
+#endif
+}
+
 
 /*! ZSTD_decompressFrame() :
  * @dctx must be properly initialized
@@ -619,8 +820,9 @@
                                    void* dst, size_t dstCapacity,
                              const void** srcPtr, size_t *srcSizePtr)
 {
-    const BYTE* ip = (const BYTE*)(*srcPtr);
-    BYTE* const ostart = (BYTE* const)dst;
+    const BYTE* const istart = (const BYTE*)(*srcPtr);
+    const BYTE* ip = istart;
+    BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart;
     BYTE* op = ostart;
     size_t remainingSrcSize = *srcSizePtr;
@@ -695,7 +897,7 @@
         ip += 4;
         remainingSrcSize -= 4;
     }
-
+    ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0);
     /* Allow caller to get size read */
     *srcPtr = ip;
     *srcSizePtr = remainingSrcSize;
@@ -764,7 +966,7 @@
              * use this in all cases but ddict */
             FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), "");
         }
-        ZSTD_checkContinuity(dctx, dst);
+        ZSTD_checkContinuity(dctx, dst, dstCapacity);
 
         {   const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
                                                     &src, &srcSize);
@@ -899,7 +1101,9 @@
     DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize);
     /* Sanity check */
     RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed");
-    if (dstCapacity) ZSTD_checkContinuity(dctx, dst);
+    ZSTD_checkContinuity(dctx, dst, dstCapacity);
+
+    dctx->processedCSize += srcSize;
 
     switch (dctx->stage)
     {
@@ -1004,6 +1208,7 @@
                     dctx->expected = 4;
                     dctx->stage = ZSTDds_checkChecksum;
                 } else {
+                    ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1);
                     dctx->expected = 0;   /* ends here */
                     dctx->stage = ZSTDds_getFrameHeaderSize;
                 }
@@ -1023,6 +1228,7 @@
                 DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32);
                 RETURN_ERROR_IF(check32 != h32, checksum_wrong, "");
             }
+            ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1);
             dctx->expected = 0;
             dctx->stage = ZSTDds_getFrameHeaderSize;
             return 0;
@@ -1176,8 +1382,12 @@
 size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
 {
     assert(dctx != NULL);
+#if ZSTD_TRACE
+    dctx->traceCtx = (ZSTD_trace_decompress_begin != NULL) ? ZSTD_trace_decompress_begin(dctx) : 0;
+#endif
     dctx->expected = ZSTD_startingInputLength(dctx->format);  /* dctx->format must be properly set */
     dctx->stage = ZSTDds_getFrameHeaderSize;
+    dctx->processedCSize = 0;
     dctx->decodedSize = 0;
     dctx->previousDstEnd = NULL;
     dctx->prefixStart = NULL;
@@ -1391,6 +1601,16 @@
     if (ddict) {
         dctx->ddict = ddict;
         dctx->dictUses = ZSTD_use_indefinitely;
+        if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) {
+            if (dctx->ddictSet == NULL) {
+                dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem);
+                if (!dctx->ddictSet) {
+                    RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!");
+                }
+            }
+            assert(!dctx->staticSize);  /* Impossible: ddictSet cannot have been allocated if static dctx */
+            FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), "");
+        }
     }
     return 0;
 }
@@ -1436,6 +1656,10 @@
             bounds.lowerBound = (int)ZSTD_d_validateChecksum;
             bounds.upperBound = (int)ZSTD_d_ignoreChecksum;
             return bounds;
+        case ZSTD_d_refMultipleDDicts:
+            bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict;
+            bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts;
+            return bounds;
         default:;
     }
     bounds.error = ERROR(parameter_unsupported);
@@ -1473,6 +1697,9 @@
         case ZSTD_d_forceIgnoreChecksum:
             *value = (int)dctx->forceIgnoreChecksum;
             return 0;
+        case ZSTD_d_refMultipleDDicts:
+            *value = (int)dctx->refMultipleDDicts;
+            return 0;
         default:;
     }
     RETURN_ERROR(parameter_unsupported, "");
@@ -1499,6 +1726,13 @@
             CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value);
             dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value;
             return 0;
+        case ZSTD_d_refMultipleDDicts:
+            CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value);
+            if (dctx->staticSize != 0) {
+                RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!");
+            }
+            dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value;
+            return 0;
         default:;
     }
     RETURN_ERROR(parameter_unsupported, "");
@@ -1680,6 +1914,9 @@
             }   }
 #endif
             {   size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format);
+                if (zds->refMultipleDDicts && zds->ddictSet) {
+                    ZSTD_DCtx_selectFrameDDict(zds);
+                }
                 DEBUGLOG(5, "header size : %u", (U32)hSize);
                 if (ZSTD_isError(hSize)) {
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c
index bec82e8..349dcdc 100644
--- a/lib/decompress/zstd_decompress_block.c
+++ b/lib/decompress/zstd_decompress_block.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -236,7 +236,7 @@
 
 /* Default FSE distribution tables.
  * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
- * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions
+ * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
  * They were generated programmatically with following method :
  * - start from default distributions, present in /lib/common/zstd_internal.h
  * - generate tables normally, using ZSTD_buildFSETable()
@@ -577,7 +577,7 @@
 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                              const void* src, size_t srcSize)
 {
-    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* const istart = (const BYTE*)src;
     const BYTE* const iend = istart + srcSize;
     const BYTE* ip = istart;
     int nbSeq;
@@ -658,7 +658,6 @@
     size_t litLength;
     size_t matchLength;
     size_t offset;
-    const BYTE* match;
 } seq_t;
 
 typedef struct {
@@ -672,9 +671,6 @@
     ZSTD_fseState stateOffb;
     ZSTD_fseState stateML;
     size_t prevOffset[ZSTD_REP_NUM];
-    const BYTE* prefixStart;
-    const BYTE* dictEnd;
-    size_t pos;
 } seqState_t;
 
 /*! ZSTD_overlapCopy8() :
@@ -936,10 +932,9 @@
         : 0)
 
 typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
-typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
 
 FORCE_INLINE_TEMPLATE seq_t
-ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
+ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
 {
     seq_t seq;
     ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
@@ -1014,14 +1009,6 @@
     DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
                 (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
 
-    if (prefetch == ZSTD_p_prefetch) {
-        size_t const pos = seqState->pos + seq.litLength;
-        const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
-        seq.match = matchBase + pos - seq.offset;  /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
-                                                    * No consequence though : no memory access will occur, offset is only used for prefetching */
-        seqState->pos = pos + seq.matchLength;
-    }
-
     /* ANS state update
      * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
      * clang-9.2.0 does 7% worse with ZSTD_updateFseState().
@@ -1108,7 +1095,7 @@
 {
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
-    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + maxDstSize;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
@@ -1122,7 +1109,6 @@
     /* Regen sequences */
     if (nbSeq) {
         seqState_t seqState;
-        size_t error = 0;
         dctx->fseEntropy = 1;
         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
         RETURN_ERROR_IF(
@@ -1156,13 +1142,14 @@
          * If you see most cycles served out of the DSB you've hit the good case.
          * If it is pretty even then you may be in an okay case.
          *
-         * I've been able to reproduce this issue on the following CPUs:
+         * This issue has been reproduced on the following CPUs:
          *   - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
          *               Use Instruments->Counters to get DSB/MITE cycles.
          *               I never got performance swings, but I was able to
          *               go from the good case of mostly DSB to half of the
          *               cycles served from MITE.
          *   - Coffeelake: Intel i9-9900k
+         *   - Coffeelake: Intel i7-9700k
          *
          * I haven't been able to reproduce the instability or DSB misses on any
          * of the following CPUS:
@@ -1175,33 +1162,35 @@
          *
          *   https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
          */
+        __asm__(".p2align 6");
+        __asm__("nop");
         __asm__(".p2align 5");
         __asm__("nop");
+#  if __GNUC__ >= 9
+        /* better for gcc-9 and gcc-10, worse for clang and gcc-8 */
+        __asm__(".p2align 3");
+#  else
         __asm__(".p2align 4");
+#  endif
 #endif
         for ( ; ; ) {
-            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
             size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
             assert(!ZSTD_isError(oneSeqSize));
             if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
 #endif
+            if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                return oneSeqSize;
             DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
-            BIT_reloadDStream(&(seqState.DStream));
             op += oneSeqSize;
-            /* gcc and clang both don't like early returns in this loop.
-             * Instead break and check for an error at the end of the loop.
-             */
-            if (UNLIKELY(ZSTD_isError(oneSeqSize))) {
-                error = oneSeqSize;
+            if (UNLIKELY(!--nbSeq))
                 break;
-            }
-            if (UNLIKELY(!--nbSeq)) break;
+            BIT_reloadDStream(&(seqState.DStream));
         }
 
         /* check if reached exact end */
         DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
-        if (ZSTD_isError(error)) return error;
         RETURN_ERROR_IF(nbSeq, corruption_detected, "");
         RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
         /* save reps for next block */
@@ -1232,6 +1221,24 @@
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
 
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+
+FORCE_INLINE_TEMPLATE size_t   /* @return : prefetchPos advanced past this sequence's literals and match */
+ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
+                   const BYTE* const prefixStart, const BYTE* const dictEnd)
+{   /* Computes the address of the match referenced by `sequence` and passes it to PREFETCH_L1(); the address is never dereferenced here. */
+    prefetchPos += sequence.litLength;   /* skip over this sequence's literals */
+    {   const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;   /* offset larger than current position : address is computed from dictEnd instead of prefixStart */
+        const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when sequence.offset is really too large, which can only happen when input is corrupted.
+                                                                              * No consequence though : memory address is only used for prefetching, not for dereferencing */
+        PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE);   /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+    }
+    return prefetchPos + sequence.matchLength;   /* position after the match : callers feed it back as prefetchPos for the next sequence */
+}
+
+/* This decoding function employs prefetching
+ * to reduce the latency impact of cache misses.
+ * It's generally employed when the block contains a significant portion of long-distance matches,
+ * or when coupled with a "cold" dictionary (one presumed not to be in cpu cache yet). */
 FORCE_INLINE_TEMPLATE size_t
 ZSTD_decompressSequencesLong_body(
                                ZSTD_DCtx* dctx,
@@ -1242,7 +1249,7 @@
 {
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
-    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + maxDstSize;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
@@ -1254,18 +1261,17 @@
 
     /* Regen sequences */
     if (nbSeq) {
-#define STORED_SEQS 4
+#define STORED_SEQS 8
 #define STORED_SEQS_MASK (STORED_SEQS-1)
-#define ADVANCED_SEQS 4
+#define ADVANCED_SEQS STORED_SEQS
         seq_t sequences[STORED_SEQS];
         int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
         seqState_t seqState;
         int seqNb;
+        size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
+
         dctx->fseEntropy = 1;
         { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
-        seqState.prefixStart = prefixStart;
-        seqState.pos = (size_t)(op-prefixStart);
-        seqState.dictEnd = dictEnd;
         assert(dst != NULL);
         assert(iend >= ip);
         RETURN_ERROR_IF(
@@ -1277,21 +1283,23 @@
 
         /* prepare in advance */
         for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
-            sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
-            PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+            sequences[seqNb] = sequence;
         }
         RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
 
         /* decode and decompress */
         for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
-            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
             size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
             assert(!ZSTD_isError(oneSeqSize));
             if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
 #endif
             if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
-            PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+
+            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
             sequences[seqNb & STORED_SEQS_MASK] = sequence;
             op += oneSeqSize;
         }
@@ -1517,9 +1525,9 @@
 }
 
 
-void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
+void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
 {
-    if (dst != dctx->previousDstEnd) {   /* not contiguous */
+    if (dst != dctx->previousDstEnd && dstSize > 0) {   /* not contiguous */
         dctx->dictEnd = dctx->previousDstEnd;
         dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
         dctx->prefixStart = dst;
@@ -1533,7 +1541,7 @@
                       const void* src, size_t srcSize)
 {
     size_t dSize;
-    ZSTD_checkContinuity(dctx, dst);
+    ZSTD_checkContinuity(dctx, dst, dstCapacity);
     dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
     dctx->previousDstEnd = (char*)dst + dSize;
     return dSize;
diff --git a/lib/decompress/zstd_decompress_block.h b/lib/decompress/zstd_decompress_block.h
index b5715c1..049a0cd 100644
--- a/lib/decompress/zstd_decompress_block.h
+++ b/lib/decompress/zstd_decompress_block.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/decompress/zstd_decompress_internal.h b/lib/decompress/zstd_decompress_internal.h
index f80b471..ebda0c9 100644
--- a/lib/decompress/zstd_decompress_internal.h
+++ b/lib/decompress/zstd_decompress_internal.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -99,6 +99,13 @@
     ZSTD_use_once = 1            /* Use the dictionary once and set to ZSTD_dont_use */
 } ZSTD_dictUses_e;
 
+/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx (held through the `ddictSet` member of ZSTD_DCtx_s) */
+typedef struct {
+    const ZSTD_DDict** ddictPtrTable;   /* pointer to a table of const ZSTD_DDict pointers */
+    size_t ddictPtrTableSize;           /* presumably the nb of slots in ddictPtrTable - TODO confirm against the hashset implementation */
+    size_t ddictPtrCount;               /* presumably the nb of DDict references currently stored - TODO confirm against the hashset implementation */
+} ZSTD_DDictHashSet;
+
 struct ZSTD_DCtx_s
 {
     const ZSTD_seqSymbol* LLTptr;
@@ -113,6 +120,7 @@
     const void* dictEnd;          /* end of previous segment */
     size_t expected;
     ZSTD_frameHeader fParams;
+    U64 processedCSize;
     U64 decodedSize;
     blockType_e bType;            /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
     ZSTD_dStage stage;
@@ -136,6 +144,8 @@
     U32 dictID;
     int ddictIsCold;             /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
     ZSTD_dictUses_e dictUses;
+    ZSTD_DDictHashSet* ddictSet;                    /* Hash set for multiple ddicts */
+    ZSTD_refMultipleDDicts_e refMultipleDDicts;     /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
 
     /* streaming */
     ZSTD_dStreamStage streamStage;
@@ -166,6 +176,11 @@
     void const* dictContentBeginForFuzzing;
     void const* dictContentEndForFuzzing;
 #endif
+
+    /* Tracing */
+#if ZSTD_TRACE
+    ZSTD_TraceCtx traceCtx;
+#endif
 };  /* typedef'd to ZSTD_DCtx within "zstd.h" */
 
 
@@ -184,7 +199,7 @@
  *  If yes, do nothing (continue on current segment).
  *  If not, classify previous segment as "external dictionary", and start a new segment.
  *  This function cannot fail. */
-void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
+void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
 
 
 #endif /* ZSTD_DECOMPRESS_INTERNAL_H */
diff --git a/lib/deprecated/zbuff.h b/lib/deprecated/zbuff.h
index 03cb14a..b83ea0f 100644
--- a/lib/deprecated/zbuff.h
+++ b/lib/deprecated/zbuff.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/deprecated/zbuff_common.c b/lib/deprecated/zbuff_common.c
index 579bc4d..e7d01a0 100644
--- a/lib/deprecated/zbuff_common.c
+++ b/lib/deprecated/zbuff_common.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/deprecated/zbuff_compress.c b/lib/deprecated/zbuff_compress.c
index 2d20b13..2e72267 100644
--- a/lib/deprecated/zbuff_compress.c
+++ b/lib/deprecated/zbuff_compress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/deprecated/zbuff_decompress.c b/lib/deprecated/zbuff_decompress.c
index d3c49e8..d73c0f3 100644
--- a/lib/deprecated/zbuff_decompress.c
+++ b/lib/deprecated/zbuff_decompress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c
index c78af13..8364444 100644
--- a/lib/dictBuilder/cover.c
+++ b/lib/dictBuilder/cover.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -26,15 +26,16 @@
 #include <string.h> /* memset */
 #include <time.h>   /* clock */
 
+#ifndef ZDICT_STATIC_LINKING_ONLY
+#  define ZDICT_STATIC_LINKING_ONLY
+#endif
+
 #include "../common/mem.h" /* read */
 #include "../common/pool.h"
 #include "../common/threading.h"
-#include "cover.h"
 #include "../common/zstd_internal.h" /* includes zstd.h */
-#ifndef ZDICT_STATIC_LINKING_ONLY
-#define ZDICT_STATIC_LINKING_ONLY
-#endif
-#include "zdict.h"
+#include "../zdict.h"
+#include "cover.h"
 
 /*-*************************************
 *  Constants
@@ -1062,18 +1063,19 @@
  * This function is thread safe if zstd is compiled with multithreaded support.
  * It takes its parameters as an *OWNING* opaque pointer to support threading.
  */
-static void COVER_tryParameters(void *opaque) {
+static void COVER_tryParameters(void *opaque)
+{
   /* Save parameters as local variables */
-  COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
+  COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque;
   const COVER_ctx_t *const ctx = data->ctx;
   const ZDICT_cover_params_t parameters = data->parameters;
   size_t dictBufferCapacity = data->dictBufferCapacity;
   size_t totalCompressedSize = ERROR(GENERIC);
   /* Allocate space for hash table, dict, and freqs */
   COVER_map_t activeDmers;
-  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
+  BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
   COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
-  U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
+  U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32));
   if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
     DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
     goto _cleanup;
@@ -1103,15 +1105,14 @@
   free(data);
   COVER_map_destroy(&activeDmers);
   COVER_dictSelectionFree(selection);
-  if (freqs) {
-    free(freqs);
-  }
+  free(freqs);
 }
 
 ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
-    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
-    const size_t *samplesSizes, unsigned nbSamples,
-    ZDICT_cover_params_t *parameters) {
+    void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
+    const size_t* samplesSizes, unsigned nbSamples,
+    ZDICT_cover_params_t* parameters)
+{
   /* constants */
   const unsigned nbThreads = parameters->nbThreads;
   const double splitPoint =
diff --git a/lib/dictBuilder/cover.h b/lib/dictBuilder/cover.h
index 9f1cb5f..1aacddd 100644
--- a/lib/dictBuilder/cover.h
+++ b/lib/dictBuilder/cover.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -8,6 +8,10 @@
  * You may select, at your option, one of the above-listed licenses.
  */
 
+#ifndef ZDICT_STATIC_LINKING_ONLY
+#  define ZDICT_STATIC_LINKING_ONLY
+#endif
+
 #include <stdio.h>  /* fprintf */
 #include <stdlib.h> /* malloc, free, qsort */
 #include <string.h> /* memset */
@@ -16,10 +20,7 @@
 #include "../common/pool.h"
 #include "../common/threading.h"
 #include "../common/zstd_internal.h" /* includes zstd.h */
-#ifndef ZDICT_STATIC_LINKING_ONLY
-#define ZDICT_STATIC_LINKING_ONLY
-#endif
-#include "zdict.h"
+#include "../zdict.h"
 
 /**
  * COVER_best_t is used for two purposes:
diff --git a/lib/dictBuilder/divsufsort.c b/lib/dictBuilder/divsufsort.c
index ead9220..a2870fb 100644
--- a/lib/dictBuilder/divsufsort.c
+++ b/lib/dictBuilder/divsufsort.c
@@ -1576,7 +1576,7 @@
     /* Construct the inverse suffix array of type B* suffixes using trsort. */
     trsort(ISAb, SA, m, 1);
 
-    /* Set the sorted order of tyoe B* suffixes. */
+    /* Set the sorted order of type B* suffixes. */
     for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
       for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
       if(0 <= i) {
diff --git a/lib/dictBuilder/fastcover.c b/lib/dictBuilder/fastcover.c
index 5e60f24..ed789f9 100644
--- a/lib/dictBuilder/fastcover.c
+++ b/lib/dictBuilder/fastcover.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -16,16 +16,17 @@
 #include <string.h> /* memset */
 #include <time.h>   /* clock */
 
+#ifndef ZDICT_STATIC_LINKING_ONLY
+#  define ZDICT_STATIC_LINKING_ONLY
+#endif
+
 #include "../common/mem.h" /* read */
 #include "../common/pool.h"
 #include "../common/threading.h"
-#include "cover.h"
 #include "../common/zstd_internal.h" /* includes zstd.h */
 #include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
-#ifndef ZDICT_STATIC_LINKING_ONLY
-#define ZDICT_STATIC_LINKING_ONLY
-#endif
-#include "zdict.h"
+#include "../zdict.h"
+#include "cover.h"
 
 
 /*-*************************************
@@ -462,20 +463,20 @@
  * This function is thread safe if zstd is compiled with multithreaded support.
  * It takes its parameters as an *OWNING* opaque pointer to support threading.
  */
-static void FASTCOVER_tryParameters(void *opaque)
+static void FASTCOVER_tryParameters(void* opaque)
 {
   /* Save parameters as local variables */
-  FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque;
+  FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque;
   const FASTCOVER_ctx_t *const ctx = data->ctx;
   const ZDICT_cover_params_t parameters = data->parameters;
   size_t dictBufferCapacity = data->dictBufferCapacity;
   size_t totalCompressedSize = ERROR(GENERIC);
   /* Initialize array to keep track of frequency of dmer within activeSegment */
-  U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16));
+  U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));
   /* Allocate space for hash table, dict, and freqs */
-  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
+  BYTE *const dict = (BYTE*)malloc(dictBufferCapacity);
   COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
-  U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
+  U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
   if (!segmentFreqs || !dict || !freqs) {
     DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
     goto _cleanup;
diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c
index 79c522e..459cbe4 100644
--- a/lib/dictBuilder/zdict.c
+++ b/lib/dictBuilder/zdict.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -23,9 +23,13 @@
 /* Unix Large Files support (>4GB) */
 #define _FILE_OFFSET_BITS 64
 #if (defined(__sun__) && (!defined(__LP64__)))   /* Sun Solaris 32-bits requires specific definitions */
+#  ifndef _LARGEFILE_SOURCE
 #  define _LARGEFILE_SOURCE
+#  endif
 #elif ! defined(__LP64__)                        /* No point defining Large file for 64 bit */
+#  ifndef _LARGEFILE64_SOURCE
 #  define _LARGEFILE64_SOURCE
+#  endif
 #endif
 
 
@@ -37,18 +41,19 @@
 #include <stdio.h>         /* fprintf, fopen, ftello64 */
 #include <time.h>          /* clock */
 
-#include "../common/mem.h"           /* read */
-#include "../common/fse.h"           /* FSE_normalizeCount, FSE_writeNCount */
-#define HUF_STATIC_LINKING_ONLY
-#include "../common/huf.h"           /* HUF_buildCTable, HUF_writeCTable */
-#include "../common/zstd_internal.h" /* includes zstd.h */
-#include "../common/xxhash.h"        /* XXH64 */
-#include "divsufsort.h"
 #ifndef ZDICT_STATIC_LINKING_ONLY
 #  define ZDICT_STATIC_LINKING_ONLY
 #endif
-#include "zdict.h"
+#define HUF_STATIC_LINKING_ONLY
+
+#include "../common/mem.h"           /* read */
+#include "../common/fse.h"           /* FSE_normalizeCount, FSE_writeNCount */
+#include "../common/huf.h"           /* HUF_buildCTable, HUF_writeCTable */
+#include "../common/zstd_internal.h" /* includes zstd.h */
+#include "../common/xxhash.h"        /* XXH64 */
 #include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
+#include "../zdict.h"
+#include "divsufsort.h"
 
 
 /*-*************************************
@@ -967,16 +972,11 @@
     return MIN(dictBufferCapacity, hSize+dictContentSize);
 }
 
-/* Hidden declaration for dbio.c */
-size_t ZDICT_trainFromBuffer_unsafe_legacy(
-                            void* dictBuffer, size_t maxDictSize,
-                            const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
-                            ZDICT_legacy_params_t params);
 /*! ZDICT_trainFromBuffer_unsafe_legacy() :
-*   Warning : `samplesBuffer` must be followed by noisy guard band.
+*   Warning : `samplesBuffer` must be followed by noisy guard band !!!
 *   @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
 */
-size_t ZDICT_trainFromBuffer_unsafe_legacy(
+static size_t ZDICT_trainFromBuffer_unsafe_legacy(
                             void* dictBuffer, size_t maxDictSize,
                             const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                             ZDICT_legacy_params_t params)
diff --git a/lib/dll/example/Makefile b/lib/dll/example/Makefile
index 8f19195..03b034d 100644
--- a/lib/dll/example/Makefile
+++ b/lib/dll/example/Makefile
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+# Copyright (c) Yann Collet, Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_legacy.h b/lib/legacy/zstd_legacy.h
index 6bea6a5..a6f1174 100644
--- a/lib/legacy/zstd_legacy.h
+++ b/lib/legacy/zstd_legacy.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c
index 13115be..7ab5547 100644
--- a/lib/legacy/zstd_v01.c
+++ b/lib/legacy/zstd_v01.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v01.h b/lib/legacy/zstd_v01.h
index 7910351..f777eb6 100644
--- a/lib/legacy/zstd_v01.h
+++ b/lib/legacy/zstd_v01.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c
index 9abb6d0..89fdc71 100644
--- a/lib/legacy/zstd_v02.c
+++ b/lib/legacy/zstd_v02.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v02.h b/lib/legacy/zstd_v02.h
index 5f8f6cd..1b37195 100644
--- a/lib/legacy/zstd_v02.h
+++ b/lib/legacy/zstd_v02.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c
index a19cb20..5262d51 100644
--- a/lib/legacy/zstd_v03.c
+++ b/lib/legacy/zstd_v03.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v03.h b/lib/legacy/zstd_v03.h
index 5fc7273..7a00d43 100644
--- a/lib/legacy/zstd_v03.h
+++ b/lib/legacy/zstd_v03.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c
index 77d5255..bee1b99 100644
--- a/lib/legacy/zstd_v04.c
+++ b/lib/legacy/zstd_v04.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v04.h b/lib/legacy/zstd_v04.h
index 15fce0d..66b97ab 100644
--- a/lib/legacy/zstd_v04.h
+++ b/lib/legacy/zstd_v04.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c
index ca8d5c9..eb8966b 100644
--- a/lib/legacy/zstd_v05.c
+++ b/lib/legacy/zstd_v05.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -2833,7 +2833,7 @@
 
 static size_t ZSTDv05_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
 {
-    const BYTE* const in = (const BYTE* const)src;
+    const BYTE* const in = (const BYTE*)src;
     BYTE headerFlags;
     U32 cSize;
 
@@ -3002,7 +3002,7 @@
                          FSEv05_DTable* DTableLL, FSEv05_DTable* DTableML, FSEv05_DTable* DTableOffb,
                          const void* src, size_t srcSize, U32 flagStaticTable)
 {
-    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* const istart = (const BYTE*)src;
     const BYTE* ip = istart;
     const BYTE* const iend = istart + srcSize;
     U32 LLtype, Offtype, MLtype;
@@ -3310,7 +3310,7 @@
 {
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
-    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const ostart = (BYTE*)dst;
     BYTE* op = ostart;
     BYTE* const oend = ostart + maxDstSize;
     size_t errorCode, dumpsLength=0;
@@ -3423,7 +3423,7 @@
 {
     const BYTE* ip = (const BYTE*)src;
     const BYTE* iend = ip + srcSize;
-    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const ostart = (BYTE*)dst;
     BYTE* op = ostart;
     BYTE* const oend = ostart + maxDstSize;
     size_t remainingSize = srcSize;
diff --git a/lib/legacy/zstd_v05.h b/lib/legacy/zstd_v05.h
index 167d892..bd423bf 100644
--- a/lib/legacy/zstd_v05.h
+++ b/lib/legacy/zstd_v05.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c
index c4ac7db..fcb16d4 100644
--- a/lib/legacy/zstd_v06.c
+++ b/lib/legacy/zstd_v06.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -3029,7 +3029,7 @@
 *   Provides the size of compressed block from block header `src` */
 static size_t ZSTDv06_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
 {
-    const BYTE* const in = (const BYTE* const)src;
+    const BYTE* const in = (const BYTE*)src;
     U32 cSize;
 
     if (srcSize < ZSTDv06_blockHeaderSize) return ERROR(srcSize_wrong);
@@ -3223,7 +3223,7 @@
                              FSEv06_DTable* DTableLL, FSEv06_DTable* DTableML, FSEv06_DTable* DTableOffb, U32 flagRepeatTable,
                              const void* src, size_t srcSize)
 {
-    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* const istart = (const BYTE*)src;
     const BYTE* const iend = istart + srcSize;
     const BYTE* ip = istart;
 
@@ -3445,7 +3445,7 @@
 {
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
-    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + maxDstSize;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
@@ -3561,7 +3561,7 @@
 {
     const BYTE* ip = (const BYTE*)src;
     const BYTE* const iend = ip + srcSize;
-    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const ostart = (BYTE*)dst;
     BYTE* op = ostart;
     BYTE* const oend = ostart + dstCapacity;
     size_t remainingSize = srcSize;
diff --git a/lib/legacy/zstd_v06.h b/lib/legacy/zstd_v06.h
index 2fd99e6..9e32b76 100644
--- a/lib/legacy/zstd_v06.h
+++ b/lib/legacy/zstd_v06.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c
index 049ba47..0d0e466 100644
--- a/lib/legacy/zstd_v07.c
+++ b/lib/legacy/zstd_v07.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -3258,7 +3258,7 @@
 *   Provides the size of compressed block from block header `src` */
 static size_t ZSTDv07_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
 {
-    const BYTE* const in = (const BYTE* const)src;
+    const BYTE* const in = (const BYTE*)src;
     U32 cSize;
 
     if (srcSize < ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
@@ -3453,7 +3453,7 @@
                              FSEv07_DTable* DTableLL, FSEv07_DTable* DTableML, FSEv07_DTable* DTableOffb, U32 flagRepeatTable,
                              const void* src, size_t srcSize)
 {
-    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* const istart = (const BYTE*)src;
     const BYTE* const iend = istart + srcSize;
     const BYTE* ip = istart;
 
@@ -3672,7 +3672,7 @@
 {
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
-    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + maxDstSize;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
@@ -3799,7 +3799,7 @@
 {
     const BYTE* ip = (const BYTE*)src;
     const BYTE* const iend = ip + srcSize;
-    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstCapacity;
     BYTE* op = ostart;
     size_t remainingSize = srcSize;
diff --git a/lib/legacy/zstd_v07.h b/lib/legacy/zstd_v07.h
index 9da50c4..bc35cfa 100644
--- a/lib/legacy/zstd_v07.h
+++ b/lib/legacy/zstd_v07.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/dictBuilder/zdict.h b/lib/zdict.h
similarity index 70%
rename from lib/dictBuilder/zdict.h
rename to lib/zdict.h
index b782993..75b05db 100644
--- a/lib/dictBuilder/zdict.h
+++ b/lib/zdict.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -36,6 +36,145 @@
 #  define ZDICTLIB_API ZDICTLIB_VISIBILITY
 #endif
 
+/*******************************************************************************
+ * Zstd dictionary builder
+ *
+ * FAQ
+ * ===
+ * Why should I use a dictionary?
+ * ------------------------------
+ *
+ * Zstd can use dictionaries to improve compression ratio of small data.
+ * Traditionally small files don't compress well because there is very little
+ * repetition in a single sample, since it is small. But, if you are compressing
+ * many similar files, like a bunch of JSON records that share the same
+ * structure, you can train a dictionary ahead of time on some samples of
+ * these files. Then, zstd can use the dictionary to find repetitions that are
+ * present across samples. This can vastly improve compression ratio.
+ *
+ * When is a dictionary useful?
+ * ----------------------------
+ *
+ * Dictionaries are useful when compressing many small files that are similar.
+ * The larger a file is, the less benefit a dictionary will have. Generally,
+ * we don't expect dictionary compression to be effective past 100KB. And the
+ * smaller a file is, the more we would expect the dictionary to help.
+ *
+ * How do I use a dictionary?
+ * --------------------------
+ *
+ * Simply pass the dictionary to the zstd compressor with
+ * `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
+ * the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
+ * more advanced functions that allow selecting some options, see zstd.h for
+ * complete documentation.
+ *
+ * What is a zstd dictionary?
+ * --------------------------
+ *
+ * A zstd dictionary has two pieces: Its header, and its content. The header
+ * contains a magic number, the dictionary ID, and entropy tables. These
+ * entropy tables allow zstd to save on header costs in the compressed file,
+ * which really matters for small data. The content is just bytes, which are
+ * repeated content that is common across many samples.
+ *
+ * What is a raw content dictionary?
+ * ---------------------------------
+ *
+ * A raw content dictionary is just bytes. It doesn't have a zstd dictionary
+ * header, a dictionary ID, or entropy tables. Any buffer is a valid raw
+ * content dictionary.
+ *
+ * How do I train a dictionary?
+ * ----------------------------
+ *
+ * Gather samples from your use case. These samples should be similar to each
+ * other. If you have several use cases, you could try to train one dictionary
+ * per use case.
+ *
+ * Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
+ * dictionary. There are a few advanced versions of this function, but this
+ * is a great starting point. If you want to further tune your dictionary
+ * you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
+ * you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
+ *
+ * If the dictionary training function fails, that is likely because you
+ * either passed too few samples, or a dictionary would not be effective
+ * for your data. Look at the messages that the dictionary trainer printed,
+ * if it doesn't say too few samples, then a dictionary would not be effective.
+ *
+ * How large should my dictionary be?
+ * ----------------------------------
+ *
+ * A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
+ * The zstd CLI defaults to a 110KB dictionary. You likely don't need a
+ * dictionary larger than that. But, most use cases can get away with a
+ * smaller dictionary. The advanced dictionary builders can automatically
+ * shrink the dictionary for you, and select the smallest size that
+ * doesn't hurt compression ratio too much. See the `shrinkDict` parameter.
+ * A smaller dictionary can save memory, and potentially speed up
+ * compression.
+ *
+ * How many samples should I provide to the dictionary builder?
+ * ------------------------------------------------------------
+ *
+ * We generally recommend passing ~100x the size of the dictionary
+ * in samples. A few thousand should suffice. Having too few samples
+ * can hurt the dictionary's effectiveness. Having more samples will
+ * only improve the dictionary's effectiveness. But having too many
+ * samples can slow down the dictionary builder.
+ *
+ * How do I determine if a dictionary will be effective?
+ * -----------------------------------------------------
+ *
+ * Simply train a dictionary and try it out. You can use zstd's built in
+ * benchmarking tool to test the dictionary effectiveness.
+ *
+ *   # Benchmark levels 1-3 without a dictionary
+ *   zstd -b1e3 -r /path/to/my/files
+ *   # Benchmark levels 1-3 with a dictionary
+ *   zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
+ *
+ * When should I retrain a dictionary?
+ * -----------------------------------
+ *
+ * You should retrain a dictionary when its effectiveness drops. Dictionary
+ * effectiveness drops as the data you are compressing changes. Generally, we do
+ * expect dictionaries to "decay" over time, as your data changes, but the rate
+ * at which they decay depends on your use case. Internally, we regularly
+ * retrain dictionaries, and if the new dictionary performs significantly
+ * better than the old dictionary, we will ship the new dictionary.
+ *
+ * I have a raw content dictionary, how do I turn it into a zstd dictionary?
+ * -------------------------------------------------------------------------
+ *
+ * If you have a raw content dictionary, e.g. by manually constructing it, or
+ * using a third-party dictionary builder, you can turn it into a zstd
+ * dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to
+ * provide some samples of the data. It will add the zstd header to the
+ * raw content, which contains a dictionary ID and entropy tables, which
+ * will improve compression ratio, and allow zstd to write the dictionary ID
+ * into the frame, if you so choose.
+ *
+ * Do I have to use zstd's dictionary builder?
+ * -------------------------------------------
+ *
+ * No! You can construct dictionary content however you please, it is just
+ * bytes. It will always be valid as a raw content dictionary. If you want
+ * a zstd dictionary, which can improve compression ratio, use
+ * `ZDICT_finalizeDictionary()`.
+ *
+ * What is the attack surface of a zstd dictionary?
+ * ------------------------------------------------
+ *
+ * Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so
+ * zstd should never crash, or access out-of-bounds memory no matter what
+ * the dictionary is. However, if an attacker can control the dictionary
+ * during decompression, they can cause zstd to generate arbitrary bytes,
+ * just like if they controlled the compressed data.
+ *
+ ******************************************************************************/
+
 
 /*! ZDICT_trainFromBuffer():
  *  Train a dictionary from an array of samples.
@@ -64,7 +203,14 @@
 typedef struct {
     int      compressionLevel;   /*< optimize for a specific zstd compression level; 0 means default */
     unsigned notificationLevel;  /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
-    unsigned dictID;             /*< force dictID value; 0 means auto mode (32-bits random value) */
+    unsigned dictID;             /*< force dictID value; 0 means auto mode (32-bits random value)
+                                  *   NOTE: The zstd format reserves some dictionary IDs for future use.
+                                  *         You may use them in private settings, but be warned that they
+                                  *         may be used by zstd in a public dictionary registry in the future.
+                                  *         These dictionary IDs are:
+                                  *           - low range  : <= 32767
+                                  *           - high range : >= (2^31)
+                                  */
 } ZDICT_params_t;
 
 /*! ZDICT_finalizeDictionary():
@@ -264,10 +410,11 @@
  *  Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
  */
 ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
-    void *dictBuffer, size_t dictBufferCapacity,
-    const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
+    void* dictBuffer, size_t dictBufferCapacity,
+    const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
     ZDICT_legacy_params_t parameters);
 
+
 /* Deprecation warnings */
 /* It is generally possible to disable deprecation warnings from compiler,
    for example with -Wno-deprecated-declarations for gcc
diff --git a/lib/zstd.h b/lib/zstd.h
index 06e07f7..4651e6c 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -71,8 +71,8 @@
 
 /*------   Version   ------*/
 #define ZSTD_VERSION_MAJOR    1
-#define ZSTD_VERSION_MINOR    4
-#define ZSTD_VERSION_RELEASE  7
+#define ZSTD_VERSION_MINOR    5
+#define ZSTD_VERSION_RELEASE  0
 #define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
 
 /*! ZSTD_versionNumber() :
@@ -109,7 +109,6 @@
 #define ZSTD_BLOCKSIZE_MAX     (1<<ZSTD_BLOCKSIZELOG_MAX)
 
 
-
 /***************************************
 *  Simple API
 ***************************************/
@@ -166,7 +165,7 @@
  * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */
 ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
 
-/*! ZSTD_findFrameCompressedSize() :
+/*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+
  * `src` should point to the start of a ZSTD frame or skippable frame.
  * `srcSize` must be >= first frame size
  * @return : the compressed size of the first frame starting at `src`,
@@ -180,8 +179,9 @@
 ZSTDLIB_API size_t      ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
 ZSTDLIB_API unsigned    ZSTD_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
 ZSTDLIB_API const char* ZSTD_getErrorName(size_t code);     /*!< provides readable string from an error code */
-ZSTDLIB_API int         ZSTD_minCLevel(void);               /*!< minimum negative compression level allowed */
+ZSTDLIB_API int         ZSTD_minCLevel(void);               /*!< minimum negative compression level allowed, requires v1.4.0+ */
 ZSTDLIB_API int         ZSTD_maxCLevel(void);               /*!< maximum compression level available */
+ZSTDLIB_API int         ZSTD_defaultCLevel(void);           /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */
 
 
 /***************************************
@@ -199,7 +199,7 @@
  */
 typedef struct ZSTD_CCtx_s ZSTD_CCtx;
 ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
-ZSTDLIB_API size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
+ZSTDLIB_API size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);  /* accept NULL pointer */
 
 /*! ZSTD_compressCCtx() :
  *  Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
@@ -222,7 +222,7 @@
  *  Use one context per thread for parallel execution. */
 typedef struct ZSTD_DCtx_s ZSTD_DCtx;
 ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
-ZSTDLIB_API size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);  /* accept NULL pointer */
 
 /*! ZSTD_decompressDCtx() :
  *  Same as ZSTD_decompress(),
@@ -234,9 +234,9 @@
                                  const void* src, size_t srcSize);
 
 
-/***************************************
-*  Advanced compression API
-***************************************/
+/*********************************************
+*  Advanced compression API (Requires v1.4.0+)
+**********************************************/
 
 /* API design :
  *   Parameters are pushed one by one into an existing context,
@@ -266,7 +266,6 @@
                          Only the order (from fast to strong) is guaranteed */
 } ZSTD_strategy;
 
-
 typedef enum {
 
     /* compression parameters
@@ -332,7 +331,6 @@
                               * The higher the value of selected strategy, the more complex it is,
                               * resulting in stronger and slower compression.
                               * Special: value 0 means "use default strategy". */
-
     /* LDM mode parameters */
     ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
                                      * This parameter is designed to improve compression ratio
@@ -389,7 +387,7 @@
     ZSTD_c_jobSize=401,      /* Size of a compression job. This value is enforced only when nbWorkers >= 1.
                               * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
                               * 0 means default, which is dynamically determined based on compression parameters.
-                              * Job size must be a minimum of overlap size, or 1 MB, whichever is largest.
+                              * Job size must be a minimum of overlap size, or ZSTDMT_JOBSIZE_MIN (= 512 KB), whichever is largest.
                               * The minimum size is automatically and transparently enforced. */
     ZSTD_c_overlapLog=402,   /* Control the overlap size, as a fraction of window size.
                               * The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
@@ -419,6 +417,8 @@
      * ZSTD_c_stableOutBuffer
      * ZSTD_c_blockDelimiters
      * ZSTD_c_validateSequences
+     * ZSTD_c_splitBlocks
+     * ZSTD_c_useRowMatchFinder
      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
      * note : never ever use experimentalParam? names directly;
      *        also, the enums values themselves are unstable and can still change.
@@ -434,7 +434,10 @@
      ZSTD_c_experimentalParam9=1006,
      ZSTD_c_experimentalParam10=1007,
      ZSTD_c_experimentalParam11=1008,
-     ZSTD_c_experimentalParam12=1009
+     ZSTD_c_experimentalParam12=1009,
+     ZSTD_c_experimentalParam13=1010,
+     ZSTD_c_experimentalParam14=1011,
+     ZSTD_c_experimentalParam15=1012
 } ZSTD_cParameter;
 
 typedef struct {
@@ -519,9 +522,9 @@
                              const void* src, size_t srcSize);
 
 
-/***************************************
-*  Advanced decompression API
-***************************************/
+/***********************************************
+*  Advanced decompression API (Requires v1.4.0+)
+************************************************/
 
 /* The advanced API pushes parameters one by one into an existing DCtx context.
  * Parameters are sticky, and remain valid for all following frames
@@ -546,12 +549,14 @@
      * ZSTD_d_format
      * ZSTD_d_stableOutBuffer
      * ZSTD_d_forceIgnoreChecksum
+     * ZSTD_d_refMultipleDDicts
      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
      * note : never ever use experimentalParam? names directly
      */
      ZSTD_d_experimentalParam1=1000,
      ZSTD_d_experimentalParam2=1001,
-     ZSTD_d_experimentalParam3=1002
+     ZSTD_d_experimentalParam3=1002,
+     ZSTD_d_experimentalParam4=1003
 
 } ZSTD_dParameter;
 
@@ -665,7 +670,7 @@
                                  /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */
 /*===== ZSTD_CStream management functions =====*/
 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
-ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
+ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs);  /* accept NULL pointer */
 
 /*===== Streaming compression functions =====*/
 typedef enum {
@@ -681,7 +686,7 @@
                         : note : multithreaded compression will block to flush as much output as possible. */
 } ZSTD_EndDirective;
 
-/*! ZSTD_compressStream2() :
+/*! ZSTD_compressStream2() : Requires v1.4.0+
  *  Behaves about the same as ZSTD_compressStream, with additional control on end directive.
  *  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
  *  - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
@@ -727,11 +732,11 @@
 
 
 /* *****************************************************************************
- * This following is a legacy streaming API.
+ * The following is a legacy streaming API, available since v1.0+.
  * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
  * It is redundant, but remains fully supported.
- * Advanced parameters and dictionary compression can only be used through the
- * new API.
+ * Streaming in combination with advanced parameters and dictionary compression
+ * can only be used through the new API.
  ******************************************************************************/
 
 /*!
@@ -786,7 +791,7 @@
                                  /* For compatibility with versions <= v1.2.0, prefer differentiating them. */
 /*===== ZSTD_DStream management functions =====*/
 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void);
-ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);
+ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);  /* accept NULL pointer */
 
 /*===== Streaming decompression functions =====*/
 
@@ -809,7 +814,7 @@
 /*! ZSTD_compress_usingDict() :
  *  Compression at an explicit compression level using a Dictionary.
  *  A dictionary can be any arbitrary data segment (also called a prefix),
- *  or a buffer with specified information (see dictBuilder/zdict.h).
+ *  or a buffer with specified information (see zdict.h).
  *  Note : This function loads the dictionary, resulting in significant startup delay.
  *         It's intended for a dictionary used only once.
  *  Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */
@@ -852,7 +857,8 @@
                                          int compressionLevel);
 
 /*! ZSTD_freeCDict() :
- *  Function frees memory allocated by ZSTD_createCDict(). */
+ *  Function frees memory allocated by ZSTD_createCDict().
+ *  If a NULL pointer is passed, no operation is performed. */
 ZSTDLIB_API size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
 
 /*! ZSTD_compress_usingCDict() :
@@ -874,7 +880,8 @@
 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
 
 /*! ZSTD_freeDDict() :
- *  Function frees memory allocated with ZSTD_createDDict() */
+ *  Function frees memory allocated with ZSTD_createDDict()
+ *  If a NULL pointer is passed, no operation is performed. */
 ZSTDLIB_API size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);
 
 /*! ZSTD_decompress_usingDDict() :
@@ -890,19 +897,25 @@
  *  Dictionary helper functions
  *******************************/
 
-/*! ZSTD_getDictID_fromDict() :
+/*! ZSTD_getDictID_fromDict() : Requires v1.4.0+
  *  Provides the dictID stored within dictionary.
  *  if @return == 0, the dictionary is not conformant with Zstandard specification.
  *  It can still be loaded, but as a content-only dictionary. */
 ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
 
-/*! ZSTD_getDictID_fromDDict() :
+/*! ZSTD_getDictID_fromCDict() : Requires v1.5.0+
+ *  Provides the dictID of the dictionary loaded into `cdict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
+
+/*! ZSTD_getDictID_fromDDict() : Requires v1.4.0+
  *  Provides the dictID of the dictionary loaded into `ddict`.
  *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
  *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
 ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
 
-/*! ZSTD_getDictID_fromFrame() :
+/*! ZSTD_getDictID_fromFrame() : Requires v1.4.0+
  *  Provides the dictID required to decompressed the frame stored within `src`.
  *  If @return == 0, the dictID could not be decoded.
  *  This could for one of the following reasons :
@@ -916,7 +929,7 @@
 
 
 /*******************************************************************************
- * Advanced dictionary and prefix API
+ * Advanced dictionary and prefix API (Requires v1.4.0+)
  *
  * This API allows dictionaries to be used with ZSTD_compress2(),
  * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and
@@ -925,7 +938,7 @@
  ******************************************************************************/
 
 
-/*! ZSTD_CCtx_loadDictionary() :
+/*! ZSTD_CCtx_loadDictionary() : Requires v1.4.0+
  *  Create an internal CDict from `dict` buffer.
  *  Decompression will have to use same dictionary.
  * @result : 0, or an error code (which can be tested with ZSTD_isError()).
@@ -944,11 +957,11 @@
  *           to precisely select how dictionary content must be interpreted. */
 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
 
-/*! ZSTD_CCtx_refCDict() :
+/*! ZSTD_CCtx_refCDict() : Requires v1.4.0+
  *  Reference a prepared dictionary, to be used for all next compressed frames.
  *  Note that compression parameters are enforced from within CDict,
  *  and supersede any compression parameter previously set within CCtx.
- *  The parameters ignored are labled as "superseded-by-cdict" in the ZSTD_cParameter enum docs.
+ *  The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs.
  *  The ignored parameters will be used again if the CCtx is returned to no-dictionary mode.
  *  The dictionary will remain valid for future compressed frames using same CCtx.
  * @result : 0, or an error code (which can be tested with ZSTD_isError()).
@@ -958,7 +971,7 @@
  *  Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */
 ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
 
-/*! ZSTD_CCtx_refPrefix() :
+/*! ZSTD_CCtx_refPrefix() : Requires v1.4.0+
  *  Reference a prefix (single-usage dictionary) for next compressed frame.
  *  A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end).
  *  Decompression will need same prefix to properly regenerate data.
@@ -979,7 +992,7 @@
 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
                                  const void* prefix, size_t prefixSize);
 
-/*! ZSTD_DCtx_loadDictionary() :
+/*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+
  *  Create an internal DDict from dict buffer,
  *  to be used to decompress next frames.
  *  The dictionary remains valid for all future frames, until explicitly invalidated.
@@ -996,9 +1009,16 @@
  */
 ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
 
-/*! ZSTD_DCtx_refDDict() :
+/*! ZSTD_DCtx_refDDict() : Requires v1.4.0+
  *  Reference a prepared dictionary, to be used to decompress next frames.
  *  The dictionary remains active for decompression of future frames using same DCtx.
+ *
+ *  If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function
+ *  will store the DDict references in a table, and the DDict used for decompression
+ *  will be determined at decompression time, as per the dict ID in the frame.
+ *  The memory for the table is allocated on the first call to refDDict, and can be
+ *  freed with ZSTD_freeDCtx().
+ *
  * @result : 0, or an error code (which can be tested with ZSTD_isError()).
  *  Note 1 : Currently, only one dictionary can be managed.
  *           Referencing a new dictionary effectively "discards" any previous one.
@@ -1007,7 +1027,7 @@
  */
 ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
 
-/*! ZSTD_DCtx_refPrefix() :
+/*! ZSTD_DCtx_refPrefix() : Requires v1.4.0+
  *  Reference a prefix (single-usage dictionary) to decompress next frame.
  *  This is the reverse operation of ZSTD_CCtx_refPrefix(),
  *  and must use the same prefix as the one used during compression.
@@ -1028,7 +1048,7 @@
 
 /* ===   Memory management   === */
 
-/*! ZSTD_sizeof_*() :
+/*! ZSTD_sizeof_*() : Requires v1.4.0+
  *  These functions give the _current_ memory usage of selected object.
  *  Note that object memory usage can evolve (increase or decrease) over time. */
 ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
@@ -1053,6 +1073,28 @@
 #if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
 #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
 
+/* Deprecation warnings :
+ * Should these warnings be a problem, it is generally possible to disable them,
+ * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual.
+ * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS.
+ */
+#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS
+#  define ZSTD_DEPRECATED(message) ZSTDLIB_API  /* disable deprecation warnings */
+#else
+#  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
+#    define ZSTD_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
+#  elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) || defined(__clang__)  /* gcc >= 4.5 or clang : attribute carries the message */
+#    define ZSTD_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
+#  elif defined(__GNUC__) && (__GNUC__ >= 3)  /* older gcc : attribute without message */
+#    define ZSTD_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))
+#  elif defined(_MSC_VER)
+#    define ZSTD_DEPRECATED(message) ZSTDLIB_API __declspec(deprecated(message))
+#  else
+#    pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler")
+#    define ZSTD_DEPRECATED(message) ZSTDLIB_API
+#  endif
+#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */
+
 /****************************************************************************************
  *   experimental API (static linking only)
  ****************************************************************************************
@@ -1206,6 +1248,12 @@
 } ZSTD_forceIgnoreChecksum_e;
 
 typedef enum {
+    /* Note: this enum controls ZSTD_d_refMultipleDDicts */
+    ZSTD_rmd_refSingleDDict = 0,
+    ZSTD_rmd_refMultipleDDicts = 1
+} ZSTD_refMultipleDDicts_e;
+
+typedef enum {
     /* Note: this enum and the behavior it controls are effectively internal
      * implementation details of the compressor. They are expected to continue
      * to evolve and should be considered only in the context of extremely
@@ -1253,6 +1301,11 @@
   ZSTD_lcm_uncompressed = 2   /**< Always emit uncompressed literals. */
 } ZSTD_literalCompressionMode_e;
 
+typedef enum {
+  ZSTD_urm_auto = 0,                   /* Automatically determine whether or not we use row matchfinder */
+  ZSTD_urm_disableRowMatchFinder = 1,  /* Never use row matchfinder */
+  ZSTD_urm_enableRowMatchFinder = 2    /* Always use row matchfinder when applicable */
+} ZSTD_useRowMatchFinderMode_e;
 
 /***************************************
 *  Frame size functions
@@ -1286,7 +1339,7 @@
  *  `srcSize` must be the _exact_ size of this series
  *       (i.e. there should be a frame boundary at `src + srcSize`)
  *  @return : - upper-bound for the decompressed size of all data in all successive frames
- *            - if an error occured: ZSTD_CONTENTSIZE_ERROR
+ *            - if an error occurred: ZSTD_CONTENTSIZE_ERROR
  *
  *  note 1  : an error can occur if `src` contains an invalid or incorrectly formatted frame.
  *  note 2  : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`.
@@ -1372,6 +1425,23 @@
                                   const void* src, size_t srcSize);
 
 
+/*! ZSTD_writeSkippableFrame() :
+ * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer.
+ *
+ * Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number,
+ * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15.
+ * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so
+ * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant.
+ *
+ * Returns an error if destination buffer is not large enough, if the source size is not representable
+ * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid).
+ *
+ * @return : number of bytes written or a ZSTD error.
+ */
+ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
+                                            const void* src, size_t srcSize, unsigned magicVariant);
+
+
 /***************************************
 *  Memory management
 ***************************************/
@@ -1506,13 +1576,14 @@
  * Note that the lifetime of such pool must exist while being used.
  * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value
  * to use an internal thread pool).
- * ZSTD_freeThreadPool frees a thread pool.
+ * ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer.
  */
 typedef struct POOL_ctx_s ZSTD_threadPool;
 ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads);
-ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool);
+ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool);  /* accept NULL pointer */
 ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool);
 
+
 /*
  * This API is temporary and is expected to change or disappear in the future!
  */
@@ -1523,10 +1594,12 @@
     const ZSTD_CCtx_params* cctxParams,
     ZSTD_customMem customMem);
 
-ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
-                                                  ZSTD_dictLoadMethod_e dictLoadMethod,
-                                                  ZSTD_dictContentType_e dictContentType,
-                                                  ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(
+    const void* dict, size_t dictSize,
+    ZSTD_dictLoadMethod_e dictLoadMethod,
+    ZSTD_dictContentType_e dictContentType,
+    ZSTD_customMem customMem);
+
 
 /***************************************
 *  Advanced compression functions
@@ -1540,12 +1613,6 @@
  *  note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
 
-/*! ZSTD_getDictID_fromCDict() :
- *  Provides the dictID of the dictionary loaded into `cdict`.
- *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
- *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
-ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
-
 /*! ZSTD_getCParams() :
  * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
  * `estimatedSrcSize` value is optional, select 0 if not known */
@@ -1572,18 +1639,20 @@
 /*! ZSTD_compress_advanced() :
  *  Note : this function is now DEPRECATED.
  *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
- *  This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */
-ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
+ *  This prototype will generate compilation warnings. */
+ZSTD_DEPRECATED("use ZSTD_compress2")
+size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
                                           void* dst, size_t dstCapacity,
                                     const void* src, size_t srcSize,
                                     const void* dict,size_t dictSize,
                                           ZSTD_parameters params);
 
 /*! ZSTD_compress_usingCDict_advanced() :
- *  Note : this function is now REDUNDANT.
+ *  Note : this function is now DEPRECATED.
  *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
- *  This prototype will be marked as deprecated and generate compilation warning in some future version */
-ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+ *  This prototype will generate compilation warnings. */
+ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary")
+size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
                                               void* dst, size_t dstCapacity,
                                         const void* src, size_t srcSize,
                                         const ZSTD_CDict* cdict,
@@ -1645,7 +1714,7 @@
 
 /* Controls how the literals are compressed (default is auto).
  * The value must be of type ZSTD_literalCompressionMode_e.
- * See ZSTD_literalCompressionMode_t enum definition for details.
+ * See ZSTD_literalCompressionMode_e enum definition for details.
  */
 #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
 
@@ -1797,12 +1866,52 @@
  */
 #define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
 
+/* ZSTD_c_splitBlocks
+ * Default is 0 == disabled. Set to 1 to enable block splitting.
+ *
+ * Will attempt to split blocks in order to improve compression ratio at the cost of speed.
+ */
+#define ZSTD_c_splitBlocks ZSTD_c_experimentalParam13
+
+/* ZSTD_c_useRowMatchFinder
+ * Default is ZSTD_urm_auto.
+ * Controlled with ZSTD_useRowMatchFinderMode_e enum.
+ *
+ * By default, in ZSTD_urm_auto, when finalizing the compression parameters, the library
+ * will decide at runtime whether to use the row-based matchfinder based on support for SIMD
+ * instructions as well as the windowLog.
+ *
+ * Set to ZSTD_urm_disableRowMatchFinder to never use row-based matchfinder.
+ * Set to ZSTD_urm_enableRowMatchFinder to force usage of row-based matchfinder.
+ */
+#define ZSTD_c_useRowMatchFinder ZSTD_c_experimentalParam14
+
+/* ZSTD_c_deterministicRefPrefix
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Zstd produces different results for prefix compression when the prefix is
+ * directly adjacent to the data about to be compressed vs. when it isn't.
+ * This is because zstd detects that the two buffers are contiguous and it can
+ * use a more efficient match finding algorithm. However, this produces different
+ * results than when the two buffers are non-contiguous. This flag forces zstd
+ * to always load the prefix in non-contiguous mode, even if it happens to be
+ * adjacent to the data, to guarantee determinism.
+ *
+ * If you really care about determinism when using a dictionary or prefix,
+ * like when doing delta compression, you should select this option. It comes
+ * at a speed penalty of about 2.5% if the dictionary and data happened to be
+ * contiguous, and is free if they weren't contiguous. We don't expect that
+ * intentionally making the dictionary and data contiguous will be worth the
+ * cost to memcpy() the data.
+ */
+#define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15
+
 /*! ZSTD_CCtx_getParameter() :
  *  Get the requested compression parameter value, selected by enum ZSTD_cParameter,
  *  and store it into int* value.
  * @return : 0, or an error code (which can be tested with ZSTD_isError()).
  */
-ZSTDLIB_API size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
+ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
 
 
 /*! ZSTD_CCtx_params :
@@ -1817,13 +1926,13 @@
  *                                    These parameters will be applied to
  *                                    all subsequent frames.
  *  - ZSTD_compressStream2() : Do compression using the CCtx.
- *  - ZSTD_freeCCtxParams() : Free the memory.
+ *  - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer.
  *
  *  This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams()
  *  for static allocation of CCtx for single-threaded compression.
  */
 ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
-ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
+ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);  /* accept NULL pointer */
 
 /*! ZSTD_CCtxParams_reset() :
  *  Reset params to default values.
@@ -1842,7 +1951,7 @@
  */
 ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
 
-/*! ZSTD_CCtxParams_setParameter() :
+/*! ZSTD_CCtxParams_setParameter() : Requires v1.4.0+
  *  Similar to ZSTD_CCtx_setParameter.
  *  Set one compression parameter, selected by enum ZSTD_cParameter.
  *  Parameters must be applied to a ZSTD_CCtx using
@@ -1857,7 +1966,7 @@
  * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter.
  * @result : 0, or an error code (which can be tested with ZSTD_isError()).
  */
-ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);
+ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);
 
 /*! ZSTD_CCtx_setParametersUsingCCtxParams() :
  *  Apply a set of ZSTD_CCtx_params to the compression context.
@@ -1983,12 +2092,38 @@
  */
 #define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3
 
+/* ZSTD_d_refMultipleDDicts
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable
+ *
+ * If enabled and dctx is allocated on the heap, then additional memory will be allocated
+ * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_DCtx_refDDict()
+ * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead
+ * store all references. At decompression time, the appropriate dictID is selected
+ * from the set of DDicts based on the dictID in the frame.
+ *
+ * Usage is simply calling ZSTD_DCtx_refDDict() on multiple dict buffers.
+ *
+ * Param has values of type ZSTD_refMultipleDDicts_e
+ *
+ * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory
+ * allocation for the hash table. ZSTD_freeDCtx() also frees this memory.
+ * Memory is allocated as per ZSTD_DCtx::customMem.
+ *
+ * Although this function allocates memory for the table, the user is still responsible for
+ * memory management of the underlying ZSTD_DDict* themselves.
+ */
+#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4
+
+
 /*! ZSTD_DCtx_setFormat() :
+ *  This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter().
  *  Instruct the decoder context about what kind of data to decode next.
  *  This instruction is mandatory to decode data without a fully-formed header,
  *  such ZSTD_f_zstd1_magicless for example.
  * @return : 0, or an error code (which can be tested using ZSTD_isError()). */
-ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
+ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead")
+size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
 
 /*! ZSTD_decompressStream_simpleArgs() :
  *  Same as ZSTD_decompressStream(),
@@ -2012,7 +2147,7 @@
 /*=====   Advanced Streaming compression functions  =====*/
 
 /*! ZSTD_initCStream_srcSize() :
- * This function is deprecated, and equivalent to:
+ * This function is DEPRECATED, and equivalent to:
  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
  *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
  *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
@@ -2021,15 +2156,15 @@
  * pledgedSrcSize must be correct. If it is not known at init time, use
  * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
  * "0" also disables frame content size field. It may be enabled in the future.
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ * This prototype will generate compilation warnings.
  */
-ZSTDLIB_API size_t
-ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
+ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
                          int compressionLevel,
                          unsigned long long pledgedSrcSize);
 
 /*! ZSTD_initCStream_usingDict() :
- * This function is deprecated, and is equivalent to:
+ * This function is DEPRECATED, and is equivalent to:
  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
  *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
  *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
@@ -2038,15 +2173,15 @@
  * dict == NULL or dictSize < 8, in which case no dict is used.
  * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if
  * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ * This prototype will generate compilation warnings.
  */
-ZSTDLIB_API size_t
-ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
+ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
                      const void* dict, size_t dictSize,
                            int compressionLevel);
 
 /*! ZSTD_initCStream_advanced() :
- * This function is deprecated, and is approximately equivalent to:
+ * This function is DEPRECATED, and is approximately equivalent to:
  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
  *     // Pseudocode: Set each zstd parameter and leave the rest as-is.
  *     for ((param, value) : params) {
@@ -2058,23 +2193,24 @@
  * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy.
  * pledgedSrcSize must be correct.
  * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ * This prototype will generate compilation warnings.
  */
-ZSTDLIB_API size_t
-ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
+ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
                     const void* dict, size_t dictSize,
                           ZSTD_parameters params,
                           unsigned long long pledgedSrcSize);
 
 /*! ZSTD_initCStream_usingCDict() :
- * This function is deprecated, and equivalent to:
+ * This function is DEPRECATED, and equivalent to:
  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
  *     ZSTD_CCtx_refCDict(zcs, cdict);
- *
+ * 
  * note : cdict will just be referenced, and must outlive compression session
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ * This prototype will generate compilation warnings.
  */
-ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
+ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
 
 /*! ZSTD_initCStream_usingCDict_advanced() :
  *   This function is DEPRECATED, and is approximately equivalent to:
@@ -2089,18 +2225,21 @@
  * same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
  * pledgedSrcSize must be correct. If srcSize is not known at init time, use
  * value ZSTD_CONTENTSIZE_UNKNOWN.
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ * This prototype will generate compilation warnings.
  */
-ZSTDLIB_API size_t
-ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
                                const ZSTD_CDict* cdict,
                                      ZSTD_frameParameters fParams,
                                      unsigned long long pledgedSrcSize);
 
 /*! ZSTD_resetCStream() :
- * This function is deprecated, and is equivalent to:
+ * This function is DEPRECATED, and is equivalent to:
  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
  *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ * Note: ZSTD_resetCStream() interprets pledgedSrcSize == 0 as ZSTD_CONTENTSIZE_UNKNOWN, but
+ *       ZSTD_CCtx_setPledgedSrcSize() does not do the same, so ZSTD_CONTENTSIZE_UNKNOWN must be
+ *       explicitly specified.
  *
  *  start a new frame, using same parameters from previous frame.
  *  This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
@@ -2110,9 +2249,10 @@
  *  For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
  *  but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
  * @return : 0, or an error code (which can be tested using ZSTD_isError())
- *  Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ *  This prototype will generate compilation warnings.
  */
-ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
+ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
+size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
 
 
 typedef struct {
@@ -2199,8 +2339,7 @@
   ZSTD_CCtx object can be re-used multiple times within successive compression operations.
 
   Start by initializing a context.
-  Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression,
-  or ZSTD_compressBegin_advanced(), for finer parameter control.
+  Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression.
   It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx()
 
   Then, consume your input using ZSTD_compressContinue().
@@ -2225,15 +2364,17 @@
 /*=====   Buffer-less streaming compression functions  =====*/
 ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
 ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
-ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */
 ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */
-ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */
 ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**<  note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
 
 ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
-
+/* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */
+ZSTD_DEPRECATED("use advanced API to access custom parameters")
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */
+ZSTD_DEPRECATED("use advanced API to access custom parameters")
+size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */
 /**
   Buffer-less streaming decompression (synchronous mode)
 
diff --git a/lib/common/zstd_errors.h b/lib/zstd_errors.h
similarity index 98%
rename from lib/common/zstd_errors.h
rename to lib/zstd_errors.h
index 6d0d003..fa3686b 100644
--- a/lib/common/zstd_errors.h
+++ b/lib/zstd_errors.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/programs/.gitignore b/programs/.gitignore
index 662f708..2d4edbe 100644
--- a/programs/.gitignore
+++ b/programs/.gitignore
@@ -8,6 +8,7 @@
 zstd-small
 zstd-nolegacy
 zstd-dictBuilder
+zstd-dll
 
 # Object files
 *.o
diff --git a/programs/Makefile b/programs/Makefile
index 8641d0e..599fb02 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2015-2020, Yann Collet, Facebook, Inc.
+# Copyright (c) Yann Collet, Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
@@ -61,8 +61,10 @@
             -Wstrict-prototypes -Wundef -Wpointer-arith \
             -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
             -Wredundant-decls -Wmissing-prototypes -Wc++-compat
-CFLAGS   += $(DEBUGFLAGS) $(MOREFLAGS)
-FLAGS     = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
+CFLAGS   += $(DEBUGFLAGS)
+CPPFLAGS += $(MOREFLAGS)
+LDFLAGS  += $(MOREFLAGS)
+FLAGS     = $(CFLAGS) $(CPPFLAGS) $(LDFLAGS)
 
 ZSTDLIB_COMMON := $(ZSTDDIR)/common
 ZSTDLIB_COMPRESS := $(ZSTDDIR)/compress
@@ -88,28 +90,33 @@
 
 # Sort files in alphabetical order for reproducible builds
 ZSTDLIB_FULL_SRC = $(sort $(ZSTDLIB_CORE_SRC) $(ZSTDLEGACY_SRC) $(ZDICT_SRC))
-ZSTDLIB_LOCAL_SRC := $(notdir $(ZSTDLIB_FULL_SRC))
+ZSTDLIB_LOCAL_SRC = $(notdir $(ZSTDLIB_FULL_SRC))
 ZSTDLIB_LOCAL_OBJ := $(ZSTDLIB_LOCAL_SRC:.c=.o)
 
 ZSTD_CLI_SRC := $(wildcard *.c)
 ZSTD_CLI_OBJ := $(ZSTD_CLI_SRC:.c=.o)
 
-ZSTD_ALL_SRC := $(ZSTDLIB_LOCAL_SRC) $(ZSTD_CLI_SRC)
+ZSTD_ALL_SRC = $(ZSTDLIB_LOCAL_SRC) $(ZSTD_CLI_SRC)
 ZSTD_ALL_OBJ := $(ZSTD_ALL_SRC:.c=.o)
 
 UNAME := $(shell uname)
+
+ifndef BUILD_DIR
 ifeq ($(UNAME), Darwin)
-  HASH ?= md5
+  ifeq ($(shell md5 < /dev/null > /dev/null; echo $$?), 0)
+    HASH ?= md5
+  endif
 else ifeq ($(UNAME), FreeBSD)
   HASH ?= gmd5sum
+else ifeq ($(UNAME), NetBSD)
+  HASH ?= md5 -n
 else ifeq ($(UNAME), OpenBSD)
   HASH ?= md5
 endif
 HASH ?= md5sum
 HAVE_HASH :=$(shell echo 1 | $(HASH) > /dev/null && echo 1 || echo 0)
 
-ifndef BUILD_DIR
-HASH_DIR = conf_$(shell echo $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(ZSTD_FILES) | $(HASH) | cut -f 1 -d " ")
+HASH_DIR = conf_$(shell echo $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(LDLIBS) $(ZSTD_FILES) | $(HASH) | cut -f 1 -d " ")
 ifeq ($(HAVE_HASH),0)
   $(info warning : could not find HASH ($(HASH)), needed to differentiate builds using different flags)
   BUILD_DIR := obj/generic_noconf
@@ -192,11 +199,13 @@
 endif
 
 SET_CACHE_DIRECTORY = \
-	$(MAKE) --no-print-directory $@ \
+   +$(MAKE) --no-print-directory $@ \
     BUILD_DIR=obj/$(HASH_DIR) \
     CPPFLAGS="$(CPPFLAGS)" \
     CFLAGS="$(CFLAGS)" \
-    LDFLAGS="$(LDFLAGS)"
+    LDFLAGS="$(LDFLAGS)" \
+    LDLIBS="$(LDLIBS)" \
+    ZSTD_ALL_SRC="$(ZSTD_ALL_SRC)"
 
 
 .PHONY: all
@@ -207,7 +216,8 @@
 
 .PHONY: zstd  # must always be run
 zstd : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP) $(LZMACPP) $(LZ4CPP)
-zstd : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD) $(LZ4LD) $(DEBUGFLAGS_LD)
+zstd : LDFLAGS += $(THREAD_LD) $(DEBUGFLAGS_LD)
+zstd : LDLIBS += $(ZLIBLD) $(LZMALD) $(LZ4LD)
 zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
 ifneq (,$(filter Windows%,$(OS)))
 zstd : $(RES_FILE)
@@ -229,7 +239,7 @@
 	@echo "$(LZMA_MSG)"
 	@echo "$(LZ4_MSG)"
 	@echo LINK $@
-	$(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS)
+	$(CC) $(FLAGS) $^ $(LDLIBS) -o $@$(EXT)
 
 ifeq ($(HAVE_HASH),1)
 SRCBIN_HASH = $(shell cat $(BUILD_DIR)/zstd 2> $(VOID) | $(HASH) | cut -f 1 -d " ")
@@ -269,36 +279,34 @@
 zstd-nolegacy : $(ZSTDLIB_CORE_SRC) $(ZDICT_SRC) $(ZSTD_CLI_OBJ)
 	$(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS)
 
+.PHONY: zstd-nomt
 zstd-nomt : THREAD_CPP :=
 zstd-nomt : THREAD_LD  :=
 zstd-nomt : THREAD_MSG := - multi-threading disabled
 zstd-nomt : zstd
 
+.PHONY: zstd-nogz
 zstd-nogz : ZLIBCPP :=
 zstd-nogz : ZLIBLD  :=
 zstd-nogz : ZLIB_MSG := - gzip support is disabled
 zstd-nogz : zstd
 
+.PHONY: zstd-noxz
 zstd-noxz : LZMACPP :=
 zstd-noxz : LZMALD  :=
 zstd-noxz : LZMA_MSG := - xz/lzma support is disabled
 zstd-noxz : zstd
 
-## zstd-dll: zstd executable linked to dynamic library libzstd (must already exist)
-# note : the following target doesn't link
-#        because zstd uses non-public symbols from libzstd
-#        such as XXH64 (for benchmark),
-#        ZDICT_trainFromBuffer_unsafe_legacy (for dictionary builder)
-#        and ZSTD_cycleLog (likely for --patch-from).
-#        It's unclear at this stage if this is a scenario that must be supported
+## zstd-dll: zstd executable linked to dynamic library libzstd (must have same version)
 .PHONY: zstd-dll
-zstd-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd
-zstd-dll : ZSTDLIB_FULL_SRC =
-zstd-dll : $(ZSTD_CLI_OBJ)
-	$(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS)
+zstd-dll : LDFLAGS+= -L$(ZSTDDIR)
+zstd-dll : LDLIBS += -lzstd
+zstd-dll : ZSTDLIB_LOCAL_SRC = xxhash.c
+zstd-dll : zstd
 
 
 ## zstd-pgo: zstd executable optimized with PGO.
+.PHONY: zstd-pgo
 zstd-pgo :
 	$(MAKE) clean
 	$(MAKE) zstd MOREFLAGS=-fprofile-generate
@@ -315,16 +323,16 @@
 ## zstd-small: minimal target, supporting only zstd compression and decompression. no bench. no legacy. no other format.
 zstd-small: CFLAGS = -Os -s
 zstd-frugal zstd-small: $(ZSTDLIB_CORE_SRC) zstdcli.c util.c timefn.c fileio.c
-	$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT $^ -o $@$(EXT)
+	$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOTRACE $^ -o $@$(EXT)
 
 zstd-decompress: $(ZSTDLIB_COMMON_C) $(ZSTDLIB_DECOMPRESS_C) zstdcli.c util.c timefn.c fileio.c
-	$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOCOMPRESS $^ -o $@$(EXT)
+	$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOCOMPRESS -DZSTD_NOTRACE $^ -o $@$(EXT)
 
 zstd-compress: $(ZSTDLIB_COMMON_C) $(ZSTDLIB_COMPRESS_C) zstdcli.c util.c timefn.c fileio.c
-	$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NODECOMPRESS $^ -o $@$(EXT)
+	$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NODECOMPRESS -DZSTD_NOTRACE $^ -o $@$(EXT)
 
 ## zstd-dictBuilder: executable supporting dictionary creation and compression (only)
-zstd-dictBuilder: CPPFLAGS += -DZSTD_NOBENCH -DZSTD_NODECOMPRESS
+zstd-dictBuilder: CPPFLAGS += -DZSTD_NOBENCH -DZSTD_NODECOMPRESS -DZSTD_NOTRACE
 zstd-dictBuilder: $(ZSTDLIB_COMMON_C) $(ZSTDLIB_COMPRESS_C) $(ZDICT_SRC) zstdcli.c util.c timefn.c fileio.c dibio.c
 	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
@@ -346,9 +354,11 @@
 .PHONY: clean
 clean:
 	$(RM) core *.o tmp* result* *.gcda dictionary *.zst \
-        zstd$(EXT) zstd32$(EXT) zstd-compress$(EXT) zstd-decompress$(EXT) \
+        zstd$(EXT) zstd32$(EXT) zstd-dll$(EXT) \
+        zstd-compress$(EXT) zstd-decompress$(EXT) \
         zstd-small$(EXT) zstd-frugal$(EXT) zstd-nolegacy$(EXT) zstd4$(EXT) \
-        zstd-dictBuilder$(EXT) *.gcda default*.profraw default.profdata have_zlib$(EXT)
+        zstd-dictBuilder$(EXT) \
+        *.gcda default*.profraw default.profdata have_zlib$(EXT)
 	$(RM) -r obj/*
 	@echo Cleaning completed
 
diff --git a/programs/README.md b/programs/README.md
index cf7f5ba..7fd7104 100644
--- a/programs/README.md
+++ b/programs/README.md
@@ -224,7 +224,8 @@
 that `zstd` will use for compression, which by default is `1`.
 This functionality only exists when `zstd` is compiled with multithread support.
 `0` means "use as many threads as detected cpu cores on local system".
-The max # of threads is capped at: `ZSTDMT_NBWORKERS_MAX==200`.
+The max # of threads is capped at `ZSTDMT_NBWORKERS_MAX`,
+which is either 64 in 32-bit mode, or 256 for 64-bit environments.
 
 This functionality can be useful when `zstd` CLI is invoked in a way that doesn't allow passing arguments.
 One such scenario is `tar --zstd`.
diff --git a/programs/benchfn.c b/programs/benchfn.c
index ed7273a..1aadbdd 100644
--- a/programs/benchfn.c
+++ b/programs/benchfn.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/programs/benchfn.h b/programs/benchfn.h
index e555bbe..590f292 100644
--- a/programs/benchfn.h
+++ b/programs/benchfn.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/programs/benchzstd.c b/programs/benchzstd.c
index 7705620..49c0349 100644
--- a/programs/benchzstd.c
+++ b/programs/benchzstd.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -36,7 +36,7 @@
 #include "datagen.h"     /* RDG_genBuffer */
 #include "../lib/common/xxhash.h"
 #include "benchzstd.h"
-#include "../lib/common/zstd_errors.h"
+#include "../lib/zstd_errors.h"
 
 
 /* *************************************
@@ -67,18 +67,10 @@
 /* *************************************
 *  console display
 ***************************************/
-#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
+#define DISPLAY(...)         { fprintf(stderr, __VA_ARGS__); fflush(NULL); }
 #define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
 /* 0 : no display;   1: errors;   2 : + result + interaction + warnings;   3 : + progression;   4 : + information */
 
-static const U64 g_refreshRate = SEC_TO_MICRO / 6;
-static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
-
-#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
-            if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
-            { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
-            if (displayLevel>=4) fflush(stderr); } } }
-
 
 /* *************************************
 *  Exceptions
@@ -137,7 +129,8 @@
         0, /* ldmHashLog */
         0, /* ldmBuckSizeLog */
         0,  /* ldmHashRateLog */
-        ZSTD_lcm_auto /* literalCompressionMode */
+        ZSTD_lcm_auto, /* literalCompressionMode */
+        0 /* useRowMatchFinder */
     };
     return res;
 }
@@ -175,6 +168,7 @@
         CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, adv->nbWorkers));
     }
     CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, cLevel));
+    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_useRowMatchFinder, adv->useRowMatchFinder));
     CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_enableLongDistanceMatching, adv->ldmFlag));
     CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmMinMatch, adv->ldmMinMatch));
     CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashLog, adv->ldmHashLog));
@@ -766,7 +760,7 @@
         }
         {   FILE* const f = fopen(fileNamesTable[n], "rb");
             if (f==NULL) RETURN_ERROR_INT(10, "impossible to open file %s", fileNamesTable[n]);
-            DISPLAYUPDATE(2, "Loading %s...       \r", fileNamesTable[n]);
+            DISPLAYLEVEL(2, "Loading %s...       \r", fileNamesTable[n]);
             if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n;   /* buffer too small - stop after this file */
             {   size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
                 if (readSize != (size_t)fileSize) RETURN_ERROR_INT(11, "could not read %s", fileNamesTable[n]);
diff --git a/programs/benchzstd.h b/programs/benchzstd.h
index 8c55b3c..9b40dcc 100644
--- a/programs/benchzstd.h
+++ b/programs/benchzstd.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -117,6 +117,7 @@
     int ldmBucketSizeLog;
     int ldmHashRateLog;
     ZSTD_literalCompressionMode_e literalCompressionMode;
+    int useRowMatchFinder;  /* use row-based matchfinder if possible */
 } BMK_advancedParams_t;
 
 /* returns default parameters used by nonAdvanced functions */
diff --git a/programs/datagen.c b/programs/datagen.c
index 4353b7f..3b4f9e5 100644
--- a/programs/datagen.c
+++ b/programs/datagen.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/programs/datagen.h b/programs/datagen.h
index 5a2682d..b76ae2a 100644
--- a/programs/datagen.h
+++ b/programs/datagen.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/programs/dibio.c b/programs/dibio.c
index cb3829e..d6c9f6d 100644
--- a/programs/dibio.c
+++ b/programs/dibio.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -255,18 +255,6 @@
 }
 
 
-/*! ZDICT_trainFromBuffer_unsafe_legacy() :
-    Strictly Internal use only !!
-    Same as ZDICT_trainFromBuffer_legacy(), but does not control `samplesBuffer`.
-    `samplesBuffer` must be followed by noisy guard band to avoid out-of-buffer reads.
-    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
-              or an error code.
-*/
-size_t ZDICT_trainFromBuffer_unsafe_legacy(void* dictBuffer, size_t dictBufferCapacity,
-                                           const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
-                                           ZDICT_legacy_params_t parameters);
-
-
 int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
                        const char** fileNamesTable, unsigned nbFiles, size_t chunkSize,
                        ZDICT_legacy_params_t* params, ZDICT_cover_params_t* coverParams,
@@ -319,9 +307,9 @@
     {   size_t dictSize;
         if (params) {
             DiB_fillNoise((char*)srcBuffer + loadedSize, NOISELENGTH);   /* guard band, for end of buffer condition */
-            dictSize = ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, maxDictSize,
-                                                           srcBuffer, sampleSizes, fs.nbSamples,
-                                                           *params);
+            dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize,
+                                                    srcBuffer, sampleSizes, fs.nbSamples,
+                                                    *params);
         } else if (coverParams) {
             if (optimize) {
               dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize,
diff --git a/programs/dibio.h b/programs/dibio.h
index 682723d..f65ed9b 100644
--- a/programs/dibio.h
+++ b/programs/dibio.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -19,7 +19,7 @@
 *  Dependencies
 ***************************************/
 #define ZDICT_STATIC_LINKING_ONLY
-#include "../lib/dictBuilder/zdict.h"     /* ZDICT_params_t */
+#include "../lib/zdict.h"     /* ZDICT_params_t */
 
 
 /*-*************************************
diff --git a/programs/fileio.c b/programs/fileio.c
index 65f2d53..5693ac3 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -25,9 +25,10 @@
 ***************************************/
 #include "platform.h"   /* Large Files support, SET_BINARY_MODE */
 #include "util.h"       /* UTIL_getFileSize, UTIL_isRegularFile, UTIL_isSameFile */
-#include <stdio.h>      /* fprintf, fopen, fread, _fileno, stdin, stdout */
+#include <stdio.h>      /* fprintf, open, fdopen, fread, _fileno, stdin, stdout */
 #include <stdlib.h>     /* malloc, free */
 #include <string.h>     /* strcmp, strlen */
+#include <fcntl.h>      /* O_WRONLY */
 #include <assert.h>
 #include <errno.h>      /* errno */
 #include <limits.h>     /* INT_MAX */
@@ -44,8 +45,7 @@
 
 #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */
 #include "../lib/zstd.h"
-#include "../lib/common/zstd_errors.h"  /* ZSTD_error_frameParameter_windowTooLarge */
-#include "../lib/compress/zstd_compress_internal.h"
+#include "../lib/zstd_errors.h"  /* ZSTD_error_frameParameter_windowTooLarge */
 
 #if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS)
 #  include <zlib.h>
@@ -74,16 +74,29 @@
 
 #define FNSPACE 30
 
+/* Default file permissions 0666 (modulated by umask) */
+#if !defined(_WIN32)
+/* These macros aren't defined on windows. */
+#define DEFAULT_FILE_PERMISSIONS (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)
+#else
+#define DEFAULT_FILE_PERMISSIONS (0666)
+#endif
+
 /*-*************************************
 *  Macros
 ***************************************/
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+#undef MAX
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
 
 struct FIO_display_prefs_s {
     int displayLevel;   /* 0 : no display;  1: errors;  2: + result + interaction + warnings;  3: + progression;  4: + information */
-    U32 noProgress;
+    FIO_progressSetting_e progressSetting;
 };
 
-static FIO_display_prefs_t g_display_prefs = {2, 0};
+static FIO_display_prefs_t g_display_prefs = {2, FIO_ps_auto};
 
 #define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
 #define DISPLAYOUT(...)      fprintf(stdout, __VA_ARGS__)
@@ -92,10 +105,10 @@
 static const U64 g_refreshRate = SEC_TO_MICRO / 6;
 static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
 
-#define READY_FOR_UPDATE() (!g_display_prefs.noProgress && UTIL_clockSpanMicro(g_displayClock) > g_refreshRate)
+#define READY_FOR_UPDATE() ((g_display_prefs.progressSetting != FIO_ps_never) && UTIL_clockSpanMicro(g_displayClock) > g_refreshRate)
 #define DELAY_NEXT_UPDATE() { g_displayClock = UTIL_getTime(); }
 #define DISPLAYUPDATE(l, ...) {                              \
-        if (g_display_prefs.displayLevel>=l && !g_display_prefs.noProgress) { \
+        if (g_display_prefs.displayLevel>=l && (g_display_prefs.progressSetting != FIO_ps_never)) { \
             if (READY_FOR_UPDATE() || (g_display_prefs.displayLevel>=4)) { \
                 DELAY_NEXT_UPDATE();                         \
                 DISPLAY(__VA_ARGS__);                        \
@@ -294,6 +307,7 @@
     int blockSize;
     int overlapLog;
     U32 adaptiveMode;
+    U32 useRowMatchFinder;
     int rsyncable;
     int minAdaptLevel;
     int maxAdaptLevel;
@@ -319,6 +333,7 @@
     int excludeCompressedFiles;
     int patchFromMode;
     int contentSize;
+    int allowBlockDevices;
 };
 
 /*-*************************************
@@ -379,6 +394,7 @@
     ret->testMode = 0;
     ret->literalCompressionMode = ZSTD_lcm_auto;
     ret->excludeCompressedFiles = 0;
+    ret->allowBlockDevices = 0;
     return ret;
 }
 
@@ -414,7 +430,7 @@
 
 void FIO_setNotificationLevel(int level) { g_display_prefs.displayLevel=level; }
 
-void FIO_setNoProgress(unsigned noProgress) { g_display_prefs.noProgress = noProgress; }
+void FIO_setProgressSetting(FIO_progressSetting_e setting) { g_display_prefs.progressSetting = setting; }
 
 
 /*-*************************************
@@ -446,6 +462,8 @@
 
 void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles) { prefs->excludeCompressedFiles = excludeCompressedFiles; }
 
+void FIO_setAllowBlockDevices(FIO_prefs_t* const prefs, int allowBlockDevices) { prefs->allowBlockDevices = allowBlockDevices; }
+
 void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize) {
     if (blockSize && prefs->nbWorkers==0)
         DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n");
@@ -464,6 +482,10 @@
     prefs->adaptiveMode = adapt;
 }
 
+void FIO_setUseRowMatchFinder(FIO_prefs_t* const prefs, int useRowMatchFinder) {
+    prefs->useRowMatchFinder = useRowMatchFinder;
+}
+
 void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) {
     if ((rsyncable>0) && (prefs->nbWorkers==0))
         EXM_THROW(1, "Rsyncable mode is not compatible with single thread mode \n");
@@ -584,11 +606,12 @@
 }
 
 /** FIO_openSrcFile() :
- *  condition : `srcFileName` must be non-NULL.
+ *  condition : `srcFileName` must be non-NULL. `prefs` may be NULL.
  * @result : FILE* to `srcFileName`, or NULL if it fails */
-static FILE* FIO_openSrcFile(const char* srcFileName)
+static FILE* FIO_openSrcFile(const FIO_prefs_t* const prefs, const char* srcFileName)
 {
     stat_t statbuf;
+    int allowBlockDevices = prefs != NULL ? prefs->allowBlockDevices : 0;
     assert(srcFileName != NULL);
     if (!strcmp (srcFileName, stdinmark)) {
         DISPLAYLEVEL(4,"Using stdin for input \n");
@@ -604,6 +627,7 @@
 
     if (!UTIL_isRegularFileStat(&statbuf)
      && !UTIL_isFIFOStat(&statbuf)
+     && !(allowBlockDevices && UTIL_isBlockDevStat(&statbuf))
     ) {
         DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n",
                         srcFileName);
@@ -622,7 +646,8 @@
  * @result : FILE* to `dstFileName`, or NULL if it fails */
 static FILE*
 FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs,
-                const char* srcFileName, const char* dstFileName)
+                const char* srcFileName, const char* dstFileName,
+                const int mode)
 {
     if (prefs->testMode) return NULL;  /* do not open file in test mode */
 
@@ -649,7 +674,6 @@
 
     if (UTIL_isRegularFile(dstFileName)) {
         /* Check if destination file already exists */
-        FILE* const fCheck = fopen( dstFileName, "rb" );
 #if !defined(_WIN32)
         /* this test does not work on Windows :
          * `NUL` and `nul` are detected as regular files */
@@ -658,31 +682,41 @@
                         dstFileName);
         }
 #endif
-        if (fCheck != NULL) {  /* dst file exists, authorization prompt */
-            fclose(fCheck);
-            if (!prefs->overwrite) {
-                if (g_display_prefs.displayLevel <= 1) {
-                    /* No interaction possible */
-                    DISPLAY("zstd: %s already exists; not overwritten  \n",
-                            dstFileName);
-                    return NULL;
-                }
-                DISPLAY("zstd: %s already exists; ", dstFileName);
-                if (UTIL_requireUserConfirmation("overwrite (y/n) ? ", "Not overwritten  \n", "yY", fCtx->hasStdinInput))
-                    return NULL;
+        if (!prefs->overwrite) {
+            if (g_display_prefs.displayLevel <= 1) {
+                /* No interaction possible */
+                DISPLAY("zstd: %s already exists; not overwritten  \n",
+                        dstFileName);
+                return NULL;
             }
-            /* need to unlink */
-            FIO_removeFile(dstFileName);
-    }   }
+            DISPLAY("zstd: %s already exists; ", dstFileName);
+            if (UTIL_requireUserConfirmation("overwrite (y/n) ? ", "Not overwritten  \n", "yY", fCtx->hasStdinInput))
+                return NULL;
+        }
+        /* need to unlink */
+        FIO_removeFile(dstFileName);
+    }
 
-    {   FILE* const f = fopen( dstFileName, "wb" );
+    {
+#if defined(_WIN32)
+        /* Windows requires opening the file as a "binary" file to avoid
+         * mangling. This macro doesn't exist on unix. */
+        const int openflags = O_WRONLY|O_CREAT|O_TRUNC|O_BINARY;
+        const int fd = _open(dstFileName, openflags, mode);
+        FILE* f = NULL;
+        if (fd != -1) {
+            f = _fdopen(fd, "wb");
+        }
+#else
+        const int openflags = O_WRONLY|O_CREAT|O_TRUNC;
+        const int fd = open(dstFileName, openflags, mode);
+        FILE* f = NULL;
+        if (fd != -1) {
+            f = fdopen(fd, "wb");
+        }
+#endif
         if (f == NULL) {
             DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
-        } else if (srcFileName != NULL
-               && strcmp (srcFileName, stdinmark)
-               && strcmp(dstFileName, nulmark) ) {
-            /* reduce rights on newly created dst file while compression is ongoing */
-            UTIL_chmod(dstFileName, NULL, 00600);
         }
         return f;
     }
@@ -840,7 +874,7 @@
 /* FIO_removeMultiFilesWarning() :
  * Returns 1 if the console should abort, 0 if console should proceed.
  * This function handles logic when processing multiple files with -o, displaying the appropriate warnings/prompts.
- * 
+ *
  * If -f is specified, or there is just 1 file, zstd will always proceed as usual.
  * If --rm is specified, there will be a prompt asking for user confirmation.
  *         If -f is specified with --rm, zstd will proceed as usual
@@ -897,6 +931,15 @@
     ZSTD_CStream* cctx;
 } cRess_t;
 
+/** ZSTD_cycleLog() : @return `hashLog`, reduced by 1 when `strat` >= ZSTD_btlazy2 (btScale is then 1, else 0).
+ *  condition for correct operation : hashLog > 1 (checked with assert() below) */
+static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
+{
+    U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
+    assert(hashLog > 1);
+    return hashLog - btScale;
+}
+
 static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs,
                                     ZSTD_compressionParameters* comprParams,
                                     unsigned long long const dictSize,
@@ -908,7 +951,7 @@
     FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, maxSrcFileSize);
     if (fileWindowLog > ZSTD_WINDOWLOG_MAX)
         DISPLAYLEVEL(1, "Max window log exceeded by file (compression ratio will suffer)\n");
-    comprParams->windowLog = MIN(ZSTD_WINDOWLOG_MAX, fileWindowLog);
+    comprParams->windowLog = MAX(ZSTD_WINDOWLOG_MIN, MIN(ZSTD_WINDOWLOG_MAX, fileWindowLog));
     if (fileWindowLog > ZSTD_cycleLog(cParams.chainLog, cParams.strategy)) {
         if (!prefs->ldmFlag)
             DISPLAYLEVEL(1, "long mode automatically triggered\n");
@@ -976,6 +1019,7 @@
     if (prefs->ldmHashRateLog != FIO_LDM_PARAM_NOTSET) {
         CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashRateLog, prefs->ldmHashRateLog) );
     }
+    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_useRowMatchFinder, prefs->useRowMatchFinder));
     /* compression parameters */
     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) );
     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, (int)comprParams.chainLog) );
@@ -983,7 +1027,7 @@
     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_searchLog, (int)comprParams.searchLog) );
     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) );
     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) );
-    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, comprParams.strategy) );
+    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, (int)comprParams.strategy) );
     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) );
     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableDedicatedDictSearch, 1) );
     /* multi-threading */
@@ -1350,7 +1394,7 @@
             /* display notification; and adapt compression level */
             if (READY_FOR_UPDATE()) {
                 ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx);
-                double const cShare = (double)zfp.produced / (zfp.consumed + !zfp.consumed/*avoid div0*/) * 100;
+                double const cShare = (double)zfp.produced / (double)(zfp.consumed + !zfp.consumed/*avoid div0*/) * 100;
 
                 /* display progress notifications */
                 if (g_display_prefs.displayLevel >= 3) {
@@ -1360,24 +1404,25 @@
                                 (unsigned)(zfp.consumed >> 20),
                                 (unsigned)(zfp.produced >> 20),
                                 cShare );
-                } else {   /* summarized notifications if == 2 */
-                    DISPLAYLEVEL(2, "\r%79s\r", "");    /* Clear out the current displayed line */
+                } else if (g_display_prefs.displayLevel >= 2 || g_display_prefs.progressSetting == FIO_ps_always) {
+                    /* Require level 2 or forcibly displayed progress counter for summarized updates */
+                    DISPLAYLEVEL(1, "\r%79s\r", "");    /* Clear out the current displayed line */
                     if (fCtx->nbFilesTotal > 1) {
                         size_t srcFileNameSize = strlen(srcFileName);
                         /* Ensure that the string we print is roughly the same size each time */
                         if (srcFileNameSize > 18) {
                             const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
-                            DISPLAYLEVEL(2, "Compress: %u/%u files. Current: ...%s ",
-                                         fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName);
+                            DISPLAYLEVEL(1, "Compress: %u/%u files. Current: ...%s ",
+                                        fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName);
                         } else {
-                            DISPLAYLEVEL(2, "Compress: %u/%u files. Current: %*s ",
-                                         fCtx->currFileIdx+1, fCtx->nbFilesTotal, (int)(18-srcFileNameSize), srcFileName);
+                            DISPLAYLEVEL(1, "Compress: %u/%u files. Current: %*s ",
+                                        fCtx->currFileIdx+1, fCtx->nbFilesTotal, (int)(18-srcFileNameSize), srcFileName);
                         }
                     }
-                    DISPLAYLEVEL(2, "Read : %2u ", (unsigned)(zfp.consumed >> 20));
+                    DISPLAYLEVEL(1, "Read : %2u ", (unsigned)(zfp.consumed >> 20));
                     if (fileSize != UTIL_FILESIZE_UNKNOWN)
                         DISPLAYLEVEL(2, "/ %2u ", (unsigned)(fileSize >> 20));
-                    DISPLAYLEVEL(2, "MB ==> %2.f%%", cShare);
+                    DISPLAYLEVEL(1, "MB ==> %2.f%%", cShare);
                     DELAY_NEXT_UPDATE();
                 }
 
@@ -1499,7 +1544,7 @@
     U64 readsize = 0;
     U64 compressedfilesize = 0;
     U64 const fileSize = UTIL_getFileSize(srcFileName);
-    DISPLAYLEVEL(5, "%s: %u bytes \n", srcFileName, (unsigned)fileSize);
+    DISPLAYLEVEL(5, "%s: %llu bytes \n", srcFileName, (unsigned long long)fileSize);
 
     /* compression format selection */
     switch (prefs->compressionType) {
@@ -1545,7 +1590,7 @@
     fCtx->totalBytesOutput += (size_t)compressedfilesize;
     DISPLAYLEVEL(2, "\r%79s\r", "");
     if (g_display_prefs.displayLevel >= 2 &&
-        !fCtx->hasStdoutOutput && 
+        !fCtx->hasStdoutOutput &&
         (g_display_prefs.displayLevel >= 3 || fCtx->nbFilesTotal <= 1)) {
         if (readsize == 0) {
             DISPLAYLEVEL(2,"%-20s :  (%6llu => %6llu bytes, %s) \n",
@@ -1555,7 +1600,7 @@
         } else {
             DISPLAYLEVEL(2,"%-20s :%6.2f%%   (%6llu => %6llu bytes, %s) \n",
                 srcFileName,
-                (double)compressedfilesize / readsize * 100,
+                (double)compressedfilesize / (double)readsize * 100,
                 (unsigned long long)readsize, (unsigned long long) compressedfilesize,
                 dstFileName);
         }
@@ -1593,23 +1638,24 @@
     int closeDstFile = 0;
     int result;
     stat_t statbuf;
-    int transfer_permissions = 0;
     assert(ress.srcFile != NULL);
     if (ress.dstFile == NULL) {
+        int dstFilePermissions = DEFAULT_FILE_PERMISSIONS;
+        if ( strcmp (srcFileName, stdinmark)
+          && UTIL_stat(srcFileName, &statbuf)
+          && UTIL_isRegularFileStat(&statbuf) ) {
+            dstFilePermissions = statbuf.st_mode;
+        }
+
         closeDstFile = 1;
         DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName);
-        ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName);
+        ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions);
         if (ress.dstFile==NULL) return 1;  /* could not open dstFileName */
         /* Must only be added after FIO_openDstFile() succeeds.
          * Otherwise we may delete the destination file if it already exists,
          * and the user presses Ctrl-C when asked if they wish to overwrite.
          */
         addHandler(dstFileName);
-
-        if ( strcmp (srcFileName, stdinmark)
-          && UTIL_stat(srcFileName, &statbuf)
-          && UTIL_isRegularFileStat(&statbuf) )
-            transfer_permissions = 1;
     }
 
     result = FIO_compressFilename_internal(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
@@ -1629,11 +1675,6 @@
           && strcmp(dstFileName, stdoutmark)  /* special case : don't remove() stdout */
           ) {
             FIO_removeFile(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */
-        } else if (transfer_permissions) {
-            DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: transferring permissions into dst: %s \n", dstFileName);
-            UTIL_setFileStat(dstFileName, &statbuf);
-        } else {
-            DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: do not transfer permissions into dst: %s \n", dstFileName);
         }
     }
 
@@ -1692,7 +1733,7 @@
         return 0;
     }
 
-    ress.srcFile = FIO_openSrcFile(srcFileName);
+    ress.srcFile = FIO_openSrcFile(prefs, srcFileName);
     if (ress.srcFile == NULL) return 1;   /* srcFile could not be opened */
 
     result = FIO_compressFilename_dstFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
@@ -1795,7 +1836,7 @@
     int status;
     int error = 0;
     cRess_t ress = FIO_createCResources(prefs, dictFileName,
-        FIO_getLargestFileSize(inFileNamesTable, fCtx->nbFilesTotal),
+        FIO_getLargestFileSize(inFileNamesTable, (unsigned)fCtx->nbFilesTotal),
         compressionLevel, comprParams);
 
     /* init */
@@ -1805,7 +1846,7 @@
             FIO_freeCResources(&ress);
             return 1;
         }
-        ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName);
+        ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS);
         if (ress.dstFile == NULL) {  /* could not open outFileName */
             error = 1;
         } else {
@@ -1821,7 +1862,7 @@
         }
     } else {
         if (outMirroredRootDirName)
-            UTIL_mirrorSourceFilesDirectories(inFileNamesTable, fCtx->nbFilesTotal, outMirroredRootDirName);
+            UTIL_mirrorSourceFilesDirectories(inFileNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName);
 
         for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) {
             const char* const srcFileName = inFileNamesTable[fCtx->currFileIdx];
@@ -1845,7 +1886,7 @@
         }
 
         if (outDirName)
-            FIO_checkFilenameCollisions(inFileNamesTable , fCtx->nbFilesTotal);
+            FIO_checkFilenameCollisions(inFileNamesTable , (unsigned)fCtx->nbFilesTotal);
     }
 
     if (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1 && fCtx->totalBytesInput != 0) {
@@ -1892,7 +1933,7 @@
         EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno));
     CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) );
     CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, !prefs->checksumFlag));
-    
+
     ress.srcBufferSize = ZSTD_DStreamInSize();
     ress.srcBuffer = malloc(ress.srcBufferSize);
     ress.dstBufferSize = ZSTD_DStreamOutSize();
@@ -2099,7 +2140,7 @@
         if (srcFileLength>20) srcFileName += srcFileLength-20;
     }
 
-    ZSTD_resetDStream(ress->dctx);
+    ZSTD_DCtx_reset(ress->dctx, ZSTD_reset_session_only);
 
     /* Header loading : ensures ZSTD_getFrameHeader() will succeed */
     {   size_t const toDecode = ZSTD_FRAMEHEADERSIZE_MAX;
@@ -2124,7 +2165,7 @@
         /* Write block */
         storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, outBuff.pos, prefs, storedSkips);
         frameSize += outBuff.pos;
-        if (!fCtx->hasStdoutOutput) {
+        if (!fCtx->hasStdoutOutput || g_display_prefs.progressSetting == FIO_ps_always) {
             if (fCtx->nbFilesTotal > 1) {
                 size_t srcFileNameSize = strlen(srcFileName);
                 if (srcFileNameSize > 18) {
@@ -2495,13 +2536,19 @@
 {
     int result;
     stat_t statbuf;
-    int transfer_permissions = 0;
     int releaseDstFile = 0;
 
     if ((ress.dstFile == NULL) && (prefs->testMode==0)) {
+        int dstFilePermissions = DEFAULT_FILE_PERMISSIONS;
+        if ( strcmp(srcFileName, stdinmark)   /* special case : don't transfer permissions from stdin */
+          && UTIL_stat(srcFileName, &statbuf)
+          && UTIL_isRegularFileStat(&statbuf) ) {
+            dstFilePermissions = statbuf.st_mode;
+        }
+
         releaseDstFile = 1;
 
-        ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName);
+        ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions);
         if (ress.dstFile==NULL) return 1;
 
         /* Must only be added after FIO_openDstFile() succeeds.
@@ -2509,11 +2556,6 @@
          * and the user presses Ctrl-C when asked if they wish to overwrite.
          */
         addHandler(dstFileName);
-
-        if ( strcmp(srcFileName, stdinmark)   /* special case : don't transfer permissions from stdin */
-          && UTIL_stat(srcFileName, &statbuf)
-          && UTIL_isRegularFileStat(&statbuf) )
-            transfer_permissions = 1;
     }
 
     result = FIO_decompressFrames(fCtx, ress, srcFile, prefs, dstFileName, srcFileName);
@@ -2531,8 +2573,6 @@
           && strcmp(dstFileName, stdoutmark)  /* special case : don't remove() stdout */
           ) {
             FIO_removeFile(dstFileName);  /* remove decompression artefact; note: don't do anything special if remove() fails */
-        } else if ( transfer_permissions /* file permissions correctly extracted from src */ ) {
-            UTIL_setFileStat(dstFileName, &statbuf);  /* transfer file permissions from src into dst */
         }
     }
 
@@ -2555,7 +2595,7 @@
         return 1;
     }
 
-    srcFile = FIO_openSrcFile(srcFileName);
+    srcFile = FIO_openSrcFile(prefs, srcFileName);
     if (srcFile==NULL) return 1;
     ress.srcBufferLoaded = 0;
 
@@ -2734,7 +2774,7 @@
             return 1;
         }
         if (!prefs->testMode) {
-            ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName);
+            ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS);
             if (ress.dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName);
         }
         for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) {
@@ -2747,7 +2787,7 @@
                         strerror(errno));
     } else {
         if (outMirroredRootDirName)
-            UTIL_mirrorSourceFilesDirectories(srcNamesTable, fCtx->nbFilesTotal, outMirroredRootDirName);
+            UTIL_mirrorSourceFilesDirectories(srcNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName);
 
         for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) {   /* create dstFileName */
             const char* const srcFileName = srcNamesTable[fCtx->currFileIdx];
@@ -2769,9 +2809,9 @@
             error |= status;
         }
         if (outDirName)
-            FIO_checkFilenameCollisions(srcNamesTable , fCtx->nbFilesTotal);
+            FIO_checkFilenameCollisions(srcNamesTable , (unsigned)fCtx->nbFilesTotal);
     }
-    
+
     if (fCtx->nbFilesProcessed >= 1  && fCtx->nbFilesTotal > 1 && fCtx->totalBytesOutput != 0)
         DISPLAYLEVEL(2, "%d files decompressed : %6zu bytes total \n", fCtx->nbFilesProcessed, fCtx->totalBytesOutput);
 
@@ -2905,7 +2945,7 @@
 getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName)
 {
     InfoError status;
-    FILE* const srcFile = FIO_openSrcFile(inFileName);
+    FILE* const srcFile = FIO_openSrcFile(NULL, inFileName);
     ERROR_IF(srcFile == NULL, info_file_error, "Error: could not open source file %s", inFileName);
 
     info->compressedSize = UTIL_getFileSize(inFileName);
@@ -2938,7 +2978,7 @@
     double const windowSizeUnit = (double)info->windowSize / unit;
     double const compressedSizeUnit = (double)info->compressedSize / unit;
     double const decompressedSizeUnit = (double)info->decompressedSize / unit;
-    double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/info->compressedSize;
+    double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/(double)info->compressedSize;
     const char* const checkString = (info->usesCheck ? "XXH64" : "None");
     if (displayLevel <= 2) {
         if (!info->decompUnavailable) {
@@ -3059,7 +3099,7 @@
             const char* const unitStr = total.compressedSize < (1 MB) ? "KB" : "MB";
             double const compressedSizeUnit = (double)total.compressedSize / unit;
             double const decompressedSizeUnit = (double)total.decompressedSize / unit;
-            double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/total.compressedSize;
+            double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/(double)total.compressedSize;
             const char* const checkString = (total.usesCheck ? "XXH64" : "");
             DISPLAYOUT("----------------------------------------------------------------- \n");
             if (total.decompUnavailable) {
diff --git a/programs/fileio.h b/programs/fileio.h
index 05e6d06..9d97ec8 100644
--- a/programs/fileio.h
+++ b/programs/fileio.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -68,6 +68,8 @@
 
 typedef struct FIO_display_prefs_s FIO_display_prefs_t;
 
+typedef enum { FIO_ps_auto, FIO_ps_never, FIO_ps_always } FIO_progressSetting_e;
+
 /*-*************************************
 *  Parameters
 ***************************************/
@@ -77,6 +79,7 @@
 void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, unsigned adapt);
 void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel);
 void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel);
+void FIO_setUseRowMatchFinder(FIO_prefs_t* const prefs, int useRowMatchFinder);
 void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize);
 void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag);
 void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag);
@@ -99,9 +102,10 @@
         FIO_prefs_t* const prefs,
         ZSTD_literalCompressionMode_e mode);
 
-void FIO_setNoProgress(unsigned noProgress);
+void FIO_setProgressSetting(FIO_progressSetting_e progressSetting);
 void FIO_setNotificationLevel(int level);
 void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles);
+void FIO_setAllowBlockDevices(FIO_prefs_t* const prefs, int allowBlockDevices);
 void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value);
 void FIO_setContentSize(FIO_prefs_t* const prefs, int value);
 
diff --git a/programs/platform.h b/programs/platform.h
index 68be70b..b858e3b 100644
--- a/programs/platform.h
+++ b/programs/platform.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -22,6 +22,7 @@
 ****************************************/
 #if defined(_MSC_VER)
 #  define _CRT_SECURE_NO_WARNINGS    /* Disable Visual Studio warning messages for fopen, strncpy, strerror */
+#  define _CRT_NONSTDC_NO_WARNINGS   /* Disable C4996 complaining about posix function names */
 #  if (_MSC_VER <= 1800)             /* 1800 == Visual Studio 2013 */
 #    define _CRT_SECURE_NO_DEPRECATE /* VS2005 - must be declared before <io.h> and <windows.h> */
 #    define snprintf sprintf_s       /* snprintf unsupported by Visual <= 2013 */
diff --git a/programs/timefn.c b/programs/timefn.c
index 95460d0..64577b0 100644
--- a/programs/timefn.c
+++ b/programs/timefn.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/programs/timefn.h b/programs/timefn.h
index 5d2818e..3fcd78a 100644
--- a/programs/timefn.h
+++ b/programs/timefn.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/programs/util.c b/programs/util.c
index 5386d00..8d190c6 100644
--- a/programs/util.c
+++ b/programs/util.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -260,6 +260,17 @@
     return 0;
 }
 
+/* UTIL_isBlockDevStat : distinguish block devices */
+int UTIL_isBlockDevStat(const stat_t* statbuf)
+{
+/* macro guards, as defined in : https://linux.die.net/man/2/lstat */
+#if PLATFORM_POSIX_VERSION >= 200112L
+    if (S_ISBLK(statbuf->st_mode)) return 1;
+#endif
+    (void)statbuf;
+    return 0;
+}
+
 int UTIL_isLink(const char* infilename)
 {
 /* macro guards, as defined in : https://linux.die.net/man/2/lstat */
@@ -312,9 +323,7 @@
 static size_t readLineFromFile(char* buf, size_t len, FILE* file)
 {
     assert(!feof(file));
-    /* Work around Cygwin problem when len == 1 it returns NULL. */
-    if (len <= 1) return 0;
-    CONTROL( fgets(buf, (int) len, file) );
+    if ( fgets(buf, (int) len, file) == NULL ) return 0;
     {   size_t linelen = strlen(buf);
         if (strlen(buf)==0) return 0;
         if (buf[linelen-1] == '\n') linelen--;
@@ -670,7 +679,27 @@
 
 static int pathnameHas2Dots(const char *pathname)
 {
-    return NULL != strstr(pathname, "..");
+    /* We need to figure out whether any ".." present in the path is a whole
+     * path token, which is the case if it is bordered on both sides by either
+     * the beginning/end of the path or by a directory separator.
+     */
+    const char *needle = pathname;
+    while (1) {
+        needle = strstr(needle, "..");
+
+        if (needle == NULL) {
+            return 0;
+        }
+
+        if ((needle == pathname || needle[-1] == PATH_SEP)
+         && (needle[2] == '\0' || needle[2] == PATH_SEP)) {
+            return 1;
+        }
+
+        /* increment so we search for the next match */
+        needle++;
+    };
+    return 0;
 }
 
 static int isFileNameValidForMirroredOutput(const char *filename)
@@ -954,7 +983,7 @@
 }
 
 FileNamesTable*
-UTIL_createExpandedFNT(const char** inputNames, size_t nbIfns, int followLinks)
+UTIL_createExpandedFNT(const char* const* inputNames, size_t nbIfns, int followLinks)
 {
     unsigned nbFiles;
     char* buf = (char*)malloc(LIST_SIZE_INCREASE);
@@ -1183,12 +1212,17 @@
                 /* fall back on the sysconf value */
                 goto failed;
         }   }
-        if (siblings && cpu_cores) {
+        if (siblings && cpu_cores && siblings > cpu_cores) {
             ratio = siblings / cpu_cores;
         }
+
+        if (ratio && numPhysicalCores > ratio) {
+            numPhysicalCores = numPhysicalCores / ratio;
+        }
+
 failed:
         fclose(cpuinfo);
-        return numPhysicalCores = numPhysicalCores / ratio;
+        return numPhysicalCores;
     }
 }
 
diff --git a/programs/util.h b/programs/util.h
index 25fa3f5..24cce44 100644
--- a/programs/util.h
+++ b/programs/util.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -143,6 +143,7 @@
 int UTIL_isRegularFileStat(const stat_t* statbuf);
 int UTIL_isDirectoryStat(const stat_t* statbuf);
 int UTIL_isFIFOStat(const stat_t* statbuf);
+int UTIL_isBlockDevStat(const stat_t* statbuf);
 U64 UTIL_getFileSizeStat(const stat_t* statbuf);
 
 /**
@@ -272,7 +273,7 @@
  *        or NULL in case of error
  */
 FileNamesTable*
-UTIL_createExpandedFNT(const char** filenames, size_t nbFilenames, int followLinks);
+UTIL_createExpandedFNT(const char* const* filenames, size_t nbFilenames, int followLinks);
 
 
 /*-****************************************
diff --git a/programs/windres/verrsrc.h b/programs/windres/verrsrc.h
index 9815648..c1b60e9 100644
--- a/programs/windres/verrsrc.h
+++ b/programs/windres/verrsrc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/programs/zstd.1 b/programs/zstd.1
index 0335b17..861f938 100644
--- a/programs/zstd.1
+++ b/programs/zstd.1
@@ -1,5 +1,5 @@
 .
-.TH "ZSTD" "1" "December 2020" "zstd 1.4.7" "User Commands"
+.TH "ZSTD" "1" "May 2021" "zstd 1.5.0" "User Commands"
 .
 .SH "NAME"
 \fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
@@ -105,7 +105,7 @@
 \fB\-\-fast[=#]\fR: switch to ultra\-fast compression levels\. If \fB=#\fR is not present, it defaults to \fB1\fR\. The higher the value, the faster the compression speed, at the cost of some compression ratio\. This setting overwrites compression level if one was set previously\. Similarly, if a compression level is set after \fB\-\-fast\fR, it overrides it\.
 .
 .IP "\(bu" 4
-\fB\-T#\fR, \fB\-\-threads=#\fR: Compress using \fB#\fR working threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to ZSTDMT_NBWORKERS_MAX==200\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\.
+\fB\-T#\fR, \fB\-\-threads=#\fR: Compress using \fB#\fR working threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to \fBZSTDMT_NBWORKERS_MAX\fR, which is either 64 in 32\-bit mode, or 256 for 64\-bit environments\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\.
 .
 .IP "\(bu" 4
 \fB\-\-single\-thread\fR: Does not spawn a thread for compression, use a single thread for both I/O and compression\. In this mode, compression is serialized with I/O, which is slightly slower\. (This is different from \fB\-T1\fR, which spawns 1 compression thread in parallel of I/O)\. This mode is the only one available when multithread support is disabled\. Single\-thread mode features lower memory usage\. Final compressed result is slightly different from \fB\-T1\fR\.
@@ -156,7 +156,7 @@
 \fB\-o FILE\fR: save result into \fBFILE\fR
 .
 .IP "\(bu" 4
-\fB\-f\fR, \fB\-\-force\fR: overwrite output without prompting, and (de)compress symbolic links
+\fB\-f\fR, \fB\-\-force\fR: disable input and output checks\. Allows overwriting existing files, input from console, output to stdout, operating on links, block devices, etc\.
 .
 .IP "\(bu" 4
 \fB\-c\fR, \fB\-\-stdout\fR: force write to standard output, even if it is the console
@@ -218,7 +218,7 @@
 \fBZSTD_CLEVEL\fR can be used to set the level between 1 and 19 (the "normal" range)\. If the value of \fBZSTD_CLEVEL\fR is not a valid integer, it will be ignored with a warning message\. \fBZSTD_CLEVEL\fR just replaces the default compression level (\fB3\fR)\.
 .
 .P
-\fBZSTD_NBTHREADS\fR can be used to set the number of threads \fBzstd\fR will attempt to use during compression\. If the value of \fBZSTD_NBTHREADS\fR is not a valid unsigned integer, it will be ignored with a warning message\. \'ZSTD_NBTHREADS\fBhas a default value of (\fR1\fB), and is capped at ZSTDMT_NBWORKERS_MAX==200\.\fRzstd` must be compiled with multithread support for this to have any effect\.
+\fBZSTD_NBTHREADS\fR can be used to set the number of threads \fBzstd\fR will attempt to use during compression\. If the value of \fBZSTD_NBTHREADS\fR is not a valid unsigned integer, it will be ignored with a warning message\. \fBZSTD_NBTHREADS\fR has a default value of (\fB1\fR), and is capped at \fBZSTDMT_NBWORKERS_MAX\fR, which is either 64 in 32\-bit mode, or 256 for 64\-bit environments\. \fBzstd\fR must be compiled with multithread support for this to have any effect\.
 .
 .P
 They can both be overridden by corresponding command line arguments: \fB\-#\fR for compression level and \fB\-T#\fR for number of compression threads\.
@@ -343,6 +343,9 @@
 .
 .SH "ADVANCED COMPRESSION OPTIONS"
 .
+.SS "\-B#:"
+Select the size of each compression job\. This parameter is only available when multi\-threading is enabled\. Each compression job is run in parallel, so this value indirectly impacts the number of active threads\. Default job size varies depending on compression level (generally \fB4 * windowSize\fR)\. \fB\-B#\fR makes it possible to manually select a custom size\. Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 512 KB, or \fBoverlapSize\fR, whichever is larger\. Different job sizes will lead to (slightly) different compressed frames\.
+.
 .SS "\-\-zstd[=options]:"
 \fBzstd\fR provides 22 predefined compression levels\. The selected or default predefined compression level can be changed with advanced compression options\. The \fIoptions\fR are provided as a comma\-separated list\. You may specify only the options you want to change and the rest will be taken from the selected or default compression level\. The list of available \fIoptions\fR:
 .
@@ -481,9 +484,6 @@
 .P
 \fB\-\-zstd\fR=wlog=23,clog=23,hlog=22,slog=6,mml=3,tlen=48,strat=6
 .
-.SS "\-B#:"
-Select the size of each compression job\. This parameter is available only when multi\-threading is enabled\. Default value is \fB4 * windowSize\fR, which means it varies depending on compression level\. \fB\-B#\fR makes it possible to select a custom value\. Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 1 MB, or \fBoverlapSize\fR, whichever is largest\.
-.
 .SH "BUGS"
 Report bugs at: https://github\.com/facebook/zstd/issues
 .
diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index 73670da..ae50928 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -115,7 +115,8 @@
 * `-T#`, `--threads=#`:
     Compress using `#` working threads (default: 1).
     If `#` is 0, attempt to detect and use the number of physical CPU cores.
-    In all cases, the nb of threads is capped to ZSTDMT_NBWORKERS_MAX==200.
+    In all cases, the nb of threads is capped to `ZSTDMT_NBWORKERS_MAX`,
+    which is either 64 in 32-bit mode, or 256 for 64-bit environments.
     This modifier does nothing if `zstd` is compiled without multithread support.
 * `--single-thread`:
     Does not spawn a thread for compression, use a single thread for both I/O and compression.
@@ -201,7 +202,8 @@
 * `-o FILE`:
     save result into `FILE`
 * `-f`, `--force`:
-    overwrite output without prompting, and (de)compress symbolic links
+    disable input and output checks. Allows overwriting existing files, input
+    from console, output to stdout, operating on links, block devices, etc.
 * `-c`, `--stdout`:
     force write to standard output, even if it is the console
 * `--[no-]sparse`:
@@ -214,7 +216,7 @@
     This setting overrides default and can force sparse mode over stdout.
 * `--rm`:
     remove source file(s) after successful compression or decompression. If used in combination with
-    -o, will trigger a confirmation prompt (which can be silenced with -f), as this is a destructive operation. 
+    -o, will trigger a confirmation prompt (which can be silenced with -f), as this is a destructive operation.
 * `-k`, `--keep`:
     keep source file(s) after successful compression or decompression.
     This is the default behavior.
@@ -280,11 +282,11 @@
 
 `ZSTD_NBTHREADS` can be used to set the number of threads `zstd` will attempt to use during compression.
 If the value of `ZSTD_NBTHREADS` is not a valid unsigned integer, it will be ignored with a warning message.
-'ZSTD_NBTHREADS` has a default value of (`1`), and is capped at ZSTDMT_NBWORKERS_MAX==200. `zstd` must be
+`ZSTD_NBTHREADS` has a default value of (`1`), and is capped at `ZSTDMT_NBWORKERS_MAX` (64 in 32-bit mode, 256 in 64-bit environments). `zstd` must be
 compiled with multithread support for this to have any effect.
 
 They can both be overridden by corresponding command line arguments:
-`-#` for compression level and `-T#` for number of compression threads. 
+`-#` for compression level and `-T#` for number of compression threads.
 
 
 DICTIONARY BUILDER
@@ -422,6 +424,16 @@
 
 ADVANCED COMPRESSION OPTIONS
 ----------------------------
+### -B#:
+Select the size of each compression job.
+This parameter is only available when multi-threading is enabled.
+Each compression job is run in parallel, so this value indirectly impacts the number of active threads.
+Default job size varies depending on compression level (generally `4 * windowSize`).
+`-B#` makes it possible to manually select a custom size.
+Note that job size must respect a minimum value which is enforced transparently.
+This minimum is either 512 KB, or `overlapSize`, whichever is larger.
+Different job sizes will lead to (slightly) different compressed frames.
+
 ### --zstd[=options]:
 `zstd` provides 22 predefined compression levels.
 The selected or default predefined compression level can be changed with
@@ -565,13 +577,6 @@
 
 `--zstd`=wlog=23,clog=23,hlog=22,slog=6,mml=3,tlen=48,strat=6
 
-### -B#:
-Select the size of each compression job.
-This parameter is available only when multi-threading is enabled.
-Default value is `4 * windowSize`, which means it varies depending on compression level.
-`-B#` makes it possible to select a custom value.
-Note that job size must respect a minimum value which is enforced transparently.
-This minimum is either 1 MB, or `overlapSize`, whichever is largest.
 
 BUGS
 ----
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index 9b6f915..239aaf4 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -42,6 +42,9 @@
 #ifndef ZSTD_NODICT
 #  include "dibio.h"  /* ZDICT_cover_params_t, DiB_trainFromFiles() */
 #endif
+#ifndef ZSTD_NOTRACE
+#  include "zstdcli_trace.h"
+#endif
 #include "../lib/zstd.h"  /* ZSTD_VERSION_STRING, ZSTD_minCLevel, ZSTD_maxCLevel */
 
 
@@ -104,6 +107,24 @@
 
 
 /*-************************************
+*  Check Version (when CLI linked to dynamic library)
+**************************************/
+
+/* Due to usage of experimental symbols and capabilities by the CLI,
+ * the CLI must be linked against a dynamic library of same version */
+static void checkLibVersion(void)
+{
+    if (strcmp(ZSTD_VERSION_STRING, ZSTD_versionString())) {
+        DISPLAYLEVEL(1, "Error : incorrect library version (expecting : %s ; actual : %s ) \n",
+                    ZSTD_VERSION_STRING, ZSTD_versionString());
+        DISPLAYLEVEL(1, "Please update library to version %s, or use stand-alone zstd binary \n",
+                    ZSTD_VERSION_STRING);
+        exit(1);
+    }
+}
+
+
+/*-************************************
 *  Command Line
 **************************************/
 /* print help either in `stderr` or `stdout` depending on originating request
@@ -126,7 +147,9 @@
 #endif
     DISPLAY_F(f, " -D DICT: use DICT as Dictionary for compression or decompression \n");
     DISPLAY_F(f, " -o file: result stored into `file` (only 1 output file) \n");
-    DISPLAY_F(f, " -f     : overwrite output without prompting, also (de)compress links \n");
+    DISPLAY_F(f, " -f     : disable input and output checks. Allows overwriting existing files,\n");
+    DISPLAY_F(f, "          input from console, output to stdout, operating on links,\n");
+    DISPLAY_F(f, "          block devices, etc.\n");
     DISPLAY_F(f, "--rm    : remove source file(s) after successful de/compression \n");
     DISPLAY_F(f, " -k     : preserve source file(s) (default) \n");
     DISPLAY_F(f, " -h/-H  : display help/long help and exit \n");
@@ -144,7 +167,8 @@
 
     DISPLAYOUT( " -v     : verbose mode; specify multiple times to increase verbosity \n");
     DISPLAYOUT( " -q     : suppress warnings; specify twice to suppress errors too \n");
-    DISPLAYOUT( "--no-progress : do not display the progress counter \n");
+    DISPLAYOUT( "--[no-]progress : forcibly display, or never display the progress counter.\n");
+    DISPLAYOUT( "                  note: any (de)compressed output to terminal will mix with progress counter text. \n");
 
 #ifdef UTIL_HAS_CREATEFILELIST
     DISPLAYOUT( " -r     : operate recursively on directories \n");
@@ -167,6 +191,11 @@
     DISPLAYOUT( "--[no-]check : during decompression, ignore/validate checksums in compressed frame (default: validate).");
 #endif
 #endif /* ZSTD_NOCOMPRESS */
+
+#ifndef ZSTD_NOTRACE
+    DISPLAYOUT( "\n");
+    DISPLAYOUT( "--trace FILE : log tracing information to FILE.");
+#endif
     DISPLAYOUT( "\n");
 
     DISPLAYOUT( "--      : All arguments after \"--\" are treated as files \n");
@@ -178,6 +207,7 @@
     DISPLAYOUT( "--long[=#]: enable long distance matching with given window log (default: %u) \n", g_defaultMaxWindowLog);
     DISPLAYOUT( "--fast[=#]: switch to very fast compression levels (default: %u) \n", 1);
     DISPLAYOUT( "--adapt : dynamically adapt compression level to I/O conditions \n");
+    DISPLAYOUT( "--[no-]row-match-finder : force enable/disable usage of fast row-based matchfinder for greedy, lazy, and lazy2 strategies \n");
 # ifdef ZSTD_MULTITHREAD
     DISPLAYOUT( " -T#    : spawns # compression threads (default: 1, 0==# cores) \n");
     DISPLAYOUT( " -B#    : select size of each job (default: 0==automatic) \n");
@@ -696,12 +726,15 @@
 {
     int argNb,
         followLinks = 0,
+        allowBlockDevices = 0,
+        forceStdin = 0,
         forceStdout = 0,
         hasStdout = 0,
         ldmFlag = 0,
         main_pause = 0,
         nbWorkers = 0,
         adapt = 0,
+        useRowMatchFinder = 0,
         adaptMin = MINCLEVEL,
         adaptMax = MAXCLEVEL,
         rsyncable = 0,
@@ -753,6 +786,7 @@
 
 
     /* init */
+    checkLibVersion();
     (void)recursive; (void)cLevelLast;    /* not used when ZSTD_NOBENCH set */
     (void)memLimit;
     assert(argCount >= 1);
@@ -807,7 +841,7 @@
                 if (!strcmp(argument, "--compress")) { operation=zom_compress; continue; }
                 if (!strcmp(argument, "--decompress")) { operation=zom_decompress; continue; }
                 if (!strcmp(argument, "--uncompress")) { operation=zom_decompress; continue; }
-                if (!strcmp(argument, "--force")) { FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; continue; }
+                if (!strcmp(argument, "--force")) { FIO_overwriteMode(prefs); forceStdin=1; forceStdout=1; followLinks=1; allowBlockDevices=1; continue; }
                 if (!strcmp(argument, "--version")) { printVersion(); CLEAN_RETURN(0); }
                 if (!strcmp(argument, "--help")) { usage_advanced(programName); CLEAN_RETURN(0); }
                 if (!strcmp(argument, "--verbose")) { g_displayLevel++; continue; }
@@ -828,6 +862,8 @@
                 if (!strcmp(argument, "--content-size")) { contentSize = 1; continue; }
                 if (!strcmp(argument, "--no-content-size")) { contentSize = 0; continue; }
                 if (!strcmp(argument, "--adapt")) { adapt = 1; continue; }
+                if (!strcmp(argument, "--no-row-match-finder")) { useRowMatchFinder = 1; continue; }
+                if (!strcmp(argument, "--row-match-finder")) { useRowMatchFinder = 2; continue; }
                 if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) { badusage(programName); CLEAN_RETURN(1); } continue; }
                 if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; }
                 if (!strcmp(argument, "--format=zstd")) { suffix = ZSTD_EXTENSION; FIO_setCompressionType(prefs, FIO_zstdCompression); continue; }
@@ -844,7 +880,8 @@
                 if (!strcmp(argument, "--rsyncable")) { rsyncable = 1; continue; }
                 if (!strcmp(argument, "--compress-literals")) { literalCompressionMode = ZSTD_lcm_huffman; continue; }
                 if (!strcmp(argument, "--no-compress-literals")) { literalCompressionMode = ZSTD_lcm_uncompressed; continue; }
-                if (!strcmp(argument, "--no-progress")) { FIO_setNoProgress(1); continue; }
+                if (!strcmp(argument, "--no-progress")) { FIO_setProgressSetting(FIO_ps_never); continue; }
+                if (!strcmp(argument, "--progress")) { FIO_setProgressSetting(FIO_ps_always); continue; }
                 if (!strcmp(argument, "--exclude-compressed")) { FIO_setExcludeCompressedFile(prefs, 1); continue; }
 
                 /* long commands with arguments */
@@ -898,6 +935,9 @@
 #ifdef UTIL_HAS_MIRRORFILELIST
                 if (longCommandWArg(&argument, "--output-dir-mirror")) { NEXT_FIELD(outMirroredDirName); continue; }
 #endif
+#ifndef ZSTD_NOTRACE
+                if (longCommandWArg(&argument, "--trace")) { char const* traceFile; NEXT_FIELD(traceFile); TRACE_enable(traceFile); continue; }
+#endif
                 if (longCommandWArg(&argument, "--patch-from")) { NEXT_FIELD(patchFromDictFileName); continue; }
                 if (longCommandWArg(&argument, "--long")) {
                     unsigned ldmWindowLog = 0;
@@ -988,7 +1028,7 @@
                 case 'D': argument++; NEXT_FIELD(dictFileName); break;
 
                     /* Overwrite */
-                case 'f': FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; argument++; break;
+                case 'f': FIO_overwriteMode(prefs); forceStdin=1; forceStdout=1; followLinks=1; allowBlockDevices=1; argument++; break;
 
                     /* Verbose mode */
                 case 'v': g_displayLevel++; argument++; break;
@@ -1164,6 +1204,7 @@
         benchParams.ldmFlag = ldmFlag;
         benchParams.ldmMinMatch = (int)g_ldmMinMatch;
         benchParams.ldmHashLog = (int)g_ldmHashLog;
+        benchParams.useRowMatchFinder = useRowMatchFinder;
         if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) {
             benchParams.ldmBucketSizeLog = (int)g_ldmBucketSizeLog;
         }
@@ -1243,7 +1284,9 @@
         outFileName = stdoutmark;  /* when input is stdin, default output is stdout */
 
     /* Check if input/output defined as console; trigger an error in this case */
-    if (!strcmp(filenames->fileNames[0], stdinmark) && IS_CONSOLE(stdin) ) {
+    if (!forceStdin
+     && !strcmp(filenames->fileNames[0], stdinmark)
+     && IS_CONSOLE(stdin) ) {
         DISPLAYLEVEL(1, "stdin is a console, aborting\n");
         CLEAN_RETURN(1);
     }
@@ -1281,17 +1324,18 @@
         DISPLAY("error : can't use --patch-from=# on multiple files \n");
         CLEAN_RETURN(1);
     }
-    
-    /* No status message in pipe mode (stdin - stdout) */	
+
+    /* No status message in pipe mode (stdin - stdout) */
     hasStdout = outFileName && !strcmp(outFileName,stdoutmark);
 
     if (hasStdout && (g_displayLevel==2)) g_displayLevel=1;
 
     /* IO Stream/File */
     FIO_setHasStdoutOutput(fCtx, hasStdout);
-    FIO_setNbFilesTotal(fCtx, (int)filenames->tableSize); 
+    FIO_setNbFilesTotal(fCtx, (int)filenames->tableSize);
     FIO_determineHasStdinInput(fCtx, filenames);
     FIO_setNotificationLevel(g_displayLevel);
+    FIO_setAllowBlockDevices(prefs, allowBlockDevices);
     FIO_setPatchFromMode(prefs, patchFromDictFileName != NULL);
     if (memLimit == 0) {
         if (compressionParams.windowLog == 0) {
@@ -1314,6 +1358,7 @@
         if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) FIO_setLdmBucketSizeLog(prefs, (int)g_ldmBucketSizeLog);
         if (g_ldmHashRateLog != LDM_PARAM_DEFAULT) FIO_setLdmHashRateLog(prefs, (int)g_ldmHashRateLog);
         FIO_setAdaptiveMode(prefs, (unsigned)adapt);
+        FIO_setUseRowMatchFinder(prefs, useRowMatchFinder);
         FIO_setAdaptMin(prefs, adaptMin);
         FIO_setAdaptMax(prefs, adaptMax);
         FIO_setRsyncable(prefs, rsyncable);
@@ -1353,7 +1398,7 @@
         else
             operationResult = FIO_compressMultipleFilenames(fCtx, prefs, filenames->fileNames, outMirroredDirName, outDirName, outFileName, suffix, dictFileName, cLevel, compressionParams);
 #else
-        (void)contentSize; (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; (void)ZSTD_strategyMap; /* not used when ZSTD_NOCOMPRESS set */
+        (void)contentSize; (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; (void)ZSTD_strategyMap; (void)useRowMatchFinder; /* not used when ZSTD_NOCOMPRESS set */
         DISPLAY("Compression not supported \n");
 #endif
     } else {  /* decompression or test */
@@ -1374,6 +1419,9 @@
     if (main_pause) waitEnter();
     UTIL_freeFileNamesTable(filenames);
     UTIL_freeFileNamesTable(file_of_names);
+#ifndef ZSTD_NOTRACE
+    TRACE_finish();
+#endif
 
     return operationResult;
 }
diff --git a/programs/zstdcli_trace.c b/programs/zstdcli_trace.c
new file mode 100644
index 0000000..b3b977f
--- /dev/null
+++ b/programs/zstdcli_trace.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstdcli_trace.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "timefn.h"
+#include "util.h"
+
+#define ZSTD_STATIC_LINKING_ONLY
+#include "../lib/zstd.h"
+/* We depend on the trace header to avoid duplicating the ZSTD_trace struct.
+ * But, we check the version so it is compatible with dynamic linking.
+ */
+#include "../lib/common/zstd_trace.h"
+/* We only use macros from threading.h so it is compatible with dynamic linking */
+#include "../lib/common/threading.h"
+
+#if ZSTD_TRACE
+
+static FILE* g_traceFile = NULL;
+static int g_mutexInit = 0;
+static ZSTD_pthread_mutex_t g_mutex;
+static UTIL_time_t g_enableTime = UTIL_TIME_INITIALIZER;
+
+void TRACE_enable(char const* filename)
+{
+    int const writeHeader = !UTIL_isRegularFile(filename);
+    if (g_traceFile)
+        fclose(g_traceFile);
+    g_traceFile = fopen(filename, "a");
+    if (g_traceFile && writeHeader) {
+        /* Fields:
+        * algorithm
+        * version
+        * method
+        * mode (streaming or single-pass)
+        * level
+        * workers
+        * dictionary size
+        * uncompressed size
+        * compressed size
+        * duration nanos
+        * compression ratio
+        * speed MB/s
+        */
+        fprintf(g_traceFile, "Algorithm, Version, Method, Mode, Level, Workers, Dictionary Size, Uncompressed Size, Compressed Size, Duration Nanos, Compression Ratio, Speed MB/s\n");
+    }
+    g_enableTime = UTIL_getTime();
+    if (!g_mutexInit) {
+        if (!ZSTD_pthread_mutex_init(&g_mutex, NULL)) {
+            g_mutexInit = 1;
+        } else {
+            TRACE_finish();
+        }
+    }
+}
+
+void TRACE_finish(void)
+{
+    if (g_traceFile) {
+        fclose(g_traceFile);
+    }
+    g_traceFile = NULL;
+    if (g_mutexInit) {
+        ZSTD_pthread_mutex_destroy(&g_mutex);
+        g_mutexInit = 0;
+    }
+}
+
+static void TRACE_log(char const* method, PTime duration, ZSTD_Trace const* trace)
+{
+    int level = 0;
+    int workers = 0;
+    double const ratio = (double)trace->uncompressedSize / (double)trace->compressedSize;
+    double const speed = ((double)trace->uncompressedSize * 1000) / (double)duration;
+    if (trace->params) {
+        ZSTD_CCtxParams_getParameter(trace->params, ZSTD_c_compressionLevel, &level);
+        ZSTD_CCtxParams_getParameter(trace->params, ZSTD_c_nbWorkers, &workers);
+    }
+    assert(g_traceFile != NULL);
+
+    ZSTD_pthread_mutex_lock(&g_mutex);
+    /* Fields:
+     * algorithm
+     * version
+     * method
+     * mode (streaming or single-pass)
+     * level
+     * workers
+     * dictionary size
+     * uncompressed size
+     * compressed size
+     * duration nanos
+     * compression ratio
+     * speed MB/s
+     */
+    fprintf(g_traceFile,
+        "zstd, %u, %s, %s, %d, %d, %llu, %llu, %llu, %llu, %.2f, %.2f\n",
+        trace->version,
+        method,
+        trace->streaming ? "streaming" : "single-pass",
+        level,
+        workers,
+        (unsigned long long)trace->dictionarySize,
+        (unsigned long long)trace->uncompressedSize,
+        (unsigned long long)trace->compressedSize,
+        (unsigned long long)duration,
+        ratio,
+        speed);
+    ZSTD_pthread_mutex_unlock(&g_mutex);
+}
+
+/**
+ * These symbols override the weak symbols provided by the library.
+ */
+
+ZSTD_TraceCtx ZSTD_trace_compress_begin(ZSTD_CCtx const* cctx)
+{
+    (void)cctx;
+    if (g_traceFile == NULL)
+        return 0;
+    return (ZSTD_TraceCtx)UTIL_clockSpanNano(g_enableTime);
+}
+
+void ZSTD_trace_compress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace)
+{
+    PTime const beginNanos = (PTime)ctx;
+    PTime const endNanos = UTIL_clockSpanNano(g_enableTime);
+    PTime const durationNanos = endNanos > beginNanos ? endNanos - beginNanos : 0;
+    assert(g_traceFile != NULL);
+    assert(trace->version == ZSTD_VERSION_NUMBER); /* CLI version must match. */
+    TRACE_log("compress", durationNanos, trace);
+}
+
+ZSTD_TraceCtx ZSTD_trace_decompress_begin(ZSTD_DCtx const* dctx)
+{
+    (void)dctx;
+    if (g_traceFile == NULL)
+        return 0;
+    return (ZSTD_TraceCtx)UTIL_clockSpanNano(g_enableTime);
+}
+
+void ZSTD_trace_decompress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace)
+{
+    PTime const beginNanos = (PTime)ctx;
+    PTime const endNanos = UTIL_clockSpanNano(g_enableTime);
+    PTime const durationNanos = endNanos > beginNanos ? endNanos - beginNanos : 0;
+    assert(g_traceFile != NULL);
+    assert(trace->version == ZSTD_VERSION_NUMBER); /* CLI version must match. */
+    TRACE_log("decompress", durationNanos, trace);
+}
+
+#else /* ZSTD_TRACE */
+
+void TRACE_enable(char const* filename)
+{
+    (void)filename;
+}
+
+void TRACE_finish(void) {}
+
+#endif /* ZSTD_TRACE */
diff --git a/programs/zstdcli_trace.h b/programs/zstdcli_trace.h
new file mode 100644
index 0000000..38c27dc
--- /dev/null
+++ b/programs/zstdcli_trace.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTDCLI_TRACE_H
+#define ZSTDCLI_TRACE_H
+
+/**
+ * Enable tracing - log to filename. No-op when zstdcli_trace.c is built with ZSTD_TRACE disabled.
+ */
+void TRACE_enable(char const* filename);
+
+/**
+ * Shut down the tracing library. No-op when zstdcli_trace.c is built with ZSTD_TRACE disabled.
+ */
+void TRACE_finish(void);
+
+#endif /* ZSTDCLI_TRACE_H */
diff --git a/programs/zstdgrep.1 b/programs/zstdgrep.1
index c8af908..bf96185 100644
--- a/programs/zstdgrep.1
+++ b/programs/zstdgrep.1
@@ -1,5 +1,5 @@
 .
-.TH "ZSTDGREP" "1" "December 2020" "zstd 1.4.7" "User Commands"
+.TH "ZSTDGREP" "1" "May 2021" "zstd 1.5.0" "User Commands"
 .
 .SH "NAME"
 \fBzstdgrep\fR \- print lines matching a pattern in zstandard\-compressed files
diff --git a/programs/zstdless.1 b/programs/zstdless.1
index be92e35..f08bc19 100644
--- a/programs/zstdless.1
+++ b/programs/zstdless.1
@@ -1,5 +1,5 @@
 .
-.TH "ZSTDLESS" "1" "December 2020" "zstd 1.4.7" "User Commands"
+.TH "ZSTDLESS" "1" "May 2021" "zstd 1.5.0" "User Commands"
 .
 .SH "NAME"
 \fBzstdless\fR \- view zstandard\-compressed files
diff --git a/tests/DEPRECATED-test-zstd-speed.py b/tests/DEPRECATED-test-zstd-speed.py
index b3f8074..665e0a7 100755
--- a/tests/DEPRECATED-test-zstd-speed.py
+++ b/tests/DEPRECATED-test-zstd-speed.py
@@ -2,7 +2,7 @@
 # THIS BENCHMARK IS BEING REPLACED BY automated-bencmarking.py
 
 # ################################################################
-# Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+# Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/Makefile b/tests/Makefile
index 42bc353..8555300 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2015-2020, Yann Collet, Facebook, Inc.
+# Copyright (c) Yann Collet, Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
@@ -24,11 +24,12 @@
 PYTHON ?= python3
 TESTARTEFACT := versionsTest
 
-DEBUGLEVEL ?= 1
+DEBUGLEVEL ?= 2
 export DEBUGLEVEL  # transmit value to sub-makefiles
 DEBUGFLAGS  = -g -DDEBUGLEVEL=$(DEBUGLEVEL)
 CPPFLAGS   += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
-              -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR)
+              -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) \
+			  -DZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY=1
 ifeq ($(OS),Windows_NT)   # MinGW assumed
 CPPFLAGS   += -D__USE_MINGW_ANSI_STDIO   # compatibility with %zu formatting
 endif
@@ -37,16 +38,15 @@
               -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
               -Wstrict-prototypes -Wundef                                     \
               -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings      \
-              -Wredundant-decls -Wmissing-prototypes
-CFLAGS     += $(DEBUGFLAGS) $(MOREFLAGS)
-FLAGS       = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
+              -Wredundant-decls -Wmissing-prototypes -Wno-deprecated-declarations
+CFLAGS     += $(DEBUGFLAGS)
+CPPFLAGS   += $(MOREFLAGS)
 
 
 ZSTDCOMMON_FILES := $(ZSTDDIR)/common/*.c
 ZSTDCOMP_FILES   := $(ZSTDDIR)/compress/*.c
 ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/*.c
 ZSTD_FILES  := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES)
-ZBUFF_FILES := $(ZSTDDIR)/deprecated/*.c
 ZDICT_FILES := $(ZSTDDIR)/dictBuilder/*.c
 
 ZSTD_F1 := $(wildcard $(ZSTD_FILES))
@@ -107,7 +107,6 @@
 %-dll : libzstd
 %-dll : LDFLAGS += -L$(ZSTDDIR) -lzstd
 
-.PHONY: $(ZSTDDIR)/libzstd.a
 $(ZSTDDIR)/libzstd.a :
 	$(MAKE) -C $(ZSTDDIR) libzstd.a
 
@@ -132,7 +131,7 @@
 	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
 
 fullbench32: CPPFLAGS += -m32
-fullbench fullbench32 : CPPFLAGS += $(MULTITHREAD_CPP)
+fullbench fullbench32 : CPPFLAGS += $(MULTITHREAD_CPP) -Wno-deprecated-declarations
 fullbench fullbench32 : LDFLAGS += $(MULTITHREAD_LD)
 fullbench fullbench32 : DEBUGFLAGS = -DNDEBUG  # turn off assert() for speed measurements
 fullbench fullbench32 : $(ZSTD_FILES)
@@ -146,9 +145,9 @@
 # note : broken : requires symbols unavailable from dynamic library
 fullbench-dll: $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/benchfn.c $(PRGDIR)/timefn.c fullbench.c
 #	$(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) -DZSTD_DLL_IMPORT=1 $(ZSTDDIR)/dll/libzstd.dll
-	$(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT)
+	$(LINK.c) $^ $(LDLIBS) -o $@$(EXT)
 
-fuzzer : CPPFLAGS += $(MULTITHREAD_CPP)
+fuzzer : CPPFLAGS += $(MULTITHREAD_CPP) -Wno-deprecated-declarations
 fuzzer : LDFLAGS += $(MULTITHREAD_LD)
 fuzzer : $(ZSTDMT_OBJECTS)
 fuzzer fuzzer32 : $(ZDICT_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c fuzzer.c
@@ -161,15 +160,6 @@
 fuzzer-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c fuzzer.c
 	$(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT)
 
-zbufftest zbufftest32 zbufftest-dll : CPPFLAGS += -I$(ZSTDDIR)/deprecated
-zbufftest zbufftest32 zbufftest-dll : CFLAGS += -Wno-deprecated-declarations   # required to silence deprecation warnings
-zbufftest32 : CFLAGS +=  -m32
-zbufftest zbufftest32 : $(ZSTD_OBJECTS) $(ZBUFF_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c zbufftest.c
-	$(CC) $(FLAGS) $^ -o $@$(EXT)
-
-zbufftest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c zbufftest.c
-	$(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT)
-
 ZSTREAM_LOCAL_FILES := $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c seqgen.c zstreamtest.c
 ZSTREAM_PROPER_FILES := $(ZDICT_FILES) $(ZSTREAM_LOCAL_FILES)
 ZSTREAMFILES := $(ZSTD_FILES) $(ZSTREAM_PROPER_FILES)
@@ -241,8 +231,8 @@
         $(PRGDIR)/zstd$(EXT) $(PRGDIR)/zstd32$(EXT) \
         fullbench$(EXT) fullbench32$(EXT) \
         fullbench-lib$(EXT) fullbench-dll$(EXT) \
-        fuzzer$(EXT) fuzzer32$(EXT) zbufftest$(EXT) zbufftest32$(EXT) \
-        fuzzer-dll$(EXT) zstreamtest-dll$(EXT) zbufftest-dll$(EXT) \
+        fuzzer$(EXT) fuzzer32$(EXT) \
+        fuzzer-dll$(EXT) zstreamtest-dll$(EXT) \
         zstreamtest$(EXT) zstreamtest32$(EXT) \
         datagen$(EXT) paramgrill$(EXT) roundTripCrash$(EXT) longmatch$(EXT) \
         symbols$(EXT) invalidDictionaries$(EXT) legacy$(EXT) poolTests$(EXT) \
@@ -346,12 +336,6 @@
 test-fuzzer32: fuzzer32
 	$(QEMU_SYS) ./fuzzer32 -v $(FUZZERTEST) $(FUZZER_FLAGS)
 
-test-zbuff: zbufftest
-	$(QEMU_SYS) ./zbufftest $(ZSTREAM_TESTTIME)
-
-test-zbuff32: zbufftest32
-	$(QEMU_SYS) ./zbufftest32 $(ZSTREAM_TESTTIME)
-
 test-zstream: zstreamtest
 	$(QEMU_SYS) ./zstreamtest -v $(ZSTREAM_TESTTIME) $(FUZZER_FLAGS)
 	$(QEMU_SYS) ./zstreamtest --newapi -t1 $(ZSTREAM_TESTTIME) $(FUZZER_FLAGS)
diff --git a/tests/README.md b/tests/README.md
index 23e0076..cd255e9 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -8,7 +8,6 @@
 - `paramgrill` : parameter tester for zstd
 - `test-zstd-speed.py` : script for testing zstd speed difference between commits
 - `test-zstd-versions.py` : compatibility test between zstd versions stored on Github (v0.1+)
-- `zbufftest`  : Test tool to check ZBUFF (a buffered streaming API) integrity
 - `zstreamtest` : Fuzzer test tool for zstd streaming API
 - `legacy` : Test tool to test decoding of legacy zstd frames
 - `decodecorpus` : Tool to generate valid Zstandard frames, for verifying decoder implementations
@@ -28,7 +27,7 @@
 be run on any machine via the command line interface.
 
 There are three modes of usage for this script: fastmode will just run a minimal single
-build comparison (between facebook:dev and facebook:master), onetime will pull all the current
+build comparison (between facebook:dev and facebook:release), onetime will pull all the current
 pull requests from the zstd repo and compare facebook:dev to all of them once, continuous
 will continuously get pull requests from the zstd repo and run benchmarks against facebook:dev.
 
diff --git a/tests/automated_benchmarking.py b/tests/automated_benchmarking.py
index d0cfb1f..458bda4 100644
--- a/tests/automated_benchmarking.py
+++ b/tests/automated_benchmarking.py
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2020-2020, Facebook, Inc.
+# Copyright (c) Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
@@ -20,7 +20,7 @@
 
 GITHUB_API_PR_URL = "https://api.github.com/repos/facebook/zstd/pulls?state=open"
 GITHUB_URL_TEMPLATE = "https://github.com/{}/zstd"
-MASTER_BUILD = {"user": "facebook", "branch": "dev", "hash": None}
+RELEASE_BUILD = {"user": "facebook", "branch": "dev", "hash": None}
 
 # check to see if there are any new PRs every minute
 DEFAULT_MAX_API_CALL_FREQUENCY_SEC = 60
@@ -264,11 +264,11 @@
         for test_build in builds:
             if dictionary_filename == None:
                 regressions = get_regressions(
-                    MASTER_BUILD, test_build, iterations, filenames, levels
+                    RELEASE_BUILD, test_build, iterations, filenames, levels
                 )
             else:
                 regressions = get_regressions_dictionary(
-                    MASTER_BUILD, test_build, filenames, dictionary_filename, levels, iterations
+                    RELEASE_BUILD, test_build, filenames, dictionary_filename, levels, iterations
                 )
             body = "\n".join(regressions)
             if len(regressions) > 0:
@@ -320,7 +320,7 @@
         builds = [{"user": None, "branch": "None", "hash": None}]
         main(filenames, levels, iterations, builds, frequency=frequency, dictionary_filename=dictionary_filename)
     elif mode == "fastmode":
-        builds = [{"user": "facebook", "branch": "master", "hash": None}]
+        builds = [{"user": "facebook", "branch": "release", "hash": None}]
         main(filenames, levels, iterations, builds, frequency=frequency, dictionary_filename=dictionary_filename)
     else:
         main(filenames, levels, iterations, None, emails, True, frequency=frequency, dictionary_filename=dictionary_filename)
diff --git a/tests/bigdict.c b/tests/bigdict.c
index aeda56c..fb08925 100644
--- a/tests/bigdict.c
+++ b/tests/bigdict.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/checkTag.c b/tests/checkTag.c
index 90af24a..f6c5e97 100644
--- a/tests/checkTag.c
+++ b/tests/checkTag.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/datagencli.c b/tests/datagencli.c
index 713ca99..ecc05f9 100644
--- a/tests/datagencli.c
+++ b/tests/datagencli.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c
index 50935d3..fa6a2d6 100644
--- a/tests/decodecorpus.c
+++ b/tests/decodecorpus.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fullbench.c b/tests/fullbench.c
index 37f0e24..a71117e 100644
--- a/tests/fullbench.c
+++ b/tests/fullbench.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -12,6 +12,7 @@
 /*_************************************
 *  Includes
 **************************************/
+#define ZSTD_DISABLE_DEPRECATE_WARNINGS /* No deprecation warnings, we still bench some deprecated functions */
 #include "util.h"        /* Compiler options, UTIL_GetFileSize */
 #include <stdlib.h>      /* malloc */
 #include <stdio.h>       /* fprintf, fopen, ftello64 */
diff --git a/tests/fuzz/.gitignore b/tests/fuzz/.gitignore
index 9bd280c..93d935a 100644
--- a/tests/fuzz/.gitignore
+++ b/tests/fuzz/.gitignore
@@ -16,9 +16,11 @@
 decompress_dstSize_tooSmall
 fse_read_ncount
 sequence_compression_api
+seekable_roundtrip
 fuzz-*.log
 rt_lib_*
 d_lib_*
+crash-*
 
 # misc
 trace
diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile
index 36232a8..ccb574b 100644
--- a/tests/fuzz/Makefile
+++ b/tests/fuzz/Makefile
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2016-2020, Facebook, Inc.
+# Copyright (c) Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
@@ -25,10 +25,11 @@
 
 ZSTDDIR = ../../lib
 PRGDIR = ../../programs
+CONTRIBDIR = ../../contrib
 
 FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
 	-I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(ZSTDDIR)/legacy \
-	-I$(PRGDIR) -DZSTD_MULTITHREAD -DZSTD_LEGACY_SUPPORT=1 $(CPPFLAGS)
+	-I$(CONTRIBDIR)/seekable_format -I$(PRGDIR) -DZSTD_MULTITHREAD -DZSTD_LEGACY_SUPPORT=1 $(CPPFLAGS)
 FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
 	-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
 	-Wstrict-prototypes -Wundef \
@@ -46,6 +47,9 @@
 FUZZ_HEADERS := fuzz_helpers.h fuzz.h zstd_helpers.h fuzz_data_producer.h
 FUZZ_SRC := $(PRGDIR)/util.c ./fuzz_helpers.c ./zstd_helpers.c ./fuzz_data_producer.c
 
+SEEKABLE_HEADERS = $(CONTRIBDIR)/seekable_format/zstd_seekable.h
+SEEKABLE_OBJS = $(CONTRIBDIR)/seekable_format/zstdseek_compress.c $(CONTRIBDIR)/seekable_format/zstdseek_decompress.c
+
 ZSTDCOMMON_SRC := $(ZSTDDIR)/common/*.c
 ZSTDCOMP_SRC   := $(ZSTDDIR)/compress/*.c
 ZSTDDECOMP_SRC := $(ZSTDDIR)/decompress/*.c
@@ -98,7 +102,8 @@
 	dictionary_stream_round_trip \
 	decompress_dstSize_tooSmall \
 	fse_read_ncount \
-	sequence_compression_api
+	sequence_compression_api \
+	seekable_roundtrip
 
 all: libregression.a $(FUZZ_TARGETS)
 
@@ -192,6 +197,9 @@
 sequence_compression_api: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_sequence_compression_api.o
 	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_sequence_compression_api.o $(LIB_FUZZING_ENGINE) -o $@
 
+seekable_roundtrip: $(FUZZ_HEADERS) $(SEEKABLE_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) $(SEEKABLE_OBJS)  rt_fuzz_seekable_roundtrip.o
+	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) $(SEEKABLE_OBJS) rt_fuzz_seekable_roundtrip.o $(LIB_FUZZING_ENGINE) -o $@
+
 libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o
 	$(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o
 
diff --git a/tests/fuzz/block_decompress.c b/tests/fuzz/block_decompress.c
index 64d70f0..bdbf769 100644
--- a/tests/fuzz/block_decompress.c
+++ b/tests/fuzz/block_decompress.c
@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/block_round_trip.c b/tests/fuzz/block_round_trip.c
index 097fc01..46a84c7 100644
--- a/tests/fuzz/block_round_trip.c
+++ b/tests/fuzz/block_round_trip.c
@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/decompress_dstSize_tooSmall.c b/tests/fuzz/decompress_dstSize_tooSmall.c
index e47b3d0..3f7607b 100644
--- a/tests/fuzz/decompress_dstSize_tooSmall.c
+++ b/tests/fuzz/decompress_dstSize_tooSmall.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/dictionary_decompress.c b/tests/fuzz/dictionary_decompress.c
index 9944baa..33c58c8 100644
--- a/tests/fuzz/dictionary_decompress.c
+++ b/tests/fuzz/dictionary_decompress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/dictionary_loader.c b/tests/fuzz/dictionary_loader.c
index f1fdf4d..5b60bc4 100644
--- a/tests/fuzz/dictionary_loader.c
+++ b/tests/fuzz/dictionary_loader.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/dictionary_round_trip.c b/tests/fuzz/dictionary_round_trip.c
index 7b7771e..7bff4bd 100644
--- a/tests/fuzz/dictionary_round_trip.c
+++ b/tests/fuzz/dictionary_round_trip.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/dictionary_stream_round_trip.c b/tests/fuzz/dictionary_stream_round_trip.c
index 67e8c69..9af712f 100644
--- a/tests/fuzz/dictionary_stream_round_trip.c
+++ b/tests/fuzz/dictionary_stream_round_trip.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/fse_read_ncount.c b/tests/fuzz/fse_read_ncount.c
index e20a938..c323860 100644
--- a/tests/fuzz/fse_read_ncount.c
+++ b/tests/fuzz/fse_read_ncount.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/fuzz.h b/tests/fuzz/fuzz.h
index 8ee9645..810daa2 100644
--- a/tests/fuzz/fuzz.h
+++ b/tests/fuzz/fuzz.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py
index ef94a53..d8dfa77 100755
--- a/tests/fuzz/fuzz.py
+++ b/tests/fuzz/fuzz.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 # ################################################################
-# Copyright (c) 2016-2020, Facebook, Inc.
+# Copyright (c) Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
@@ -62,6 +62,7 @@
     'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
     'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
     'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
+    'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
 }
 TARGETS = list(TARGET_INFO.keys())
 ALL_TARGETS = TARGETS + ['all']
@@ -180,14 +181,15 @@
     cxx_version_bytes = subprocess.check_output([cxx, "--version"])
     compiler = None
     version = None
+    print("{} --version:\n{}".format(cc, cc_version_bytes.decode('ascii')))
     if b'clang' in cc_version_bytes:
         assert(b'clang' in cxx_version_bytes)
         compiler = 'clang'
-    elif b'gcc' in cc_version_bytes:
+    elif b'gcc' in cc_version_bytes or b'GCC' in cc_version_bytes:
         assert(b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes)
         compiler = 'gcc'
     if compiler is not None:
-        version_regex = b'([0-9])+\.([0-9])+\.([0-9])+'
+        version_regex = b'([0-9]+)\.([0-9]+)\.([0-9]+)'
         version_match = re.search(version_regex, cc_version_bytes)
         version = tuple(int(version_match.group(i)) for i in range(1, 4))
     return compiler, version
@@ -195,9 +197,9 @@
 
 def overflow_ubsan_flags(cc, cxx):
     compiler, version = compiler_version(cc, cxx)
-    if compiler == 'gcc':
+    if compiler == 'gcc' and version < (8, 0, 0):
         return ['-fno-sanitize=signed-integer-overflow']
-    if compiler == 'clang' and version >= (5, 0, 0):
+    if compiler == 'gcc' or (compiler == 'clang' and version >= (5, 0, 0)):
         return ['-fno-sanitize=pointer-overflow']
     return []
 
diff --git a/tests/fuzz/fuzz_data_producer.c b/tests/fuzz/fuzz_data_producer.c
index f2d5a1b..beb0155 100644
--- a/tests/fuzz/fuzz_data_producer.c
+++ b/tests/fuzz/fuzz_data_producer.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/fuzz_data_producer.h b/tests/fuzz/fuzz_data_producer.h
index 25cc937..045aaff 100644
--- a/tests/fuzz/fuzz_data_producer.h
+++ b/tests/fuzz/fuzz_data_producer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/fuzz_helpers.c b/tests/fuzz/fuzz_helpers.c
index b80dc75..61c0deb 100644
--- a/tests/fuzz/fuzz_helpers.c
+++ b/tests/fuzz/fuzz_helpers.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -29,4 +29,4 @@
         return 0;
     }
     return memcmp(lhs, rhs, size);
-}
\ No newline at end of file
+}
diff --git a/tests/fuzz/fuzz_helpers.h b/tests/fuzz/fuzz_helpers.h
index cde2c4e..c180478 100644
--- a/tests/fuzz/fuzz_helpers.h
+++ b/tests/fuzz/fuzz_helpers.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/raw_dictionary_round_trip.c b/tests/fuzz/raw_dictionary_round_trip.c
index 08e5fd9..0e65176 100644
--- a/tests/fuzz/raw_dictionary_round_trip.c
+++ b/tests/fuzz/raw_dictionary_round_trip.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/regression_driver.c b/tests/fuzz/regression_driver.c
index 8180ca8..e6d2dec 100644
--- a/tests/fuzz/regression_driver.c
+++ b/tests/fuzz/regression_driver.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/seekable_roundtrip.c b/tests/fuzz/seekable_roundtrip.c
new file mode 100644
index 0000000..dcdcaae
--- /dev/null
+++ b/tests/fuzz/seekable_roundtrip.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstd.h"
+#include "zstd_seekable.h"
+#include "fuzz_helpers.h"
+#include "fuzz_data_producer.h"
+
+static ZSTD_seekable *stream = NULL;
+static ZSTD_seekable_CStream *zscs = NULL;
+static const size_t kSeekableOverheadSize = ZSTD_seekTableFooterSize;
+
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
+{   /* Fuzz target: compress src with the seekable API, decompress one window of it, compare with src. */
+    /* Give a random portion of src data to the producer, to use for
+    parameter generation. The rest will be used for (de)compression */
+    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+    size = FUZZ_dataProducer_reserveDataPrefix(producer);
+    size_t const compressedBufferSize = ZSTD_compressBound(size) + kSeekableOverheadSize;
+    uint8_t* compressedBuffer = (uint8_t*)malloc(compressedBufferSize);
+    uint8_t* decompressedBuffer = (uint8_t*)malloc(size);
+
+    int const cLevel = FUZZ_dataProducer_int32Range(producer, ZSTD_minCLevel(), ZSTD_maxCLevel());
+    unsigned const checksumFlag = FUZZ_dataProducer_int32Range(producer, 0, 1);
+    size_t const uncompressedSize = FUZZ_dataProducer_uint32Range(producer, 0, size);  /* length of the window read back */
+    size_t const offset = FUZZ_dataProducer_uint32Range(producer, 0, size - uncompressedSize);  /* window start, drawn from [0, size - uncompressedSize] */
+    size_t seekSize;  /* bytes produced in compressedBuffer by the compression step */
+    FUZZ_ASSERT(compressedBuffer != NULL && (decompressedBuffer != NULL || size == 0));  /* fail in the harness on OOM; malloc(0) may return NULL */
+    if (!zscs) {
+        zscs = ZSTD_seekable_createCStream();
+        FUZZ_ASSERT(zscs);
+    }
+    if (!stream) {
+        stream = ZSTD_seekable_create();
+        FUZZ_ASSERT(stream);
+    }
+
+    {   /* Perform a compression */
+        size_t const initStatus = ZSTD_seekable_initCStream(zscs, cLevel, checksumFlag, size);
+        size_t endStatus;
+        ZSTD_outBuffer out = { .dst=compressedBuffer, .pos=0, .size=compressedBufferSize };
+        ZSTD_inBuffer  in  = { .src=src, .pos=0, .size=size };
+        FUZZ_ASSERT(!ZSTD_isError(initStatus));
+
+        do {  /* runs at least once, so an empty input still goes through compressStream */
+            size_t cSize = ZSTD_seekable_compressStream(zscs, &out, &in);
+            FUZZ_ASSERT(!ZSTD_isError(cSize));
+        } while (in.pos != in.size);
+
+        FUZZ_ASSERT(in.pos == in.size);
+        endStatus = ZSTD_seekable_endStream(zscs, &out);
+        FUZZ_ASSERT(!ZSTD_isError(endStatus));
+        seekSize = out.pos;
+    }
+
+    {   /* Decompress at an offset */
+        size_t const initStatus = ZSTD_seekable_initBuff(stream, compressedBuffer, seekSize);
+        size_t decompressedBytesTotal = 0;
+        size_t dSize;
+
+        FUZZ_ZASSERT(initStatus);
+        do {  /* stops once the window is covered or a call reports 0 bytes */
+            dSize = ZSTD_seekable_decompress(stream, decompressedBuffer, uncompressedSize, offset);
+            FUZZ_ASSERT(!ZSTD_isError(dSize));
+            decompressedBytesTotal += dSize;
+        } while (decompressedBytesTotal < uncompressedSize && dSize > 0);
+        FUZZ_ASSERT(decompressedBytesTotal == uncompressedSize);
+    }
+
+    FUZZ_ASSERT_MSG(!FUZZ_memcmp(src+offset, decompressedBuffer, uncompressedSize), "Corruption!");
+
+    free(decompressedBuffer);
+    free(compressedBuffer);
+    FUZZ_dataProducer_free(producer);
+
+#ifndef STATEFUL_FUZZING
+    ZSTD_seekable_free(stream); stream = NULL;  /* without STATEFUL_FUZZING both static objects are rebuilt on every input */
+    ZSTD_seekable_freeCStream(zscs); zscs = NULL;
+#endif
+    return 0;
+}
diff --git a/tests/fuzz/sequence_compression_api.c b/tests/fuzz/sequence_compression_api.c
index e838687..cc840bf 100644
--- a/tests/fuzz/sequence_compression_api.c
+++ b/tests/fuzz/sequence_compression_api.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/simple_compress.c b/tests/fuzz/simple_compress.c
index 620177f..3716d0d 100644
--- a/tests/fuzz/simple_compress.c
+++ b/tests/fuzz/simple_compress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/simple_decompress.c b/tests/fuzz/simple_decompress.c
index c3903ce..dfff11c 100644
--- a/tests/fuzz/simple_decompress.c
+++ b/tests/fuzz/simple_decompress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/simple_round_trip.c b/tests/fuzz/simple_round_trip.c
index 6e58fb1..c9fac26 100644
--- a/tests/fuzz/simple_round_trip.c
+++ b/tests/fuzz/simple_round_trip.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/stream_decompress.c b/tests/fuzz/stream_decompress.c
index 5d2bb2a..e0cdd34 100644
--- a/tests/fuzz/stream_decompress.c
+++ b/tests/fuzz/stream_decompress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/stream_round_trip.c b/tests/fuzz/stream_round_trip.c
index 286d387..719eac4 100644
--- a/tests/fuzz/stream_round_trip.c
+++ b/tests/fuzz/stream_round_trip.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/zstd_frame_info.c b/tests/fuzz/zstd_frame_info.c
index 876a74e..9ce645d 100644
--- a/tests/fuzz/zstd_frame_info.c
+++ b/tests/fuzz/zstd_frame_info.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c
index 5680bd6..4d889de 100644
--- a/tests/fuzz/zstd_helpers.c
+++ b/tests/fuzz/zstd_helpers.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -91,9 +91,13 @@
     /* Set misc parameters */
     setRand(cctx, ZSTD_c_nbWorkers, 0, 2, producer);
     setRand(cctx, ZSTD_c_rsyncable, 0, 1, producer);
+    setRand(cctx, ZSTD_c_useRowMatchFinder, 0, 2, producer);
+    setRand(cctx, ZSTD_c_enableDedicatedDictSearch, 0, 1, producer);
     setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer);
     setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer);
     setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer);
+    setRand(cctx, ZSTD_c_splitBlocks, 0, 1, producer);
+    setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer);
     if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
       setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer);
     }
diff --git a/tests/fuzz/zstd_helpers.h b/tests/fuzz/zstd_helpers.h
index 6a4e340..7813884 100644
--- a/tests/fuzz/zstd_helpers.h
+++ b/tests/fuzz/zstd_helpers.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index 2e5d70e..1ea6521 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -30,6 +30,7 @@
 #define ZSTD_STATIC_LINKING_ONLY  /* ZSTD_compressContinue, ZSTD_compressBlock */
 #include "debug.h"        /* DEBUG_STATIC_ASSERT */
 #include "fse.h"
+#define ZSTD_DISABLE_DEPRECATE_WARNINGS /* No deprecation warnings, we still test some deprecated functions */
 #include "zstd.h"         /* ZSTD_VERSION_STRING */
 #include "zstd_errors.h"  /* ZSTD_getErrorCode */
 #define ZDICT_STATIC_LINKING_ONLY
@@ -42,6 +43,7 @@
 #include "timefn.h"       /* SEC_TO_MICRO, UTIL_time_t, UTIL_TIME_INITIALIZER, UTIL_clockSpanMicro, UTIL_getTime */
 /* must be included after util.h, due to ERROR macro redefinition issue on Visual Studio */
 #include "zstd_internal.h"  /* ZSTD_WORKSPACETOOLARGE_MAXDURATION, ZSTD_WORKSPACETOOLARGE_FACTOR, KB, MB */
+#include "threading.h"    /* ZSTD_pthread_create, ZSTD_pthread_join */
 
 
 /*-************************************
@@ -335,6 +337,126 @@
     }
 }
 
+#ifdef ZSTD_MULTITHREAD
+typedef struct {                  /* arguments + result slot for one threadPoolTests_compressionJob() run */
+    ZSTD_CCtx* cctx;              /* compression context used by the job */
+    ZSTD_threadPool* pool;        /* thread pool the job references from cctx */
+    void* CNBuffer;               /* source data to compress */
+    size_t CNBuffSize;            /* size of CNBuffer; also used as capacity of decodedBuffer */
+    void* compressedBuffer;       /* destination of the compression step */
+    size_t compressedBufferSize;  /* capacity of compressedBuffer */
+    void* decodedBuffer;          /* destination of the decompression step */
+    int err;                      /* set to 1 by the job when any step fails */
+} threadPoolTests_compressionJob_payload;
+
+/* Thread entry point : round-trips args->CNBuffer through args->cctx; sets args->err on any failure, returns payload. */
+static void* threadPoolTests_compressionJob(void* payload) {
+    threadPoolTests_compressionJob_payload* const args = (threadPoolTests_compressionJob_payload*)payload;
+    size_t ret = ZSTD_CCtx_refThreadPool(args->cctx, args->pool);   /* each step below only runs if the previous one succeeded */
+    if (!ZSTD_isError(ret)) ret = ZSTD_compress2(args->cctx, args->compressedBuffer, args->compressedBufferSize, args->CNBuffer, args->CNBuffSize);
+    if (!ZSTD_isError(ret)) ret = ZSTD_decompress(args->decodedBuffer, args->CNBuffSize, args->compressedBuffer, ret);   /* ret is a real compressed size here, never an error code */
+    if (ZSTD_isError(ret)) args->err = 1;
+    return payload;
+}
+
+/* threadPoolTests() : one ZSTD_threadPool re-used by a single cctx for nbWorkers = 1..kPoolNumThreads, then
+ * shared by two cctx compressing from two threads at once. Everything is released on every path, failures included.
+ * @return : 0 on success, 1 on any error. */
+static int threadPoolTests(void) {
+    int testResult = 0;
+    size_t const kPoolNumThreads = 8;
+    size_t const CNBuffSize = 5 MB;
+    size_t const compressedBufferSize = ZSTD_compressBound(CNBuffSize);
+    void* const CNBuffer = malloc(CNBuffSize);
+    void* const compressedBuffer = malloc(compressedBufferSize);
+    void* const compressedBuffer2 = malloc(compressedBufferSize);
+    void* const decodedBuffer = malloc(CNBuffSize);
+    void* const decodedBuffer2 = malloc(CNBuffSize);
+    ZSTD_threadPool* pool = NULL;   /* handles stay NULL until created, so that _end can always free them */
+    ZSTD_CCtx* cctx1 = NULL;
+    ZSTD_CCtx* cctx2 = NULL;
+
+    if (!CNBuffer || !compressedBuffer || !compressedBuffer2 || !decodedBuffer || !decodedBuffer2) {
+        DISPLAY("Not enough memory, aborting \n");
+        goto _output_error;
+    }
+    RDG_genBuffer(CNBuffer, CNBuffSize, 0.5, 0.5, 0);
+
+    DISPLAYLEVEL(3, "thread pool test : threadPool re-use roundtrips: ");
+    pool = ZSTD_createThreadPool(kPoolNumThreads);
+    cctx1 = ZSTD_createCCtx();
+    if (pool == NULL || cctx1 == NULL) goto _output_error;
+    {   size_t nbThreads;
+        for (nbThreads = 1; nbThreads <= kPoolNumThreads; ++nbThreads) {
+            size_t cSize;
+            ZSTD_CCtx_reset(cctx1, ZSTD_reset_session_and_parameters);
+            ZSTD_CCtx_setParameter(cctx1, ZSTD_c_nbWorkers, (int)nbThreads);
+            if (ZSTD_isError(ZSTD_CCtx_refThreadPool(cctx1, pool))) {
+                DISPLAYLEVEL(3, "refThreadPool error!\n");
+                goto _output_error;
+            }
+            cSize = ZSTD_compress2(cctx1, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
+            if (ZSTD_isError(cSize)) {
+                DISPLAYLEVEL(3, "Compression error!\n");
+                goto _output_error;
+            }
+            if (ZSTD_isError(ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize))) {
+                DISPLAYLEVEL(3, "Decompression error!\n");
+                goto _output_error;
+            }
+        }
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
+    DISPLAYLEVEL(3, "thread pool test : threadPool simultaneous usage: ");
+    /* restart from a fresh pool and fresh contexts, independent from the re-use test */
+    ZSTD_freeCCtx(cctx1);
+    ZSTD_freeThreadPool(pool);
+    pool = ZSTD_createThreadPool(kPoolNumThreads);
+    cctx1 = ZSTD_createCCtx();
+    cctx2 = ZSTD_createCCtx();
+    if (pool == NULL || cctx1 == NULL || cctx2 == NULL) goto _output_error;
+    {   ZSTD_pthread_t t1;
+        ZSTD_pthread_t t2;
+        int t1Failed, t2Failed;
+        threadPoolTests_compressionJob_payload p1 = {cctx1, pool, CNBuffer, CNBuffSize,
+                                                     compressedBuffer, compressedBufferSize, decodedBuffer, 0 /* err */};
+        threadPoolTests_compressionJob_payload p2 = {cctx2, pool, CNBuffer, CNBuffSize,
+                                                     compressedBuffer2, compressedBufferSize, decodedBuffer2, 0 /* err */};
+
+        ZSTD_CCtx_setParameter(cctx1, ZSTD_c_nbWorkers, 2);
+        ZSTD_CCtx_setParameter(cctx2, ZSTD_c_nbWorkers, 2);
+        ZSTD_CCtx_refThreadPool(cctx1, pool);
+        ZSTD_CCtx_refThreadPool(cctx2, pool);
+
+        t1Failed = ZSTD_pthread_create(&t1, NULL, threadPoolTests_compressionJob, &p1);
+        t2Failed = ZSTD_pthread_create(&t2, NULL, threadPoolTests_compressionJob, &p2);
+        if (!t1Failed) ZSTD_pthread_join(t1, NULL);   /* only join threads that were actually started */
+        if (!t2Failed) ZSTD_pthread_join(t2, NULL);
+        if (t1Failed || t2Failed || p1.err || p2.err) goto _output_error;
+        /* explicit check instead of assert(), so that it also runs when NDEBUG is defined */
+        if (memcmp(decodedBuffer, decodedBuffer2, CNBuffSize) != 0) goto _output_error;
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
+_end:
+    ZSTD_freeCCtx(cctx1);
+    ZSTD_freeCCtx(cctx2);
+    ZSTD_freeThreadPool(pool);
+    free(CNBuffer);
+    free(compressedBuffer);
+    free(compressedBuffer2);
+    free(decodedBuffer);
+    free(decodedBuffer2);
+    return testResult;
+
+_output_error:
+    testResult = 1;
+    DISPLAY("Error detected in Unit tests ! \n");
+    goto _end;
+}
+#endif /* ZSTD_MULTITHREAD */
+
 /*=============================================
 *   Unit tests
 =============================================*/
@@ -374,6 +496,12 @@
         DISPLAYLEVEL(3, "%i (OK) \n", mcl);
     }
 
+    DISPLAYLEVEL(3, "test%3u : default compression level : ", testNb++);
+    {   int const defaultCLevel = ZSTD_defaultCLevel();
+        if (defaultCLevel != ZSTD_CLEVEL_DEFAULT) goto _output_error;
+        DISPLAYLEVEL(3, "%i (OK) \n", defaultCLevel);
+    }
+
     DISPLAYLEVEL(3, "test%3u : ZSTD_versionNumber : ", testNb++);
     {   unsigned const vn = ZSTD_versionNumber();
         DISPLAYLEVEL(3, "%u (OK) \n", vn);
@@ -675,6 +803,41 @@
     }
     DISPLAYLEVEL(3, "OK \n");
 
+    {
+        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+        ZSTD_CDict* const cdict = ZSTD_createCDict(CNBuffer, 100, 1);
+        ZSTD_parameters const params = ZSTD_getParams(1, 0, 0);
+        CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless) );
+
+        DISPLAYLEVEL(3, "test%3i : ZSTD_compressCCtx() doesn't use advanced parameters", testNb++);
+        CHECK_Z(ZSTD_compressCCtx(cctx, compressedBuffer, compressedBufferSize, NULL, 0, 1));
+        if (MEM_readLE32(compressedBuffer) != ZSTD_MAGICNUMBER) goto _output_error;
+        DISPLAYLEVEL(3, "OK \n");
+
+        DISPLAYLEVEL(3, "test%3i : ZSTD_compress_usingDict() doesn't use advanced parameters: ", testNb++);
+        CHECK_Z(ZSTD_compress_usingDict(cctx, compressedBuffer, compressedBufferSize, NULL, 0, NULL, 0, 1));
+        if (MEM_readLE32(compressedBuffer) != ZSTD_MAGICNUMBER) goto _output_error;
+        DISPLAYLEVEL(3, "OK \n");
+
+        DISPLAYLEVEL(3, "test%3i : ZSTD_compress_usingCDict() doesn't use advanced parameters: ", testNb++);
+        CHECK_Z(ZSTD_compress_usingCDict(cctx, compressedBuffer, compressedBufferSize, NULL, 0, cdict));
+        if (MEM_readLE32(compressedBuffer) != ZSTD_MAGICNUMBER) goto _output_error;
+        DISPLAYLEVEL(3, "OK \n");
+
+        DISPLAYLEVEL(3, "test%3i : ZSTD_compress_advanced() doesn't use advanced parameters: ", testNb++);
+        CHECK_Z(ZSTD_compress_advanced(cctx, compressedBuffer, compressedBufferSize, NULL, 0, NULL, 0, params));
+        if (MEM_readLE32(compressedBuffer) != ZSTD_MAGICNUMBER) goto _output_error;
+        DISPLAYLEVEL(3, "OK \n");
+
+        DISPLAYLEVEL(3, "test%3i : ZSTD_compress_usingCDict_advanced() doesn't use advanced parameters: ", testNb++);
+        CHECK_Z(ZSTD_compress_usingCDict_advanced(cctx, compressedBuffer, compressedBufferSize, NULL, 0, cdict, params.fParams));
+        if (MEM_readLE32(compressedBuffer) != ZSTD_MAGICNUMBER) goto _output_error;
+        DISPLAYLEVEL(3, "OK \n");
+
+        ZSTD_freeCDict(cdict);
+        ZSTD_freeCCtx(cctx);
+    }
+
     DISPLAYLEVEL(3, "test%3i : ldm fill dict out-of-bounds check", testNb++);
     {
         ZSTD_CCtx* const cctx = ZSTD_createCCtx();
@@ -725,6 +888,50 @@
     }
     DISPLAYLEVEL(3, "OK \n");
 
+    DISPLAYLEVEL(3, "test%3i : testing dict compression for determinism : ", testNb++);
+    {
+        size_t const testSize = 1024;
+        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+        ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+        char* dict = (char*)malloc(2 * testSize);
+        int ldmEnabled, level;
+
+        RDG_genBuffer(dict, testSize, 0.5, 0.5, seed);
+        RDG_genBuffer(CNBuffer, testSize, 0.6, 0.6, seed);
+        memcpy(dict + testSize, CNBuffer, testSize);
+        for (level = 1; level <= 5; ++level) {
+            for (ldmEnabled = 0; ldmEnabled <= 1; ++ldmEnabled) {
+                size_t cSize0;
+                XXH64_hash_t compressedChecksum0;
+
+                CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
+                CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level));
+                CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ldmEnabled));
+                CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_deterministicRefPrefix, 1));
+
+                CHECK_Z(ZSTD_CCtx_refPrefix(cctx, dict, testSize));
+                cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, testSize);
+                CHECK_Z(cSize);
+                CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, testSize, compressedBuffer, cSize, dict, testSize));
+
+                cSize0 = cSize;
+                compressedChecksum0 = XXH64(compressedBuffer, cSize, 0);
+
+                CHECK_Z(ZSTD_CCtx_refPrefix(cctx, dict, testSize));
+                cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, dict + testSize, testSize);
+                CHECK_Z(cSize);
+
+                if (cSize != cSize0) goto _output_error;
+                if (XXH64(compressedBuffer, cSize, 0) != compressedChecksum0) goto _output_error;
+            }
+        }
+
+        ZSTD_freeCCtx(cctx);
+        ZSTD_freeDCtx(dctx);
+        free(dict);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
     DISPLAYLEVEL(3, "test%3i : LDM + opt parser with small uncompressible block ", testNb++);
     {   ZSTD_CCtx* cctx = ZSTD_createCCtx();
         ZSTD_DCtx* dctx = ZSTD_createDCtx();
@@ -1509,6 +1716,15 @@
         ZSTD_freeCCtx(cctx);
     }
 
+    DISPLAYLEVEL(3, "test%3i : compress with block splitting : ", testNb++)
+    {   ZSTD_CCtx* cctx = ZSTD_createCCtx();
+        CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_splitBlocks, 1) );
+        cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
+        CHECK(cSize);
+        ZSTD_freeCCtx(cctx);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
     DISPLAYLEVEL(3, "test%3i : compress -T2 with/without literals compression : ", testNb++)
     {   ZSTD_CCtx* cctx = ZSTD_createCCtx();
         size_t cSize1, cSize2;
@@ -1539,6 +1755,7 @@
 
     DISPLAYLEVEL(3, "test%3i : setting multithreaded parameters : ", testNb++)
     {   ZSTD_CCtx_params* params = ZSTD_createCCtxParams();
+        int const jobSize = 512 KB;
         int value;
         /* Check that the overlap log and job size are unset. */
         CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) );
@@ -1547,19 +1764,18 @@
         CHECK_EQ(value, 0);
         /* Set and check the overlap log and job size. */
         CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, 5) );
-        CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, 2 MB) );
+        CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, jobSize) );
         CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) );
         CHECK_EQ(value, 5);
         CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) );
-        CHECK_EQ(value, 2 MB);
+        CHECK_EQ(value, jobSize);
         /* Set the number of workers and check the overlap log and job size. */
         CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, 2) );
         CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) );
         CHECK_EQ(value, 5);
         CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) );
-        CHECK_EQ(value, 2 MB);
+        CHECK_EQ(value, jobSize);
         ZSTD_freeCCtxParams(params);
-
     }
     DISPLAYLEVEL(3, "OK \n");
 
@@ -1570,6 +1786,11 @@
         int const segs = 4;
         /* only use the first half so we don't push against size limit of compressedBuffer */
         size_t const segSize = (CNBuffSize / 2) / segs;
+
+        const U32 skipLen = 129 KB;
+        char* const skipBuff = (char*)malloc(skipLen);
+        assert(skipBuff != NULL);
+        memset(skipBuff, 0, skipLen);
         for (i = 0; i < segs; i++) {
             CHECK_NEWV(r, ZSTD_compress(
                             (BYTE*)compressedBuffer + off, CNBuffSize - off,
@@ -1578,13 +1799,15 @@
             off += r;
             if (i == segs/2) {
                 /* insert skippable frame */
-                const U32 skipLen = 129 KB;
-                MEM_writeLE32((BYTE*)compressedBuffer + off, ZSTD_MAGIC_SKIPPABLE_START);
-                MEM_writeLE32((BYTE*)compressedBuffer + off + 4, skipLen);
-                off += skipLen + ZSTD_SKIPPABLEHEADERSIZE;
+                size_t const skippableSize =
+                    ZSTD_writeSkippableFrame((BYTE*)compressedBuffer + off, compressedBufferSize,
+                                             skipBuff, skipLen, seed % 15);
+                CHECK_Z(skippableSize);
+                off += skippableSize;
             }
         }
         cSize = off;
+        free(skipBuff);
     }
     DISPLAYLEVEL(3, "OK \n");
 
@@ -1681,10 +1904,7 @@
 
         DISPLAYLEVEL(3, "test%3i : check content size on duplicated context : ", testNb++);
         {   size_t const testSize = CNBuffSize / 3;
-            {   ZSTD_parameters p = ZSTD_getParams(2, testSize, dictSize);
-                p.fParams.contentSizeFlag = 1;
-                CHECK( ZSTD_compressBegin_advanced(ctxOrig, CNBuffer, dictSize, p, testSize-1) );
-            }
+            CHECK( ZSTD_compressBegin(ctxOrig, ZSTD_defaultCLevel()) );
             CHECK( ZSTD_copyCCtx(ctxDuplicated, ctxOrig, testSize) );
 
             CHECK_VAR(cSize, ZSTD_compressEnd(ctxDuplicated, compressedBuffer, ZSTD_compressBound(testSize),
@@ -1700,13 +1920,14 @@
             size_t const contentSize = 9 KB;
             const void* const dict = (const char*)CNBuffer;
             const void* const contentStart = (const char*)dict + flatdictSize;
+            /* These upper bounds are generally within a few bytes of the compressed size */
             size_t const target_nodict_cSize[22+1] = { 3840, 3770, 3870, 3830, 3770,
                                                        3770, 3770, 3770, 3750, 3750,
                                                        3742, 3670, 3670, 3660, 3660,
                                                        3660, 3660, 3660, 3660, 3660,
                                                        3660, 3660, 3660 };
             size_t const target_wdict_cSize[22+1] =  { 2830, 2890, 2890, 2820, 2940,
-                                                       2950, 2950, 2921, 2900, 2891,
+                                                       2950, 2950, 2925, 2900, 2891,
                                                        2910, 2910, 2910, 2770, 2760,
                                                        2750, 2750, 2750, 2750, 2750,
                                                        2750, 2750, 2750 };
@@ -1743,6 +1964,22 @@
                 DISPLAYLEVEL(4, "level %i with dictionary : max expected %u >= reached %u \n",
                                 l, (unsigned)target_wdict_cSize[l], (unsigned)wdict_cSize);
             }
+            /* Dict compression with DMS */
+            for ( l=1 ; l <= maxLevel; l++) {
+                size_t wdict_cSize;
+                CHECK_Z( ZSTD_CCtx_loadDictionary(ctxOrig, dict, flatdictSize) );
+                CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_compressionLevel, l) );
+                CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_enableDedicatedDictSearch, 0) );
+                CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach) );
+                wdict_cSize = ZSTD_compress2(ctxOrig, compressedBuffer, compressedBufferSize, contentStart, contentSize);
+                if (wdict_cSize > target_wdict_cSize[l]) {
+                    DISPLAYLEVEL(1, "error : compression with dictionary and compress2 at level %i worse than expected (%u > %u) \n",
+                                    l, (unsigned)wdict_cSize, (unsigned)target_wdict_cSize[l]);
+                    goto _output_error;
+                }
+                DISPLAYLEVEL(4, "level %i with dictionary and compress2 : max expected %u >= reached %u \n",
+                                l, (unsigned)target_wdict_cSize[l], (unsigned)wdict_cSize);
+            }
 
             DISPLAYLEVEL(4, "compression efficiency tests OK \n");
         }
@@ -1763,6 +2000,19 @@
         size_t dictSize;
         U32 dictID;
         size_t dictHeaderSize;
+        size_t dictBufferFixedSize = 144;
+        unsigned char const dictBufferFixed[144] = {0x37, 0xa4, 0x30, 0xec, 0x63, 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x1f,
+                                                    0x0f, 0x00, 0x28, 0xe5, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+                                                    0x00, 0x80, 0x0f, 0x9e, 0x0f, 0x00, 0x00, 0x24, 0x40, 0x80, 0x00, 0x01,
+                                                    0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0xde, 0x08,
+                                                    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+                                                    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+                                                    0x08, 0x08, 0x08, 0x08, 0xbc, 0xe1, 0x4b, 0x92, 0x0e, 0xb4, 0x7b, 0x18,
+                                                    0x86, 0x61, 0x18, 0xc6, 0x18, 0x63, 0x8c, 0x31, 0xc6, 0x18, 0x63, 0x8c,
+                                                    0x31, 0x66, 0x66, 0x66, 0x66, 0xb6, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x04,
+                                                    0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x73, 0x6f, 0x64, 0x61,
+                                                    0x6c, 0x65, 0x73, 0x20, 0x74, 0x6f, 0x72, 0x74, 0x6f, 0x72, 0x20, 0x65,
+                                                    0x6c, 0x65, 0x69, 0x66, 0x65, 0x6e, 0x64, 0x2e, 0x20, 0x41, 0x6c, 0x69};
 
         if (dictBuffer==NULL || samplesSizes==NULL) {
             free(dictBuffer);
@@ -1858,19 +2108,7 @@
         DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize);
 
         DISPLAYLEVEL(3, "test%3i : check dict header size correctness : ", testNb++);
-        {   unsigned char const dictBufferFixed[144] = { 0x37, 0xa4, 0x30, 0xec, 0x63, 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x1f,
-                                                         0x0f, 0x00, 0x28, 0xe5, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                                                         0x00, 0x80, 0x0f, 0x9e, 0x0f, 0x00, 0x00, 0x24, 0x40, 0x80, 0x00, 0x01,
-                                                         0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0xde, 0x08,
-                                                         0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
-                                                         0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
-                                                         0x08, 0x08, 0x08, 0x08, 0xbc, 0xe1, 0x4b, 0x92, 0x0e, 0xb4, 0x7b, 0x18,
-                                                         0x86, 0x61, 0x18, 0xc6, 0x18, 0x63, 0x8c, 0x31, 0xc6, 0x18, 0x63, 0x8c,
-                                                         0x31, 0x66, 0x66, 0x66, 0x66, 0xb6, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x04,
-                                                         0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x73, 0x6f, 0x64, 0x61,
-                                                         0x6c, 0x65, 0x73, 0x20, 0x74, 0x6f, 0x72, 0x74, 0x6f, 0x72, 0x20, 0x65,
-                                                         0x6c, 0x65, 0x69, 0x66, 0x65, 0x6e, 0x64, 0x2e, 0x20, 0x41, 0x6c, 0x69 };
-            dictHeaderSize = ZDICT_getDictHeaderSize(dictBufferFixed, 144);
+        {   dictHeaderSize = ZDICT_getDictHeaderSize(dictBufferFixed, dictBufferFixedSize);
             if (dictHeaderSize != 115) goto _output_error;
         }
         DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize);
@@ -2324,6 +2562,74 @@
         }
         DISPLAYLEVEL(3, "OK \n");
 
+        DISPLAYLEVEL(3, "test%3i : ZSTD_decompressDCtx() with multiple ddicts : ", testNb++);
+        {
+            const size_t numDicts = 128;
+            const size_t numFrames = 4;
+            size_t i;
+            ZSTD_DCtx* dctx = ZSTD_createDCtx();
+            ZSTD_DDict** ddictTable = (ZSTD_DDict**)malloc(sizeof(ZSTD_DDict*)*numDicts);
+            ZSTD_CDict** cdictTable = (ZSTD_CDict**)malloc(sizeof(ZSTD_CDict*)*numDicts);
+            U32 dictIDSeed = seed;
+            /* Create new compressed buffer that will hold frames with differing dictIDs */
+            char* dictBufferMulti = (char*)malloc(sizeof(char) * dictBufferFixedSize);  /* Modifiable copy of fixed full dict buffer */
+
+            ZSTD_memcpy(dictBufferMulti, dictBufferFixed, dictBufferFixedSize);
+            /* Create a bunch of DDicts with random dict IDs */
+            for (i = 0; i < numDicts; ++i) {
+                U32 currDictID = FUZ_rand(&dictIDSeed);
+                MEM_writeLE32(dictBufferMulti+ZSTD_FRAMEIDSIZE, currDictID);
+                ddictTable[i] = ZSTD_createDDict(dictBufferMulti, dictBufferFixedSize);
+                cdictTable[i] = ZSTD_createCDict(dictBufferMulti, dictBufferFixedSize, 3);
+                if (!ddictTable[i] || !cdictTable[i] || ZSTD_getDictID_fromCDict(cdictTable[i]) != ZSTD_getDictID_fromDDict(ddictTable[i])) {
+                    goto _output_error;
+                }
+            }
+            /* Compress a few frames using random CDicts */
+            {
+                size_t off = 0;
+                /* only use the first half so we don't push against size limit of compressedBuffer */
+                size_t const segSize = (CNBuffSize / 2) / numFrames;
+                for (i = 0; i < numFrames; i++) {
+                    size_t dictIdx = FUZ_rand(&dictIDSeed) % numDicts;
+                    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+                    {   CHECK_NEWV(r, ZSTD_compress_usingCDict(cctx,
+                                    (BYTE*)compressedBuffer + off, CNBuffSize - off,
+                                    (BYTE*)CNBuffer + segSize * (size_t)i, segSize,
+                                    cdictTable[dictIdx]));
+                        off += r;
+                    }
+                }
+                cSize = off;
+            }
+
+            /* Decompression should succeed even though different dicts were used on different frames */
+            ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
+            ZSTD_DCtx_setParameter(dctx, ZSTD_d_refMultipleDDicts, ZSTD_rmd_refMultipleDDicts);
+            /* Reference every single ddict we made */
+            for (i = 0; i < numDicts; ++i) {
+                CHECK_Z( ZSTD_DCtx_refDDict(dctx, ddictTable[i]));
+            }
+            CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
+            /* Streaming decompression should also work */
+            {
+                ZSTD_inBuffer in = {compressedBuffer, cSize, 0};
+                ZSTD_outBuffer out = {decodedBuffer, CNBuffSize, 0};
+                while (in.pos < in.size) {
+                    CHECK_Z(ZSTD_decompressStream(dctx, &out, &in));
+                }
+            }
+            ZSTD_freeDCtx(dctx);
+            for (i = 0; i < numDicts; ++i) {
+                ZSTD_freeCDict(cdictTable[i]);
+                ZSTD_freeDDict(ddictTable[i]);
+            }
+            free(dictBufferMulti);
+            free(ddictTable);
+            free(cdictTable);
+        }
+        DISPLAYLEVEL(3, "OK \n");
+
         ZSTD_freeCCtx(cctx);
         free(dictBuffer);
         free(samplesSizes);
@@ -2419,12 +2725,8 @@
         int const compressionLevel = -1;
 
         assert(cctx != NULL);
-        {   ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize, 0);
-            size_t const cSize_1pass = ZSTD_compress_advanced(cctx,
-                                        compressedBuffer, compressedBufferSize,
-                                        CNBuffer, srcSize,
-                                        NULL, 0,
-                                        params);
+        {   size_t const cSize_1pass = ZSTD_compress(compressedBuffer, compressedBufferSize,
+                                                     CNBuffer, srcSize, compressionLevel);
             if (ZSTD_isError(cSize_1pass)) goto _output_error;
 
             CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compressionLevel) );
@@ -2739,7 +3041,7 @@
         free(seqs);
     }
     DISPLAYLEVEL(3, "OK \n");
-    
+
     DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences followed by ZSTD_compressSequences : ", testNb++);
     {
         size_t srcSize = 500 KB;
@@ -3044,7 +3346,84 @@
         free(dict);
     }
     DISPLAYLEVEL(3, "OK \n");
-#endif
+
+    DISPLAYLEVEL(3, "test%3i : ZSTD_getCParams() + dictionary ", testNb++);
+    {
+        ZSTD_compressionParameters const medium = ZSTD_getCParams(1, 16*1024-1, 0);
+        ZSTD_compressionParameters const large = ZSTD_getCParams(1, 128*1024-1, 0);
+        ZSTD_compressionParameters const smallDict = ZSTD_getCParams(1, 0, 400);
+        ZSTD_compressionParameters const mediumDict = ZSTD_getCParams(1, 0, 10000);
+        ZSTD_compressionParameters const largeDict = ZSTD_getCParams(1, 0, 100000);
+
+        assert(!memcmp(&smallDict, &mediumDict, sizeof(smallDict)));
+        assert(!memcmp(&medium, &mediumDict, sizeof(medium)));
+        assert(!memcmp(&large, &largeDict, sizeof(large)));
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
+    DISPLAYLEVEL(3, "test%3i : ZSTD_adjustCParams() + dictionary ", testNb++);
+    {
+        ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, 0, 0);
+        ZSTD_compressionParameters const smallDict = ZSTD_adjustCParams(cParams, 0, 400);
+        ZSTD_compressionParameters const smallSrcAndDict = ZSTD_adjustCParams(cParams, 500, 400);
+
+        assert(smallSrcAndDict.windowLog == 10);
+        assert(!memcmp(&cParams, &smallDict, sizeof(cParams)));
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
+    DISPLAYLEVEL(3, "test%3i : check compression mem usage monotonicity over levels for estimateCCtxSize() : ", testNb++);
+    {
+        int level = 1;
+        size_t prevSize = 0;
+        for (; level < ZSTD_maxCLevel(); ++level) {
+            size_t const currSize = ZSTD_estimateCCtxSize(level);
+            if (prevSize > currSize) {
+                DISPLAYLEVEL(3, "Error! previous cctx size: %zu at level: %d is larger than current cctx size: %zu at level: %d",
+                             prevSize, level-1, currSize, level);
+                goto _output_error;
+            }
+            prevSize = currSize;
+        }
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
+    DISPLAYLEVEL(3, "test%3i : check estimateCCtxSize() always larger or equal to ZSTD_estimateCCtxSize_usingCParams() : ", testNb++);
+    {
+        size_t const kSizeIncrement = 2 KB;
+        int level = -3;
+
+        for (; level <= ZSTD_maxCLevel(); ++level) {
+            size_t dictSize = 0;
+            for (; dictSize <= 256 KB; dictSize += 8 * kSizeIncrement) {
+                size_t srcSize = 2 KB;
+                for (; srcSize < 300 KB; srcSize += kSizeIncrement) {
+                    ZSTD_compressionParameters const cParams = ZSTD_getCParams(level, srcSize, dictSize);
+                    size_t const cctxSizeUsingCParams = ZSTD_estimateCCtxSize_usingCParams(cParams);
+                    size_t const cctxSizeUsingLevel = ZSTD_estimateCCtxSize(level);
+                    if (cctxSizeUsingLevel < cctxSizeUsingCParams
+                     || ZSTD_isError(cctxSizeUsingCParams)
+                     || ZSTD_isError(cctxSizeUsingLevel)) {
+                        DISPLAYLEVEL(3, "error! l: %d dict: %zu srcSize: %zu cctx size cpar: %zu, cctx size level: %zu\n",
+                                     level, dictSize, srcSize, cctxSizeUsingCParams, cctxSizeUsingLevel);
+                        goto _output_error;
+                    }
+                }
+            }
+        }
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
+    DISPLAYLEVEL(3, "test%3i : thread pool API tests : \n", testNb++)
+    {
+        int const threadPoolTestResult = threadPoolTests();
+        if (threadPoolTestResult) {
+            goto _output_error;
+        }
+    }
+    DISPLAYLEVEL(3, "thread pool tests OK \n");
+
+#endif /* ZSTD_MULTITHREAD */
 
 _end:
     free(CNBuffer);
diff --git a/tests/gzip/Makefile b/tests/gzip/Makefile
index 73f62f0..a50350f 100644
--- a/tests/gzip/Makefile
+++ b/tests/gzip/Makefile
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2017-2020, Facebook, Inc.
+# Copyright (c) Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/invalidDictionaries.c b/tests/invalidDictionaries.c
index 23e93fd..b71f741 100644
--- a/tests/invalidDictionaries.c
+++ b/tests/invalidDictionaries.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/legacy.c b/tests/legacy.c
index 3d3ec43..ac4938f 100644
--- a/tests/legacy.c
+++ b/tests/legacy.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/libzstd_partial_builds.sh b/tests/libzstd_partial_builds.sh
index bee2dbd..05dad8f 100755
--- a/tests/libzstd_partial_builds.sh
+++ b/tests/libzstd_partial_builds.sh
@@ -21,7 +21,7 @@
     $ECHO "$@ correctly not present"  # for some reason, this $ECHO must exist, otherwise mustBeAbsent() always fails (??)
 }
 
-# default compilation : all features enabled
+# default compilation : all features enabled - no zbuff
 $ECHO "testing default library compilation"
 CFLAGS= make -C $DIR/../lib libzstd.a > $INTOVOID
 nm $DIR/../lib/libzstd.a | $GREP "\.o" > tmplog
@@ -29,10 +29,10 @@
 isPresent "zstd_decompress.o"
 isPresent "zdict.o"
 isPresent "zstd_v07.o"
-isPresent "zbuff_compress.o"
+mustBeAbsent "zbuff_compress.o"
 $RM $DIR/../lib/libzstd.a tmplog
 
-# compression disabled => also disable zdict and zbuff
+# compression disabled => also disable zdict
 $ECHO "testing with compression disabled"
 ZSTD_LIB_COMPRESSION=0 CFLAGS= make -C $DIR/../lib libzstd.a > $INTOVOID
 nm $DIR/../lib/libzstd.a | $GREP "\.o" > tmplog
@@ -43,7 +43,7 @@
 mustBeAbsent "zbuff_compress.o"
 $RM $DIR/../lib/libzstd.a tmplog
 
-# decompression disabled => also disable legacy and zbuff
+# decompression disabled => also disable legacy
 $ECHO "testing with decompression disabled"
 ZSTD_LIB_DECOMPRESSION=0 CFLAGS= make -C $DIR/../lib libzstd.a > $INTOVOID
 nm $DIR/../lib/libzstd.a | $GREP "\.o" > tmplog
@@ -65,6 +65,17 @@
 mustBeAbsent "zbuff_compress.o"
 $RM $DIR/../lib/libzstd.a tmplog
 
+# deprecated functions enabled => zbuff present
+$ECHO "testing with deprecated functions enabled"
+ZSTD_LIB_DEPRECATED=1 CFLAGS= make -C $DIR/../lib libzstd.a > $INTOVOID
+nm $DIR/../lib/libzstd.a | $GREP "\.o" > tmplog
+isPresent "zstd_compress.o"
+isPresent "zstd_decompress.o"
+isPresent "zdict.o"
+isPresent "zstd_v07.o"
+isPresent "zbuff_compress.o"
+$RM $DIR/../lib/libzstd.a tmplog
+
 # dictionary builder disabled => only remove zdict
 $ECHO "testing with dictionary builder disabled"
 ZSTD_LIB_DICTBUILDER=0 CFLAGS= make -C $DIR/../lib libzstd.a > $INTOVOID
@@ -73,7 +84,7 @@
 isPresent "zstd_decompress.o"
 mustBeAbsent "zdict.o"
 isPresent "zstd_v07.o"
-isPresent "zbuff_compress.o"
+mustBeAbsent "zbuff_compress.o"
 $RM $DIR/../lib/libzstd.a tmplog
 
 # both decompression and dictionary builder disabled => only compression remains
diff --git a/tests/longmatch.c b/tests/longmatch.c
index 93e78dd..a171c0e 100644
--- a/tests/longmatch.c
+++ b/tests/longmatch.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/paramgrill.c b/tests/paramgrill.c
index 439aebe..a0cfa58 100644
--- a/tests/paramgrill.c
+++ b/tests/paramgrill.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/playTests.sh b/tests/playTests.sh
index 51b42b6..fa748c0 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -114,15 +114,33 @@
 case "$UNAME" in
   Darwin) MD5SUM="md5 -r" ;;
   FreeBSD) MD5SUM="gmd5sum" ;;
+  NetBSD) MD5SUM="md5 -n" ;;
   OpenBSD) MD5SUM="md5" ;;
   *) MD5SUM="md5sum" ;;
 esac
 
 MTIME="stat -c %Y"
 case "$UNAME" in
-    Darwin | FreeBSD | OpenBSD) MTIME="stat -f %m" ;;
+    Darwin | FreeBSD | OpenBSD | NetBSD) MTIME="stat -f %m" ;;
 esac
 
+GET_PERMS="stat -c %a"  # default (GNU stat): print a file's permission bits in octal
+case "$UNAME" in
+    Darwin | FreeBSD | OpenBSD | NetBSD) GET_PERMS="stat -f %Lp" ;;  # BSD stat: -f format, %Lp = low permission bits in octal
+esac
+
+assertFilePermissions() {  # usage: assertFilePermissions <file> <expected mode, as printed by $GET_PERMS>
+    STAT1=$($GET_PERMS "$1")  # actual mode of <file>; note STAT1/STAT2 are not local to this function
+    STAT2=$2  # expected mode; compared as a string, so spell it as stat prints it (callers pass 666, 400, 0)
+    [ "$STAT1" = "$STAT2" ] || die "permissions on $1 don't match expected ($STAT1 != $STAT2)"
+}
+
+assertSamePermissions() {  # usage: assertSamePermissions <file1> <file2>
+    STAT1=$($GET_PERMS "$1")  # mode of <file1>; note STAT1/STAT2 are not local to this function
+    STAT2=$($GET_PERMS "$2")  # mode of <file2>
+    [ "$STAT1" = "$STAT2" ] || die "permissions on $1 don't match those on $2 ($STAT1 != $STAT2)"
+}
+
 DIFF="diff"
 case "$UNAME" in
   SunOS) DIFF="gdiff" ;;
@@ -191,7 +209,7 @@
 zstd tmp -c > tmpCompressed
 zstd tmp --stdout > tmpCompressed       # long command format
 println "test : compress to named file"
-rm tmpCompressed
+rm -f tmpCompressed
 zstd tmp -o tmpCompressed
 test -f tmpCompressed   # file must be created
 println "test : force write, correct order"
@@ -345,7 +363,7 @@
 zstd tmp -f -o "$INTOVOID" 2>&1 | grep -v "Refusing to remove non-regular file"
 println "test : --rm on stdin"
 println a | zstd --rm > $INTOVOID   # --rm should remain silent
-rm tmp
+rm -f tmp
 zstd -f tmp && die "tmp not present : should have failed"
 test ! -f tmp.zst  # tmp.zst should not be created
 println "test : -d -f do not delete destination when source is not present"
@@ -353,7 +371,7 @@
 zstd -d -f tmp.zst && die "attempt to decompress a non existing file"
 test -f tmp  # destination file should still be present
 println "test : -f do not delete destination when source is not present"
-rm tmp         # erase source file
+rm -f tmp         # erase source file
 touch tmp.zst  # create destination file
 zstd -f tmp && die "attempt to compress a non existing file"
 test -f tmp.zst  # destination file should still be present
@@ -367,7 +385,7 @@
 dd bs=1048576 count=1 if=/dev/zero of=tmp
 zstd -d -o tmp1 "$TESTDIR/golden-decompression/rle-first-block.zst"
 $DIFF -s tmp1 tmp
-rm tmp*
+rm -f tmp*
 
 
 println "\n===>  compress multiple files"
@@ -414,7 +432,7 @@
 test -f tmp1.zst
 test -f tmp2.zst
 test -f tmp3.zst
-rm tmp1 tmp2 tmp3
+rm -f tmp1 tmp2 tmp3
 println "decompress tmp* : "
 zstd -df ./*.zst
 test -f tmp1
@@ -429,7 +447,7 @@
 test -f tmpdec  # should check size of tmpdec (should be 2*(tmp1 + tmp2 + tmp3))
 println "compress multiple files including a missing one (notHere) : "
 zstd -f tmp1 notHere tmp2 && die "missing file not detected!"
-rm tmp*
+rm -f tmp*
 
 
 if [ "$isWindows" = false ] ; then
@@ -444,6 +462,96 @@
     rm -rf tmp*
 fi
 
+println "\n===>  zstd created file permissions tests"
+if [ "$isWindows" = false ] ; then
+    rm -f tmp1 tmp2 tmp1.zst tmp2.zst tmp1.out tmp2.out # start from a clean slate (todo: remove)
+
+    ORIGINAL_UMASK=$(umask)  # saved so it can be restored at the end of this section
+    umask 0000  # mask nothing, so the files created below come out as 666
+
+    datagen > tmp1
+    datagen > tmp2
+    assertFilePermissions tmp1 666
+    assertFilePermissions tmp2 666
+
+    println "test : copy 666 permissions in file -> file compression "
+    zstd -f tmp1 -o tmp1.zst
+    assertSamePermissions tmp1 tmp1.zst
+    println "test : copy 666 permissions in file -> file decompression "
+    zstd -f -d tmp1.zst -o tmp1.out
+    assertSamePermissions tmp1.zst tmp1.out
+
+    rm -f tmp1.zst tmp1.out
+
+    println "test : copy 400 permissions in file -> file compression (write to a read-only file) "
+    chmod 0400 tmp1
+    assertFilePermissions tmp1 400
+    zstd -f tmp1 -o tmp1.zst
+    assertSamePermissions tmp1 tmp1.zst
+    println "test : copy 400 permissions in file -> file decompression (write to a read-only file) "
+    zstd -f -d tmp1.zst -o tmp1  # destination is the existing, read-only (0400) tmp1, not tmp1.out
+    assertSamePermissions tmp1.zst tmp1
+
+    rm -f tmp1.zst tmp1.out  # tmp1.out was not recreated in this stanza; rm -f tolerates that
+
+    println "test : check created permissions from stdin input in compression "
+    zstd -f -o tmp1.zst < tmp1  # input arrives on stdin, so there is no named source file
+    assertFilePermissions tmp1.zst 666
+    println "test : check created permissions from stdin input in decompression "
+    zstd -f -d -o tmp1.out < tmp1.zst
+    assertFilePermissions tmp1.out 666
+
+    rm -f tmp1.zst tmp1.out
+
+    println "test : check created permissions from multiple inputs in compression "
+    zstd -f tmp1 tmp2 -o tmp1.zst  # two inputs, one output (tmp1 is still 400 here, tmp2 is 666)
+    assertFilePermissions tmp1.zst 666
+    println "test : check created permissions from multiple inputs in decompression "
+    cp tmp1.zst tmp2.zst
+    zstd -f -d tmp1.zst tmp2.zst -o tmp1.out
+    assertFilePermissions tmp1.out 666
+
+    rm -f tmp1.zst tmp2.zst tmp1.out tmp2.out
+
+    println "test : check permissions on pre-existing output file in compression "
+    chmod 0600 tmp1
+    touch tmp1.zst
+    chmod 0400 tmp1.zst  # pre-existing output starts with a mode different from the source's
+    zstd -f tmp1 -o tmp1.zst
+    assertFilePermissions tmp1.zst 600  # expected to end up with the source's mode, not its old 400
+    println "test : check permissions on pre-existing output file in decompression "
+    chmod 0400 tmp1.zst
+    touch tmp1.out
+    chmod 0200 tmp1.out  # pre-existing output starts with a mode different from the source's
+    zstd -f -d tmp1.zst -o tmp1.out
+    assertFilePermissions tmp1.out 400  # expected to end up with the source's mode, not its old 200
+
+    rm -f tmp1.zst tmp1.out
+
+    umask 0666  # from here on every read/write bit is masked; outputs are expected to have mode 0
+    chmod 0666 tmp1 tmp2
+
+    println "test : respect umask when copying permissions in file -> file compression "
+    zstd -f tmp1 -o tmp1.zst
+    assertFilePermissions tmp1.zst 0
+    println "test : respect umask when copying permissions in file -> file decompression "
+    chmod 0666 tmp1.zst  # the archive was just asserted to have mode 0; open it up before reading it
+    zstd -f -d tmp1.zst -o tmp1.out
+    assertFilePermissions tmp1.out 0
+
+    rm -f tmp1.zst tmp1.out
+
+    println "test : respect umask when compressing from stdin input "
+    zstd -f -o tmp1.zst < tmp1
+    assertFilePermissions tmp1.zst 0
+    println "test : respect umask when decompressing from stdin input "
+    chmod 0666 tmp1.zst  # the archive was just asserted to have mode 0; open it up before reading it
+    zstd -f -d -o tmp1.out < tmp1.zst
+    assertFilePermissions tmp1.out 0
+
+    rm -f tmp1 tmp2 tmp1.zst tmp2.zst tmp1.out tmp2.out
+    umask $ORIGINAL_UMASK  # put back the umask saved at the top of this section
+fi
 
 if [ -n "$DEVNULLRIGHTS" ] ; then
     # these tests requires sudo rights, which is uncommon.
@@ -458,6 +566,22 @@
     ls -las $INTOVOID | grep "rw-rw-rw-"
 fi
 
+if [ -n "$READFROMBLOCKDEVICE" ] ; then
+    # This creates a temporary block device, which is only possible on unix-y
+    # systems, is somewhat invasive, and requires sudo. For these reasons, you
+    # have to specifically ask for this test by setting READFROMBLOCKDEVICE.
+    println "\n===> checking that zstd can read from a block device"
+    datagen -g65536 > tmp.img
+    sudo losetup -fP tmp.img  # -f: attach to the first unused loop device; -P: scan it for partitions
+    LOOP_DEV=$(losetup -a | grep 'tmp\.img' | cut -f1 -d:)  # text before the first ':' on the line naming tmp.img
+    [ -z "$LOOP_DEV" ] && die "failed to get loopback device"
+    sudoZstd $LOOP_DEV -c > tmp.img.zst && die "should fail without -f"  # sudoZstd is defined elsewhere in this script
+    sudoZstd -f $LOOP_DEV -c > tmp.img.zst
+    zstd -d tmp.img.zst -o tmp.img.copy
+    sudo losetup -d $LOOP_DEV  # detach the loop device before the comparison below, which may die
+    $DIFF -s tmp.img tmp.img.copy || die "round trip failed"
+    rm -f tmp.img tmp.img.zst tmp.img.copy
+fi
 
 println "\n===>  compress multiple files into an output directory, --output-dir-flat"
 println henlo > tmp1
@@ -485,23 +609,29 @@
 if [ "$isWindows" = false ] ; then
     println "\n===>  compress multiple files into an output directory and mirror input folder, --output-dir-mirror"
     println "test --output-dir-mirror" > tmp1
-    mkdir -p tmpInputTestDir/we/must/go/deeper
-    println cool > tmpInputTestDir/we/must/go/deeper/tmp2
+    mkdir -p tmpInputTestDir/we/.../..must/go/deeper..
+    println cool > tmpInputTestDir/we/.../..must/go/deeper../tmp2
     zstd tmp1 -r tmpInputTestDir --output-dir-mirror tmpOutDir
     test -f tmpOutDir/tmp1.zst
-    test -f tmpOutDir/tmpInputTestDir/we/must/go/deeper/tmp2.zst
+    test -f tmpOutDir/tmpInputTestDir/we/.../..must/go/deeper../tmp2.zst
 
     println "test: compress input dir will be ignored if it has '..'"
-    zstd  -r tmpInputTestDir/we/must/../must --output-dir-mirror non-exist && die "input cannot contain '..'"
+    zstd  -r tmpInputTestDir/we/.../..must/../..mustgo/deeper.. --output-dir-mirror non-exist && die "input cannot contain '..'"
+    zstd  -r tmpInputTestDir/we/.../..must/deeper../.. --output-dir-mirror non-exist && die "input cannot contain '..'"
+    zstd  -r ../tests/tmpInputTestDir/we/.../..must/deeper.. --output-dir-mirror non-exist && die "input cannot contain '..'"
     test ! -d non-exist
 
+    println "test: compress input dir should succeed with benign uses of '..'"
+    zstd  -r tmpInputTestDir/we/.../..must/go/deeper.. --output-dir-mirror tmpout
+    test -d tmpout
+
     println "test : decompress multiple files into an output directory, --output-dir-mirror"
     zstd tmpOutDir -r -d --output-dir-mirror tmpOutDirDecomp
     test -f tmpOutDirDecomp/tmpOutDir/tmp1
-    test -f tmpOutDirDecomp/tmpOutDir/tmpInputTestDir/we/must/go/deeper/tmp2
+    test -f tmpOutDirDecomp/tmpOutDir/tmpInputTestDir/we/.../..must/go/deeper../tmp2
 
     println "test: decompress input dir will be ignored if it has '..'"
-    zstd  -r tmpOutDir/tmpInputTestDir/we/must/../must --output-dir-mirror non-exist && die "input cannot contain '..'"
+    zstd  -r tmpOutDir/tmpInputTestDir/we/.../..must/../..must --output-dir-mirror non-exist && die "input cannot contain '..'"
     test ! -d non-exist
 
     rm -rf tmp*
@@ -644,16 +774,16 @@
 ln -s helloworld.zst helloworld.link.zst
 $EXE_PREFIX ./zstdcat helloworld.link.zst > result.tmp
 $DIFF helloworld.tmp result.tmp
-rm zstdcat
-rm result.tmp
+rm -f zstdcat
+rm -f result.tmp
 println "testing zcat symlink"
 ln -sf "$ZSTD_BIN" zcat
 $EXE_PREFIX ./zcat helloworld.zst > result.tmp
 $DIFF helloworld.tmp result.tmp
 $EXE_PREFIX ./zcat helloworld.link.zst > result.tmp
 $DIFF helloworld.tmp result.tmp
-rm zcat
-rm ./*.tmp ./*.zstd
+rm -f zcat
+rm -f ./*.tmp ./*.zstd
 println "frame concatenation tests completed"
 
 
@@ -715,7 +845,7 @@
 zstd -d -v -f tmpSparseCompressed -c >> tmpSparseRegenerated
 ls -ls tmpSparse*  # look at file size and block size on disk
 $DIFF tmpSparse2M tmpSparseRegenerated
-rm tmpSparse*
+rm -f tmpSparse*
 
 
 println "\n===>  stream-size mode"
@@ -798,6 +928,8 @@
 zstd -f tmp -D tmpDict
 zstd -d tmp.zst -D tmpDict -fo result
 $DIFF "$TESTFILE" result
+println "- Dictionary compression with hlog < clog"
+zstd -6f tmp -D tmpDict --zstd=clog=25,hlog=23
 println "- Dictionary compression with btlazy2 strategy"
 zstd -f tmp -D tmpDict --zstd=strategy=6
 zstd -d tmp.zst -D tmpDict -fo result
@@ -859,7 +991,7 @@
   println "- Create dictionary with multithreading enabled"
   zstd --train -T0 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict
 fi
-rm tmp* dictionary
+rm -f tmp* dictionary
 
 
 println "\n===>  fastCover dictionary builder : advanced options "
@@ -901,7 +1033,7 @@
 test -f tmpDict
 zstd --train-fastcover=k=56,d=8 "$TESTDIR"/*.c "$PRGDIR"/*.c
 test -f dictionary
-rm tmp* dictionary
+rm -f tmp* dictionary
 
 
 println "\n===>  legacy dictionary builder "
@@ -929,7 +1061,7 @@
 test -f tmpDict
 zstd --train-legacy "$TESTDIR"/*.c "$PRGDIR"/*.c
 test -f dictionary
-rm tmp* dictionary
+rm -f tmp* dictionary
 
 
 println "\n===>  integrity tests "
@@ -1001,7 +1133,7 @@
         gzip -t -v tmp.gz
         gzip -f tmp
         zstd -d -f -v tmp.gz
-        rm tmp*
+        rm -f tmp*
     else
         println "gzip binary not detected"
     fi
@@ -1018,7 +1150,7 @@
     zstd -f tmp
     cat tmp.gz tmp.zst tmp.gz tmp.zst | zstd -d -f -o tmp
     truncateLastByte tmp.gz | zstd -t > $INTOVOID && die "incomplete frame not detected !"
-    rm tmp*
+    rm -f tmp*
 else
     println "gzip mode not supported"
 fi
@@ -1049,7 +1181,7 @@
         lzma -Q -f -k --lzma1 tmp
         zstd -d -f -v tmp.xz
         zstd -d -f -v tmp.lzma
-        rm tmp*
+        rm -f tmp*
         println "Creating symlinks"
         ln -s "$ZSTD_BIN" ./xz
         ln -s "$ZSTD_BIN" ./unxz
@@ -1066,8 +1198,8 @@
         ./xz -d tmp.xz
         lzma -Q tmp
         ./lzma -d tmp.lzma
-        rm xz unxz lzma unlzma
-        rm tmp*
+        rm -f xz unxz lzma unlzma
+        rm -f tmp*
     else
         println "xz binary not detected"
     fi
@@ -1086,7 +1218,7 @@
     cat tmp.xz tmp.lzma tmp.zst tmp.lzma tmp.xz tmp.zst | zstd -d -f -o tmp
     truncateLastByte tmp.xz | zstd -t > $INTOVOID && die "incomplete frame not detected !"
     truncateLastByte tmp.lzma | zstd -t > $INTOVOID && die "incomplete frame not detected !"
-    rm tmp*
+    rm -f tmp*
 else
     println "xz mode not supported"
 fi
@@ -1105,7 +1237,7 @@
         lz4 -t -v tmp.lz4
         lz4 -f -m tmp   # ensure result is sent into tmp.lz4, not stdout
         zstd -d -f -v tmp.lz4
-        rm tmp*
+        rm -f tmp*
     else
         println "lz4 binary not detected"
     fi
@@ -1121,7 +1253,7 @@
     zstd -f tmp
     cat tmp.lz4 tmp.zst tmp.lz4 tmp.zst | zstd -d -f -o tmp
     truncateLastByte tmp.lz4 | zstd -t > $INTOVOID && die "incomplete frame not detected !"
-    rm tmp*
+    rm -f tmp*
 else
     println "\nlz4 mode not supported"
 fi
@@ -1155,7 +1287,7 @@
 datagen > tmp
 tar cf tmp.tar tmp
 zstd tmp.tar -o tmp.tzst
-rm tmp.tar
+rm -f tmp.tar
 zstd -d tmp.tzst
 [ -e tmp.tar ] || die ".tzst failed to decompress to .tar!"
 rm -f tmp.tar tmp.tzst
@@ -1216,7 +1348,7 @@
     println "\n===>  zstdmt round-trip tests "
     roundTripTest -g4M "1 -T0"
     roundTripTest -g8M "3 -T2"
-    roundTripTest -g8M "19 -T0 --long"
+    roundTripTest -g8M "19 --long"
     roundTripTest -g8000K "2 --threads=2"
     fileRoundTripTest -g4M "19 -T2 -B1M"
 
@@ -1234,7 +1366,7 @@
     ZSTD_NBTHREADS=50000000000 zstd -f mt_tmp # numeric value too large, warn and revert to default setting=
     ZSTD_NBTHREADS=2  zstd -f mt_tmp # correct usage
     ZSTD_NBTHREADS=1  zstd -f mt_tmp # correct usage: single thread
-    rm mt_tmp*
+    rm -f mt_tmp*
 
     println "\n===>  ovLog tests "
     datagen -g2MB > tmp
@@ -1258,7 +1390,7 @@
     println "\n===>  no multithreading, skipping zstdmt tests "
 fi
 
-rm tmp*
+rm -f tmp*
 
 println "\n===>  zstd --list/-l single frame tests "
 datagen > tmp1
@@ -1291,9 +1423,9 @@
 dd bs=1 count=100 if=$FULL_COMPRESSED_FILE of=$TRUNCATED_COMPRESSED_FILE
 zstd --list $TRUNCATED_COMPRESSED_FILE && die "-l must fail on truncated file"
 
-rm $TEST_DATA_FILE
-rm $FULL_COMPRESSED_FILE
-rm $TRUNCATED_COMPRESSED_FILE
+rm -f $TEST_DATA_FILE
+rm -f $FULL_COMPRESSED_FILE
+rm -f $TRUNCATED_COMPRESSED_FILE
 
 println "\n===>  zstd --list/-l errors when presented with stdin / no files"
 zstd -l && die "-l must fail on empty list of files"
@@ -1322,7 +1454,22 @@
 zstd -l tmp1.zst
 zstd -lv tmp1.zst
 
-rm tmp*
+println "\n===>  zstd trace tests "
+zstd -f --trace tmp.trace tmp1
+zstd -f --trace tmp.trace tmp1 tmp2 tmp3
+zstd -f --trace tmp.trace tmp1 tmp2 tmp3 -o /dev/null
+zstd -f --trace tmp.trace tmp1 tmp2 tmp3 --single-thread
+zstd -f --trace tmp.trace -D tmp1 tmp2 tmp3 -o /dev/null
+zstd -f --trace tmp.trace -D tmp1 tmp2 tmp3 -o /dev/null --single-thread
+zstd --trace tmp.trace -t tmp1.zst
+zstd --trace tmp.trace -t tmp1.zst tmp2.zst
+zstd -f --trace tmp.trace -d tmp1.zst
+zstd -f --trace tmp.trace -d tmp1.zst tmp2.zst tmp3.zst
+zstd -D tmp1 tmp2 -c | zstd --trace tmp.trace -t -D tmp1
+zstd -b1e10i0 --trace tmp.trace tmp1
+zstd -b1e10i0 --trace tmp.trace tmp1 tmp2 tmp3
+
+rm -f tmp*
 
 
 println "\n===>   zstd long distance matching tests "
@@ -1342,8 +1489,6 @@
 longCSize19=$(datagen -g2M | zstd -19 --long -c | wc -c)
 optCSize19wlog23=$(datagen -g2M | zstd -19 -c  --zstd=wlog=23 | wc -c)
 longCSize19wlog23=$(datagen -g2M | zstd -19 -c --long=23 | wc -c)
-optCSize22=$(datagen -g900K | zstd -22 --ultra -c | wc -c)
-longCSize22=$(datagen -g900K | zstd -22 --ultra --long -c | wc -c)
 if [ "$longCSize16" -gt "$optCSize16" ]; then
     echo using --long on compression level 16 should not cause compressed size regression
     exit 1
@@ -1353,9 +1498,6 @@
 elif [ "$longCSize19wlog23" -gt "$optCSize19wlog23" ]; then
     echo using --long on compression level 19 with wLog=23 should not cause compressed size regression
     exit 1
-elif [ "$longCSize22" -gt "$optCSize22" ]; then
-    echo using --long on compression level 22 should not cause compressed size regression
-    exit 1
 fi
 
 
@@ -1411,6 +1553,14 @@
 zstd -15 --patch-from=tmp_dict tmp_patch 2>&1 | grep "long mode automatically triggered"
 rm -rf tmp*
 
+println "\n===> patch-from very large dictionary and file test"
+datagen -g550000000 -P0 > tmp_dict
+datagen -g100000000 -P1 > tmp_patch
+zstd --long=30 -1f --patch-from tmp_dict tmp_patch
+zstd --long=30 -df --patch-from tmp_dict tmp_patch.zst -o tmp_patch_recon
+$DIFF -s tmp_patch_recon tmp_patch
+rm -rf tmp*
+
 println "\n===> patch-from --stream-size test"
 datagen -g1000 -P50 > tmp_dict
 datagen -g1000 -P10 > tmp_patch
diff --git a/tests/poolTests.c b/tests/poolTests.c
index e1576ba..08f31c0 100644
--- a/tests/poolTests.c
+++ b/tests/poolTests.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/rateLimiter.py b/tests/rateLimiter.py
index 1068c44..2629372 100755
--- a/tests/rateLimiter.py
+++ b/tests/rateLimiter.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 # ################################################################
-# Copyright (c) 2018-2020, Facebook, Inc.
+# Copyright (c) Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/regression/Makefile b/tests/regression/Makefile
index 87c1c2b..d0d7bcf 100644
--- a/tests/regression/Makefile
+++ b/tests/regression/Makefile
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2015-2020, Facebook, Inc.
+# Copyright (c) Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/regression/config.c b/tests/regression/config.c
index ed6b692..4c66dd1 100644
--- a/tests/regression/config.c
+++ b/tests/regression/config.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -28,20 +28,134 @@
     };
 
 /* Define a config for each level we want to test with. */
-#define LEVEL(x)                                                \
-    param_value_t const level_##x##_param_values[] = {          \
-        {.param = ZSTD_c_compressionLevel, .value = x},         \
-    };                                                          \
-    config_t const level_##x = {                                \
-        .name = "level " #x,                                    \
-        .cli_args = "-" #x,                                     \
-        .param_values = PARAM_VALUES(level_##x##_param_values), \
-    };                                                          \
-    config_t const level_##x##_dict = {                         \
-        .name = "level " #x " with dict",                       \
-        .cli_args = "-" #x,                                     \
-        .param_values = PARAM_VALUES(level_##x##_param_values), \
-        .use_dictionary = 1,                                    \
+#define LEVEL(x)                                                                  \
+    param_value_t const level_##x##_param_values[] = {                            \
+        {.param = ZSTD_c_compressionLevel, .value = x},                           \
+    };                                                                            \
+    param_value_t const level_##x##_param_values_dms[] = {                        \
+        {.param = ZSTD_c_compressionLevel, .value = x},                           \
+        {.param = ZSTD_c_enableDedicatedDictSearch, .value = 0},                  \
+        {.param = ZSTD_c_forceAttachDict, .value = ZSTD_dictForceAttach},         \
+    };                                                                            \
+    param_value_t const level_##x##_param_values_dds[] = {                        \
+        {.param = ZSTD_c_compressionLevel, .value = x},                           \
+        {.param = ZSTD_c_enableDedicatedDictSearch, .value = 1},                  \
+        {.param = ZSTD_c_forceAttachDict, .value = ZSTD_dictForceAttach},         \
+    };                                                                            \
+    param_value_t const level_##x##_param_values_dictcopy[] = {                   \
+        {.param = ZSTD_c_compressionLevel, .value = x},                           \
+        {.param = ZSTD_c_enableDedicatedDictSearch, .value = 0},                  \
+        {.param = ZSTD_c_forceAttachDict, .value = ZSTD_dictForceCopy},           \
+    };                                                                            \
+    param_value_t const level_##x##_param_values_dictload[] = {                   \
+        {.param = ZSTD_c_compressionLevel, .value = x},                           \
+        {.param = ZSTD_c_enableDedicatedDictSearch, .value = 0},                  \
+        {.param = ZSTD_c_forceAttachDict, .value = ZSTD_dictForceLoad},           \
+    };                                                                            \
+    config_t const level_##x = {                                                  \
+        .name = "level " #x,                                                      \
+        .cli_args = "-" #x,                                                       \
+        .param_values = PARAM_VALUES(level_##x##_param_values),                   \
+    };                                                                            \
+    config_t const level_##x##_dict = {                                           \
+        .name = "level " #x " with dict",                                         \
+        .cli_args = "-" #x,                                                       \
+        .param_values = PARAM_VALUES(level_##x##_param_values),                   \
+        .use_dictionary = 1,                                                      \
+    };                                                                            \
+    config_t const level_##x##_dict_dms = {                                       \
+        .name = "level " #x " with dict dms",                                     \
+        .cli_args = "-" #x,                                                       \
+        .param_values = PARAM_VALUES(level_##x##_param_values_dms),               \
+        .use_dictionary = 1,                                                      \
+        .advanced_api_only = 1,                                                   \
+    };                                                                            \
+    config_t const level_##x##_dict_dds = {                                       \
+        .name = "level " #x " with dict dds",                                     \
+        .cli_args = "-" #x,                                                       \
+        .param_values = PARAM_VALUES(level_##x##_param_values_dds),               \
+        .use_dictionary = 1,                                                      \
+        .advanced_api_only = 1,                                                   \
+    };                                                                            \
+    config_t const level_##x##_dict_copy = {                                      \
+        .name = "level " #x " with dict copy",                                    \
+        .cli_args = "-" #x,                                                       \
+        .param_values = PARAM_VALUES(level_##x##_param_values_dictcopy),          \
+        .use_dictionary = 1,                                                      \
+        .advanced_api_only = 1,                                                   \
+    };                                                                            \
+    config_t const level_##x##_dict_load = {                                      \
+        .name = "level " #x " with dict load",                                    \
+        .cli_args = "-" #x,                                                       \
+        .param_values = PARAM_VALUES(level_##x##_param_values_dictload),          \
+        .use_dictionary = 1,                                                      \
+        .advanced_api_only = 1,                                                   \
+    };
+
+/* Define configs that exercise the row-hash match finder: compression level x
+ * with ZSTD_c_useRowMatchFinder set to y, alone and with each dictionary mode. */
+#define ROW_LEVEL(x, y)                                                            \
+    param_value_t const row_##y##_level_##x##_param_values[] = {                   \
+        {.param = ZSTD_c_useRowMatchFinder, .value = y},                           \
+        {.param = ZSTD_c_compressionLevel, .value = x},                            \
+    };                                                                             \
+    param_value_t const row_##y##_level_##x##_param_values_dms[] = {               \
+        {.param = ZSTD_c_useRowMatchFinder, .value = y},                           \
+        {.param = ZSTD_c_compressionLevel, .value = x},                            \
+        {.param = ZSTD_c_enableDedicatedDictSearch, .value = 0},                   \
+        {.param = ZSTD_c_forceAttachDict, .value = ZSTD_dictForceAttach},          \
+    };                                                                             \
+    param_value_t const row_##y##_level_##x##_param_values_dds[] = {               \
+        {.param = ZSTD_c_useRowMatchFinder, .value = y},                           \
+        {.param = ZSTD_c_compressionLevel, .value = x},                            \
+        {.param = ZSTD_c_enableDedicatedDictSearch, .value = 1},                   \
+        {.param = ZSTD_c_forceAttachDict, .value = ZSTD_dictForceAttach},          \
+    };                                                                             \
+    param_value_t const row_##y##_level_##x##_param_values_dictcopy[] = {          \
+        {.param = ZSTD_c_useRowMatchFinder, .value = y},                           \
+        {.param = ZSTD_c_compressionLevel, .value = x},                            \
+        {.param = ZSTD_c_enableDedicatedDictSearch, .value = 0},                   \
+        {.param = ZSTD_c_forceAttachDict, .value = ZSTD_dictForceCopy},            \
+    };                                                                             \
+    param_value_t const row_##y##_level_##x##_param_values_dictload[] = {          \
+        {.param = ZSTD_c_useRowMatchFinder, .value = y},                           \
+        {.param = ZSTD_c_compressionLevel, .value = x},                            \
+        {.param = ZSTD_c_enableDedicatedDictSearch, .value = 0},                   \
+        {.param = ZSTD_c_forceAttachDict, .value = ZSTD_dictForceLoad},            \
+    };                                                                             \
+    config_t const row_##y##_level_##x = {                                         \
+        .name = "level " #x " row " #y,                                            \
+        .cli_args = "-" #x,                                                        \
+        .param_values = PARAM_VALUES(row_##y##_level_##x##_param_values),          \
+        .advanced_api_only = 1,                                                    \
+    };                                                                             \
+    config_t const row_##y##_level_##x##_dict_dms = {                              \
+        .name = "level " #x " row " #y " with dict dms",                           \
+        .cli_args = "-" #x,                                                        \
+        .param_values = PARAM_VALUES(row_##y##_level_##x##_param_values_dms),      \
+        .use_dictionary = 1,                                                       \
+        .advanced_api_only = 1,                                                    \
+    };                                                                             \
+    config_t const row_##y##_level_##x##_dict_dds = {                              \
+        .name = "level " #x " row " #y " with dict dds",                           \
+        .cli_args = "-" #x,                                                        \
+        .param_values = PARAM_VALUES(row_##y##_level_##x##_param_values_dds),      \
+        .use_dictionary = 1,                                                       \
+        .advanced_api_only = 1,                                                    \
+    };                                                                             \
+    config_t const row_##y##_level_##x##_dict_copy = {                             \
+        .name = "level " #x " row " #y " with dict copy",                          \
+        .cli_args = "-" #x,                                                        \
+        .param_values = PARAM_VALUES(row_##y##_level_##x##_param_values_dictcopy), \
+        .use_dictionary = 1,                                                       \
+        .advanced_api_only = 1,                                                    \
+    };                                                                             \
+    config_t const row_##y##_level_##x##_dict_load = {                             \
+        .name = "level " #x " row " #y " with dict load",                          \
+        .cli_args = "-" #x,                                                        \
+        .param_values = PARAM_VALUES(row_##y##_level_##x##_param_values_dictload), \
+        .use_dictionary = 1,                                                       \
+        .advanced_api_only = 1,                                                    \
     };
 
 #define PARAM_VALUES(pv) \
@@ -51,6 +165,7 @@
 
 #undef LEVEL
 #undef FAST_LEVEL
+#undef ROW_LEVEL
 
 static config_t no_pledged_src_size = {
     .name = "no source size",
@@ -59,6 +174,14 @@
     .no_pledged_src_size = 1,
 };
 
+static config_t no_pledged_src_size_with_dict = { /* dictionary variant of the "no source size" config */
+    .name = "no source size with dict",
+    .cli_args = "", /* empty string rather than NULL */
+    .param_values = PARAM_VALUES(level_0_param_values), /* reuses the level 0 parameter values */
+    .no_pledged_src_size = 1, /* the source size is not pledged */
+    .use_dictionary = 1, /* this config uses a dictionary */
+};
+
 static param_value_t const ldm_param_values[] = {
     {.param = ZSTD_c_enableLongDistanceMatching, .value = 1},
 };
@@ -186,12 +309,15 @@
 static config_t const* g_configs[] = {
 
 #define FAST_LEVEL(x) &level_fast##x, &level_fast##x##_dict,
-#define LEVEL(x) &level_##x, &level_##x##_dict,
+#define LEVEL(x) &level_##x, &level_##x##_dict, &level_##x##_dict_dms, &level_##x##_dict_dds, &level_##x##_dict_copy, &level_##x##_dict_load,
+#define ROW_LEVEL(x, y) &row_##y##_level_##x, &row_##y##_level_##x##_dict_dms, &row_##y##_level_##x##_dict_dds, &row_##y##_level_##x##_dict_copy, &row_##y##_level_##x##_dict_load,
 #include "levels.h"
+#undef ROW_LEVEL
 #undef LEVEL
 #undef FAST_LEVEL
 
     &no_pledged_src_size,
+    &no_pledged_src_size_with_dict,
     &ldm,
     &mt,
     &mt_ldm,
diff --git a/tests/regression/config.h b/tests/regression/config.h
index aa563b9..dd88937 100644
--- a/tests/regression/config.h
+++ b/tests/regression/config.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -53,6 +53,11 @@
      * when the method allows it. Defaults to yes.
      */
     int no_pledged_src_size;
+    /**
+     * Boolean parameter that says that this config should only be used
+     * for methods that use the advanced compression API.
+     */
+    int advanced_api_only;
 } config_t;
 
 /**
diff --git a/tests/regression/data.c b/tests/regression/data.c
index b75ac11..341b02d 100644
--- a/tests/regression/data.c
+++ b/tests/regression/data.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,6 +14,7 @@
 #include <errno.h>
 #include <stdio.h>
 #include <string.h>
+#include <stdlib.h>   /* free() */
 
 #include <sys/stat.h>
 
@@ -67,10 +68,27 @@
         },
 };
 
+data_t github_tar = { /* "github.tar" data set: a single file plus a dictionary */
+    .name = "github.tar",
+    .type = data_type_file, /* file-type data, as opposed to data_type_dir */
+    .data =
+        {
+            .url = REGRESSION_RELEASE("github.tar.zst"),
+            .xxhash64 = 0xa9b1b44b020df292LL, /* presumably the expected XXH64 of this resource -- TODO confirm */
+        },
+    .dict =
+        {
+            .url = REGRESSION_RELEASE("github.dict.zst"),
+            .xxhash64 = 0x1eddc6f737d3cb53LL, /* presumably the expected XXH64 of the dictionary -- TODO confirm */
+
+        },
+};
+
 static data_t* g_data[] = {
     &silesia,
     &silesia_tar,
     &github,
+    &github_tar,
     NULL,
 };
 
diff --git a/tests/regression/data.h b/tests/regression/data.h
index 90ed22f..e54e6a1 100644
--- a/tests/regression/data.h
+++ b/tests/regression/data.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/regression/levels.h b/tests/regression/levels.h
index 5e7d40a..3b211f8 100644
--- a/tests/regression/levels.h
+++ b/tests/regression/levels.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,6 +14,9 @@
 #ifndef FAST_LEVEL
 # error FAST_LEVEL(x) must be defined
 #endif
+#ifndef ROW_LEVEL
+# error ROW_LEVEL(x, y) must be defined
+#endif
 
 /**
  * The levels are chosen to trigger every strategy in every source size,
@@ -31,12 +34,22 @@
 
 LEVEL(3)
 LEVEL(4)
+/* ROW_LEVEL(x, y) triggers the row hash (force enabled and disabled) with
+ * different dictionary strategies, and 16/32 row entries based on the
+ * level/searchLog. x is the level; y: 1 == row disabled, 2 == row enabled.
+ */
+ROW_LEVEL(5, 1)
+ROW_LEVEL(5, 2)
 LEVEL(5)
 LEVEL(6)
+ROW_LEVEL(7, 1)
+ROW_LEVEL(7, 2)
 LEVEL(7)
 
 LEVEL(9)
 
+ROW_LEVEL(12, 1)
+ROW_LEVEL(12, 2)
 LEVEL(13)
 
 LEVEL(16)
diff --git a/tests/regression/method.c b/tests/regression/method.c
index 3c949a2..55b1154 100644
--- a/tests/regression/method.c
+++ b/tests/regression/method.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -102,6 +102,9 @@
      */
     if (base->data->type != data_type_file)
         return result_error(result_error_skip);
+    
+    if (config->advanced_api_only)
+        return result_error(result_error_skip);
 
     if (config->use_dictionary || config->no_pledged_src_size)
         return result_error(result_error_skip);
@@ -151,6 +154,9 @@
 
     if (base->data->type != data_type_dir)
         return result_error(result_error_skip);
+    
+    if (config->advanced_api_only)
+        return result_error(result_error_skip);
 
     int const level = config_get_level(config);
 
@@ -254,6 +260,9 @@
     if (config->cli_args == NULL)
         return result_error(result_error_skip);
 
+    if (config->advanced_api_only)
+        return result_error(result_error_skip);
+
     /* We don't support no pledged source size with directories. Too slow. */
     if (state->data->type == data_type_dir && config->no_pledged_src_size)
         return result_error(result_error_skip);
@@ -523,6 +532,10 @@
     result = result_error(result_error_skip);
     goto out;
   }
+  if (config->advanced_api_only) {
+    result = result_error(result_error_skip);
+    goto out;
+  }
   if (init_cstream(state, zcs, config, advanced, cdict ? &cd : NULL)) {
     result = result_error(result_error_compression_error);
     goto out;
@@ -651,7 +664,7 @@
 };
 
 method_t const old_streaming_cdict = {
-    .name = "old streaming cdcit",
+    .name = "old streaming cdict",
     .create = buffer_state_create,
     .compress = old_streaming_compress_cdict,
     .destroy = buffer_state_destroy,
diff --git a/tests/regression/method.h b/tests/regression/method.h
index 6884e54..1a36a93 100644
--- a/tests/regression/method.h
+++ b/tests/regression/method.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/regression/result.c b/tests/regression/result.c
index 2911722..1f879c1 100644
--- a/tests/regression/result.c
+++ b/tests/regression/result.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/regression/result.h b/tests/regression/result.h
index 0085c2a..197fa90 100644
--- a/tests/regression/result.h
+++ b/tests/regression/result.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/regression/results.csv b/tests/regression/results.csv
index 979b1d2..b94d550 100644
--- a/tests/regression/results.csv
+++ b/tests/regression/results.csv
@@ -6,16 +6,33 @@
 silesia.tar,                        level 1,                            compress simple,                    5334885
 silesia.tar,                        level 3,                            compress simple,                    4861425
 silesia.tar,                        level 4,                            compress simple,                    4799630
-silesia.tar,                        level 5,                            compress simple,                    4722324
-silesia.tar,                        level 6,                            compress simple,                    4672279
-silesia.tar,                        level 7,                            compress simple,                    4606715
-silesia.tar,                        level 9,                            compress simple,                    4554147
+silesia.tar,                        level 5,                            compress simple,                    4719256
+silesia.tar,                        level 6,                            compress simple,                    4677721
+silesia.tar,                        level 7,                            compress simple,                    4613541
+silesia.tar,                        level 9,                            compress simple,                    4555426
 silesia.tar,                        level 13,                           compress simple,                    4491764
 silesia.tar,                        level 16,                           compress simple,                    4381332
 silesia.tar,                        level 19,                           compress simple,                    4281605
 silesia.tar,                        uncompressed literals,              compress simple,                    4861425
 silesia.tar,                        uncompressed literals optimal,      compress simple,                    4281605
 silesia.tar,                        huffman literals,                   compress simple,                    6186042
+github.tar,                         level -5,                           compress simple,                    46856
+github.tar,                         level -3,                           compress simple,                    43754
+github.tar,                         level -1,                           compress simple,                    42490
+github.tar,                         level 0,                            compress simple,                    38441
+github.tar,                         level 1,                            compress simple,                    39265
+github.tar,                         level 3,                            compress simple,                    38441
+github.tar,                         level 4,                            compress simple,                    38467
+github.tar,                         level 5,                            compress simple,                    39693
+github.tar,                         level 6,                            compress simple,                    39621
+github.tar,                         level 7,                            compress simple,                    39213
+github.tar,                         level 9,                            compress simple,                    36758
+github.tar,                         level 13,                           compress simple,                    35621
+github.tar,                         level 16,                           compress simple,                    40255
+github.tar,                         level 19,                           compress simple,                    32837
+github.tar,                         uncompressed literals,              compress simple,                    38441
+github.tar,                         uncompressed literals optimal,      compress simple,                    32837
+github.tar,                         huffman literals,                   compress simple,                    42490
 silesia,                            level -5,                           compress cctx,                      6737607
 silesia,                            level -3,                           compress cctx,                      6444677
 silesia,                            level -1,                           compress cctx,                      6178460
@@ -23,10 +40,10 @@
 silesia,                            level 1,                            compress cctx,                      5313204
 silesia,                            level 3,                            compress cctx,                      4849552
 silesia,                            level 4,                            compress cctx,                      4786970
-silesia,                            level 5,                            compress cctx,                      4710236
-silesia,                            level 6,                            compress cctx,                      4660056
-silesia,                            level 7,                            compress cctx,                      4596296
-silesia,                            level 9,                            compress cctx,                      4543925
+silesia,                            level 5,                            compress cctx,                      4707794
+silesia,                            level 6,                            compress cctx,                      4666383
+silesia,                            level 7,                            compress cctx,                      4603381
+silesia,                            level 9,                            compress cctx,                      4546001
 silesia,                            level 13,                           compress cctx,                      4482135
 silesia,                            level 16,                           compress cctx,                      4377465
 silesia,                            level 19,                           compress cctx,                      4293330
@@ -36,7 +53,7 @@
 silesia,                            small window log,                   compress cctx,                      7084179
 silesia,                            small hash log,                     compress cctx,                      6555021
 silesia,                            small chain log,                    compress cctx,                      4931148
-silesia,                            explicit params,                    compress cctx,                      4794677
+silesia,                            explicit params,                    compress cctx,                      4794479
 silesia,                            uncompressed literals,              compress cctx,                      4849552
 silesia,                            uncompressed literals optimal,      compress cctx,                      4293330
 silesia,                            huffman literals,                   compress cctx,                      6178460
@@ -56,13 +73,13 @@
 github,                             level 4,                            compress cctx,                      136199
 github,                             level 4 with dict,                  compress cctx,                      41725
 github,                             level 5,                            compress cctx,                      135121
-github,                             level 5 with dict,                  compress cctx,                      38934
+github,                             level 5 with dict,                  compress cctx,                      38759
 github,                             level 6,                            compress cctx,                      135122
-github,                             level 6 with dict,                  compress cctx,                      38628
+github,                             level 6 with dict,                  compress cctx,                      38669
 github,                             level 7,                            compress cctx,                      135122
-github,                             level 7 with dict,                  compress cctx,                      38745
+github,                             level 7 with dict,                  compress cctx,                      38755
 github,                             level 9,                            compress cctx,                      135122
-github,                             level 9 with dict,                  compress cctx,                      39341
+github,                             level 9 with dict,                  compress cctx,                      39398
 github,                             level 13,                           compress cctx,                      134064
 github,                             level 13 with dict,                 compress cctx,                      39948
 github,                             level 16,                           compress cctx,                      134064
@@ -80,30 +97,30 @@
 github,                             uncompressed literals optimal,      compress cctx,                      134064
 github,                             huffman literals,                   compress cctx,                      175568
 github,                             multithreaded with advanced params, compress cctx,                      141102
-silesia,                            level -5,                           zstdcli,                            6882553
-silesia,                            level -3,                           zstdcli,                            6568424
-silesia,                            level -1,                           zstdcli,                            6183451
+silesia,                            level -5,                           zstdcli,                            6737655
+silesia,                            level -3,                           zstdcli,                            6444725
+silesia,                            level -1,                           zstdcli,                            6178508
 silesia,                            level 0,                            zstdcli,                            4849600
-silesia,                            level 1,                            zstdcli,                            5314210
+silesia,                            level 1,                            zstdcli,                            5313252
 silesia,                            level 3,                            zstdcli,                            4849600
 silesia,                            level 4,                            zstdcli,                            4787018
-silesia,                            level 5,                            zstdcli,                            4710284
-silesia,                            level 6,                            zstdcli,                            4660104
-silesia,                            level 7,                            zstdcli,                            4596344
-silesia,                            level 9,                            zstdcli,                            4543973
+silesia,                            level 5,                            zstdcli,                            4707842
+silesia,                            level 6,                            zstdcli,                            4666431
+silesia,                            level 7,                            zstdcli,                            4603429
+silesia,                            level 9,                            zstdcli,                            4546049
 silesia,                            level 13,                           zstdcli,                            4482183
-silesia,                            level 16,                           zstdcli,                            4377513
-silesia,                            level 19,                           zstdcli,                            4293378
-silesia,                            long distance mode,                 zstdcli,                            4839756
+silesia,                            level 16,                           zstdcli,                            4360299
+silesia,                            level 19,                           zstdcli,                            4283285
+silesia,                            long distance mode,                 zstdcli,                            4840806
 silesia,                            multithreaded,                      zstdcli,                            4849600
-silesia,                            multithreaded long distance mode,   zstdcli,                            4839756
-silesia,                            small window log,                   zstdcli,                            7111012
-silesia,                            small hash log,                     zstdcli,                            6555069
-silesia,                            small chain log,                    zstdcli,                            4931196
-silesia,                            explicit params,                    zstdcli,                            4797112
+silesia,                            multithreaded long distance mode,   zstdcli,                            4840806
+silesia,                            small window log,                   zstdcli,                            7095967
+silesia,                            small hash log,                     zstdcli,                            6526189
+silesia,                            small chain log,                    zstdcli,                            4912245
+silesia,                            explicit params,                    zstdcli,                            4795856
 silesia,                            uncompressed literals,              zstdcli,                            5128030
-silesia,                            uncompressed literals optimal,      zstdcli,                            4325520
-silesia,                            huffman literals,                   zstdcli,                            5331216
+silesia,                            uncompressed literals optimal,      zstdcli,                            4317944
+silesia,                            huffman literals,                   zstdcli,                            5326316
 silesia,                            multithreaded with advanced params, zstdcli,                            5128030
 silesia.tar,                        level -5,                           zstdcli,                            6738934
 silesia.tar,                        level -3,                           zstdcli,                            6448419
@@ -112,23 +129,23 @@
 silesia.tar,                        level 1,                            zstdcli,                            5336318
 silesia.tar,                        level 3,                            zstdcli,                            4861512
 silesia.tar,                        level 4,                            zstdcli,                            4800529
-silesia.tar,                        level 5,                            zstdcli,                            4723364
-silesia.tar,                        level 6,                            zstdcli,                            4673663
-silesia.tar,                        level 7,                            zstdcli,                            4608403
-silesia.tar,                        level 9,                            zstdcli,                            4554751
+silesia.tar,                        level 5,                            zstdcli,                            4720121
+silesia.tar,                        level 6,                            zstdcli,                            4678661
+silesia.tar,                        level 7,                            zstdcli,                            4614424
+silesia.tar,                        level 9,                            zstdcli,                            4556062
 silesia.tar,                        level 13,                           zstdcli,                            4491768
-silesia.tar,                        level 16,                           zstdcli,                            4381336
-silesia.tar,                        level 19,                           zstdcli,                            4281609
+silesia.tar,                        level 16,                           zstdcli,                            4356831
+silesia.tar,                        level 19,                           zstdcli,                            4264491
 silesia.tar,                        no source size,                     zstdcli,                            4861508
-silesia.tar,                        long distance mode,                 zstdcli,                            4853190
+silesia.tar,                        long distance mode,                 zstdcli,                            4853226
 silesia.tar,                        multithreaded,                      zstdcli,                            4861512
-silesia.tar,                        multithreaded long distance mode,   zstdcli,                            4853190
+silesia.tar,                        multithreaded long distance mode,   zstdcli,                            4853226
 silesia.tar,                        small window log,                   zstdcli,                            7101576
-silesia.tar,                        small hash log,                     zstdcli,                            6587959
-silesia.tar,                        small chain log,                    zstdcli,                            4943310
-silesia.tar,                        explicit params,                    zstdcli,                            4822362
+silesia.tar,                        small hash log,                     zstdcli,                            6529290
+silesia.tar,                        small chain log,                    zstdcli,                            4917022
+silesia.tar,                        explicit params,                    zstdcli,                            4821274
 silesia.tar,                        uncompressed literals,              zstdcli,                            5129559
-silesia.tar,                        uncompressed literals optimal,      zstdcli,                            4320931
+silesia.tar,                        uncompressed literals optimal,      zstdcli,                            4307457
 silesia.tar,                        huffman literals,                   zstdcli,                            5347610
 silesia.tar,                        multithreaded with advanced params, zstdcli,                            5129559
 github,                             level -5,                           zstdcli,                            207285
@@ -146,13 +163,13 @@
 github,                             level 4,                            zstdcli,                            138199
 github,                             level 4 with dict,                  zstdcli,                            43251
 github,                             level 5,                            zstdcli,                            137121
-github,                             level 5 with dict,                  zstdcli,                            40741
+github,                             level 5 with dict,                  zstdcli,                            40728
 github,                             level 6,                            zstdcli,                            137122
-github,                             level 6 with dict,                  zstdcli,                            40632
+github,                             level 6 with dict,                  zstdcli,                            40630
 github,                             level 7,                            zstdcli,                            137122
-github,                             level 7 with dict,                  zstdcli,                            40771
+github,                             level 7 with dict,                  zstdcli,                            40747
 github,                             level 9,                            zstdcli,                            137122
-github,                             level 9 with dict,                  zstdcli,                            41332
+github,                             level 9 with dict,                  zstdcli,                            41338
 github,                             level 13,                           zstdcli,                            136064
 github,                             level 13 with dict,                 zstdcli,                            41743
 github,                             level 16,                           zstdcli,                            136064
@@ -170,6 +187,47 @@
 github,                             uncompressed literals optimal,      zstdcli,                            159227
 github,                             huffman literals,                   zstdcli,                            144465
 github,                             multithreaded with advanced params, zstdcli,                            167915
+github.tar,                         level -5,                           zstdcli,                            46860
+github.tar,                         level -5 with dict,                 zstdcli,                            44575
+github.tar,                         level -3,                           zstdcli,                            43758
+github.tar,                         level -3 with dict,                 zstdcli,                            41451
+github.tar,                         level -1,                           zstdcli,                            42494
+github.tar,                         level -1 with dict,                 zstdcli,                            41135
+github.tar,                         level 0,                            zstdcli,                            38445
+github.tar,                         level 0 with dict,                  zstdcli,                            37999
+github.tar,                         level 1,                            zstdcli,                            39269
+github.tar,                         level 1 with dict,                  zstdcli,                            38284
+github.tar,                         level 3,                            zstdcli,                            38445
+github.tar,                         level 3 with dict,                  zstdcli,                            37999
+github.tar,                         level 4,                            zstdcli,                            38471
+github.tar,                         level 4 with dict,                  zstdcli,                            37952
+github.tar,                         level 5,                            zstdcli,                            39697
+github.tar,                         level 5 with dict,                  zstdcli,                            39032
+github.tar,                         level 6,                            zstdcli,                            39625
+github.tar,                         level 6 with dict,                  zstdcli,                            38614
+github.tar,                         level 7,                            zstdcli,                            39217
+github.tar,                         level 7 with dict,                  zstdcli,                            37871
+github.tar,                         level 9,                            zstdcli,                            36762
+github.tar,                         level 9 with dict,                  zstdcli,                            36641
+github.tar,                         level 13,                           zstdcli,                            35625
+github.tar,                         level 13 with dict,                 zstdcli,                            38730
+github.tar,                         level 16,                           zstdcli,                            40259
+github.tar,                         level 16 with dict,                 zstdcli,                            33643
+github.tar,                         level 19,                           zstdcli,                            32841
+github.tar,                         level 19 with dict,                 zstdcli,                            32899
+github.tar,                         no source size,                     zstdcli,                            38442
+github.tar,                         no source size with dict,           zstdcli,                            38004
+github.tar,                         long distance mode,                 zstdcli,                            39730
+github.tar,                         multithreaded,                      zstdcli,                            38445
+github.tar,                         multithreaded long distance mode,   zstdcli,                            39730
+github.tar,                         small window log,                   zstdcli,                            198544
+github.tar,                         small hash log,                     zstdcli,                            129874
+github.tar,                         small chain log,                    zstdcli,                            41673
+github.tar,                         explicit params,                    zstdcli,                            41227
+github.tar,                         uncompressed literals,              zstdcli,                            41126
+github.tar,                         uncompressed literals optimal,      zstdcli,                            35392
+github.tar,                         huffman literals,                   zstdcli,                            38781
+github.tar,                         multithreaded with advanced params, zstdcli,                            41126
 silesia,                            level -5,                           advanced one pass,                  6737607
 silesia,                            level -3,                           advanced one pass,                  6444677
 silesia,                            level -1,                           advanced one pass,                  6178460
@@ -177,23 +235,29 @@
 silesia,                            level 1,                            advanced one pass,                  5313204
 silesia,                            level 3,                            advanced one pass,                  4849552
 silesia,                            level 4,                            advanced one pass,                  4786970
-silesia,                            level 5,                            advanced one pass,                  4710236
-silesia,                            level 6,                            advanced one pass,                  4660056
-silesia,                            level 7,                            advanced one pass,                  4596296
-silesia,                            level 9,                            advanced one pass,                  4543925
+silesia,                            level 5 row 1,                      advanced one pass,                  4710236
+silesia,                            level 5 row 2,                      advanced one pass,                  4707794
+silesia,                            level 5,                            advanced one pass,                  4707794
+silesia,                            level 6,                            advanced one pass,                  4666383
+silesia,                            level 7 row 1,                      advanced one pass,                  4596296
+silesia,                            level 7 row 2,                      advanced one pass,                  4603381
+silesia,                            level 7,                            advanced one pass,                  4603381
+silesia,                            level 9,                            advanced one pass,                  4546001
+silesia,                            level 12 row 1,                     advanced one pass,                  4519288
+silesia,                            level 12 row 2,                     advanced one pass,                  4521397
 silesia,                            level 13,                           advanced one pass,                  4482135
-silesia,                            level 16,                           advanced one pass,                  4377465
-silesia,                            level 19,                           advanced one pass,                  4293330
+silesia,                            level 16,                           advanced one pass,                  4360251
+silesia,                            level 19,                           advanced one pass,                  4283237
 silesia,                            no source size,                     advanced one pass,                  4849552
-silesia,                            long distance mode,                 advanced one pass,                  4839708
+silesia,                            long distance mode,                 advanced one pass,                  4840738
 silesia,                            multithreaded,                      advanced one pass,                  4849552
-silesia,                            multithreaded long distance mode,   advanced one pass,                  4839708
+silesia,                            multithreaded long distance mode,   advanced one pass,                  4840758
 silesia,                            small window log,                   advanced one pass,                  7095919
-silesia,                            small hash log,                     advanced one pass,                  6555021
-silesia,                            small chain log,                    advanced one pass,                  4931148
-silesia,                            explicit params,                    advanced one pass,                  4797095
+silesia,                            small hash log,                     advanced one pass,                  6526141
+silesia,                            small chain log,                    advanced one pass,                  4912197
+silesia,                            explicit params,                    advanced one pass,                  4795856
 silesia,                            uncompressed literals,              advanced one pass,                  5127982
-silesia,                            uncompressed literals optimal,      advanced one pass,                  4325472
+silesia,                            uncompressed literals optimal,      advanced one pass,                  4317896
 silesia,                            huffman literals,                   advanced one pass,                  5326268
 silesia,                            multithreaded with advanced params, advanced one pass,                  5127982
 silesia.tar,                        level -5,                           advanced one pass,                  6738593
@@ -203,23 +267,29 @@
 silesia.tar,                        level 1,                            advanced one pass,                  5334885
 silesia.tar,                        level 3,                            advanced one pass,                  4861425
 silesia.tar,                        level 4,                            advanced one pass,                  4799630
-silesia.tar,                        level 5,                            advanced one pass,                  4722324
-silesia.tar,                        level 6,                            advanced one pass,                  4672279
-silesia.tar,                        level 7,                            advanced one pass,                  4606715
-silesia.tar,                        level 9,                            advanced one pass,                  4554147
+silesia.tar,                        level 5 row 1,                      advanced one pass,                  4722324
+silesia.tar,                        level 5 row 2,                      advanced one pass,                  4719256
+silesia.tar,                        level 5,                            advanced one pass,                  4719256
+silesia.tar,                        level 6,                            advanced one pass,                  4677721
+silesia.tar,                        level 7 row 1,                      advanced one pass,                  4606715
+silesia.tar,                        level 7 row 2,                      advanced one pass,                  4613541
+silesia.tar,                        level 7,                            advanced one pass,                  4613541
+silesia.tar,                        level 9,                            advanced one pass,                  4555426
+silesia.tar,                        level 12 row 1,                     advanced one pass,                  4529459
+silesia.tar,                        level 12 row 2,                     advanced one pass,                  4530256
 silesia.tar,                        level 13,                           advanced one pass,                  4491764
-silesia.tar,                        level 16,                           advanced one pass,                  4381332
-silesia.tar,                        level 19,                           advanced one pass,                  4281605
+silesia.tar,                        level 16,                           advanced one pass,                  4356827
+silesia.tar,                        level 19,                           advanced one pass,                  4264487
 silesia.tar,                        no source size,                     advanced one pass,                  4861425
-silesia.tar,                        long distance mode,                 advanced one pass,                  4848098
+silesia.tar,                        long distance mode,                 advanced one pass,                  4847754
 silesia.tar,                        multithreaded,                      advanced one pass,                  4861508
-silesia.tar,                        multithreaded long distance mode,   advanced one pass,                  4853186
+silesia.tar,                        multithreaded long distance mode,   advanced one pass,                  4853222
 silesia.tar,                        small window log,                   advanced one pass,                  7101530
-silesia.tar,                        small hash log,                     advanced one pass,                  6587951
-silesia.tar,                        small chain log,                    advanced one pass,                  4943307
-silesia.tar,                        explicit params,                    advanced one pass,                  4808589
+silesia.tar,                        small hash log,                     advanced one pass,                  6529232
+silesia.tar,                        small chain log,                    advanced one pass,                  4917041
+silesia.tar,                        explicit params,                    advanced one pass,                  4807380
 silesia.tar,                        uncompressed literals,              advanced one pass,                  5129458
-silesia.tar,                        uncompressed literals optimal,      advanced one pass,                  4320927
+silesia.tar,                        uncompressed literals optimal,      advanced one pass,                  4307453
 silesia.tar,                        huffman literals,                   advanced one pass,                  5347335
 silesia.tar,                        multithreaded with advanced params, advanced one pass,                  5129555
 github,                             level -5,                           advanced one pass,                  205285
@@ -230,27 +300,102 @@
 github,                             level -1 with dict,                 advanced one pass,                  43170
 github,                             level 0,                            advanced one pass,                  136335
 github,                             level 0 with dict,                  advanced one pass,                  41148
+github,                             level 0 with dict dms,              advanced one pass,                  41148
+github,                             level 0 with dict dds,              advanced one pass,                  41148
+github,                             level 0 with dict copy,             advanced one pass,                  41124
+github,                             level 0 with dict load,             advanced one pass,                  42252
 github,                             level 1,                            advanced one pass,                  142465
 github,                             level 1 with dict,                  advanced one pass,                  41682
+github,                             level 1 with dict dms,              advanced one pass,                  41682
+github,                             level 1 with dict dds,              advanced one pass,                  41682
+github,                             level 1 with dict copy,             advanced one pass,                  41674
+github,                             level 1 with dict load,             advanced one pass,                  43755
 github,                             level 3,                            advanced one pass,                  136335
 github,                             level 3 with dict,                  advanced one pass,                  41148
+github,                             level 3 with dict dms,              advanced one pass,                  41148
+github,                             level 3 with dict dds,              advanced one pass,                  41148
+github,                             level 3 with dict copy,             advanced one pass,                  41124
+github,                             level 3 with dict load,             advanced one pass,                  42252
 github,                             level 4,                            advanced one pass,                  136199
 github,                             level 4 with dict,                  advanced one pass,                  41251
+github,                             level 4 with dict dms,              advanced one pass,                  41251
+github,                             level 4 with dict dds,              advanced one pass,                  41251
+github,                             level 4 with dict copy,             advanced one pass,                  41216
+github,                             level 4 with dict load,             advanced one pass,                  41159
+github,                             level 5 row 1,                      advanced one pass,                  135121
+github,                             level 5 row 1 with dict dms,        advanced one pass,                  38938
+github,                             level 5 row 1 with dict dds,        advanced one pass,                  38732
+github,                             level 5 row 1 with dict copy,       advanced one pass,                  38934
+github,                             level 5 row 1 with dict load,       advanced one pass,                  40725
+github,                             level 5 row 2,                      advanced one pass,                  134584
+github,                             level 5 row 2 with dict dms,        advanced one pass,                  38758
+github,                             level 5 row 2 with dict dds,        advanced one pass,                  38728
+github,                             level 5 row 2 with dict copy,       advanced one pass,                  38759
+github,                             level 5 row 2 with dict load,       advanced one pass,                  41518
 github,                             level 5,                            advanced one pass,                  135121
-github,                             level 5 with dict,                  advanced one pass,                  38938
+github,                             level 5 with dict,                  advanced one pass,                  38758
+github,                             level 5 with dict dms,              advanced one pass,                  38758
+github,                             level 5 with dict dds,              advanced one pass,                  38728
+github,                             level 5 with dict copy,             advanced one pass,                  38759
+github,                             level 5 with dict load,             advanced one pass,                  40725
 github,                             level 6,                            advanced one pass,                  135122
-github,                             level 6 with dict,                  advanced one pass,                  38632
+github,                             level 6 with dict,                  advanced one pass,                  38671
+github,                             level 6 with dict dms,              advanced one pass,                  38671
+github,                             level 6 with dict dds,              advanced one pass,                  38630
+github,                             level 6 with dict copy,             advanced one pass,                  38669
+github,                             level 6 with dict load,             advanced one pass,                  40695
+github,                             level 7 row 1,                      advanced one pass,                  135122
+github,                             level 7 row 1 with dict dms,        advanced one pass,                  38771
+github,                             level 7 row 1 with dict dds,        advanced one pass,                  38771
+github,                             level 7 row 1 with dict copy,       advanced one pass,                  38745
+github,                             level 7 row 1 with dict load,       advanced one pass,                  40695
+github,                             level 7 row 2,                      advanced one pass,                  134584
+github,                             level 7 row 2 with dict dms,        advanced one pass,                  38758
+github,                             level 7 row 2 with dict dds,        advanced one pass,                  38747
+github,                             level 7 row 2 with dict copy,       advanced one pass,                  38755
+github,                             level 7 row 2 with dict load,       advanced one pass,                  41030
 github,                             level 7,                            advanced one pass,                  135122
-github,                             level 7 with dict,                  advanced one pass,                  38771
+github,                             level 7 with dict,                  advanced one pass,                  38758
+github,                             level 7 with dict dms,              advanced one pass,                  38758
+github,                             level 7 with dict dds,              advanced one pass,                  38747
+github,                             level 7 with dict copy,             advanced one pass,                  38755
+github,                             level 7 with dict load,             advanced one pass,                  40695
 github,                             level 9,                            advanced one pass,                  135122
-github,                             level 9 with dict,                  advanced one pass,                  39332
+github,                             level 9 with dict,                  advanced one pass,                  39437
+github,                             level 9 with dict dms,              advanced one pass,                  39437
+github,                             level 9 with dict dds,              advanced one pass,                  39338
+github,                             level 9 with dict copy,             advanced one pass,                  39398
+github,                             level 9 with dict load,             advanced one pass,                  41710
+github,                             level 12 row 1,                     advanced one pass,                  134180
+github,                             level 12 row 1 with dict dms,       advanced one pass,                  39677
+github,                             level 12 row 1 with dict dds,       advanced one pass,                  39677
+github,                             level 12 row 1 with dict copy,      advanced one pass,                  39677
+github,                             level 12 row 1 with dict load,      advanced one pass,                  41166
+github,                             level 12 row 2,                     advanced one pass,                  134180
+github,                             level 12 row 2 with dict dms,       advanced one pass,                  39677
+github,                             level 12 row 2 with dict dds,       advanced one pass,                  39677
+github,                             level 12 row 2 with dict copy,      advanced one pass,                  39677
+github,                             level 12 row 2 with dict load,      advanced one pass,                  41166
 github,                             level 13,                           advanced one pass,                  134064
 github,                             level 13 with dict,                 advanced one pass,                  39743
+github,                             level 13 with dict dms,             advanced one pass,                  39743
+github,                             level 13 with dict dds,             advanced one pass,                  39743
+github,                             level 13 with dict copy,            advanced one pass,                  39948
+github,                             level 13 with dict load,            advanced one pass,                  42626
 github,                             level 16,                           advanced one pass,                  134064
 github,                             level 16 with dict,                 advanced one pass,                  37577
+github,                             level 16 with dict dms,             advanced one pass,                  37577
+github,                             level 16 with dict dds,             advanced one pass,                  37577
+github,                             level 16 with dict copy,            advanced one pass,                  37568
+github,                             level 16 with dict load,            advanced one pass,                  42340
 github,                             level 19,                           advanced one pass,                  134064
 github,                             level 19 with dict,                 advanced one pass,                  37576
+github,                             level 19 with dict dms,             advanced one pass,                  37576
+github,                             level 19 with dict dds,             advanced one pass,                  37576
+github,                             level 19 with dict copy,            advanced one pass,                  37567
+github,                             level 19 with dict load,            advanced one pass,                  39613
 github,                             no source size,                     advanced one pass,                  136335
+github,                             no source size with dict,           advanced one pass,                  41148
 github,                             long distance mode,                 advanced one pass,                  136335
 github,                             multithreaded,                      advanced one pass,                  136335
 github,                             multithreaded long distance mode,   advanced one pass,                  136335
@@ -262,6 +407,121 @@
 github,                             uncompressed literals optimal,      advanced one pass,                  157227
 github,                             huffman literals,                   advanced one pass,                  142465
 github,                             multithreaded with advanced params, advanced one pass,                  165915
+github.tar,                         level -5,                           advanced one pass,                  46856
+github.tar,                         level -5 with dict,                 advanced one pass,                  44571
+github.tar,                         level -3,                           advanced one pass,                  43754
+github.tar,                         level -3 with dict,                 advanced one pass,                  41447
+github.tar,                         level -1,                           advanced one pass,                  42490
+github.tar,                         level -1 with dict,                 advanced one pass,                  41131
+github.tar,                         level 0,                            advanced one pass,                  38441
+github.tar,                         level 0 with dict,                  advanced one pass,                  37995
+github.tar,                         level 0 with dict dms,              advanced one pass,                  38003
+github.tar,                         level 0 with dict dds,              advanced one pass,                  38003
+github.tar,                         level 0 with dict copy,             advanced one pass,                  37995
+github.tar,                         level 0 with dict load,             advanced one pass,                  37956
+github.tar,                         level 1,                            advanced one pass,                  39265
+github.tar,                         level 1 with dict,                  advanced one pass,                  38280
+github.tar,                         level 1 with dict dms,              advanced one pass,                  38290
+github.tar,                         level 1 with dict dds,              advanced one pass,                  38290
+github.tar,                         level 1 with dict copy,             advanced one pass,                  38280
+github.tar,                         level 1 with dict load,             advanced one pass,                  38729
+github.tar,                         level 3,                            advanced one pass,                  38441
+github.tar,                         level 3 with dict,                  advanced one pass,                  37995
+github.tar,                         level 3 with dict dms,              advanced one pass,                  38003
+github.tar,                         level 3 with dict dds,              advanced one pass,                  38003
+github.tar,                         level 3 with dict copy,             advanced one pass,                  37995
+github.tar,                         level 3 with dict load,             advanced one pass,                  37956
+github.tar,                         level 4,                            advanced one pass,                  38467
+github.tar,                         level 4 with dict,                  advanced one pass,                  37948
+github.tar,                         level 4 with dict dms,              advanced one pass,                  37954
+github.tar,                         level 4 with dict dds,              advanced one pass,                  37954
+github.tar,                         level 4 with dict copy,             advanced one pass,                  37948
+github.tar,                         level 4 with dict load,             advanced one pass,                  37927
+github.tar,                         level 5 row 1,                      advanced one pass,                  39788
+github.tar,                         level 5 row 1 with dict dms,        advanced one pass,                  39365
+github.tar,                         level 5 row 1 with dict dds,        advanced one pass,                  39233
+github.tar,                         level 5 row 1 with dict copy,       advanced one pass,                  39715
+github.tar,                         level 5 row 1 with dict load,       advanced one pass,                  39209
+github.tar,                         level 5 row 2,                      advanced one pass,                  39693
+github.tar,                         level 5 row 2 with dict dms,        advanced one pass,                  39024
+github.tar,                         level 5 row 2 with dict dds,        advanced one pass,                  39028
+github.tar,                         level 5 row 2 with dict copy,       advanced one pass,                  39040
+github.tar,                         level 5 row 2 with dict load,       advanced one pass,                  39037
+github.tar,                         level 5,                            advanced one pass,                  39693
+github.tar,                         level 5 with dict,                  advanced one pass,                  39040
+github.tar,                         level 5 with dict dms,              advanced one pass,                  39024
+github.tar,                         level 5 with dict dds,              advanced one pass,                  39028
+github.tar,                         level 5 with dict copy,             advanced one pass,                  39040
+github.tar,                         level 5 with dict load,             advanced one pass,                  39037
+github.tar,                         level 6,                            advanced one pass,                  39621
+github.tar,                         level 6 with dict,                  advanced one pass,                  38622
+github.tar,                         level 6 with dict dms,              advanced one pass,                  38608
+github.tar,                         level 6 with dict dds,              advanced one pass,                  38610
+github.tar,                         level 6 with dict copy,             advanced one pass,                  38622
+github.tar,                         level 6 with dict load,             advanced one pass,                  38962
+github.tar,                         level 7 row 1,                      advanced one pass,                  39206
+github.tar,                         level 7 row 1 with dict dms,        advanced one pass,                  37954
+github.tar,                         level 7 row 1 with dict dds,        advanced one pass,                  37954
+github.tar,                         level 7 row 1 with dict copy,       advanced one pass,                  38071
+github.tar,                         level 7 row 1 with dict load,       advanced one pass,                  38584
+github.tar,                         level 7 row 2,                      advanced one pass,                  39213
+github.tar,                         level 7 row 2 with dict dms,        advanced one pass,                  37848
+github.tar,                         level 7 row 2 with dict dds,        advanced one pass,                  37867
+github.tar,                         level 7 row 2 with dict copy,       advanced one pass,                  37848
+github.tar,                         level 7 row 2 with dict load,       advanced one pass,                  38582
+github.tar,                         level 7,                            advanced one pass,                  39213
+github.tar,                         level 7 with dict,                  advanced one pass,                  37848
+github.tar,                         level 7 with dict dms,              advanced one pass,                  37848
+github.tar,                         level 7 with dict dds,              advanced one pass,                  37867
+github.tar,                         level 7 with dict copy,             advanced one pass,                  37848
+github.tar,                         level 7 with dict load,             advanced one pass,                  38582
+github.tar,                         level 9,                            advanced one pass,                  36758
+github.tar,                         level 9 with dict,                  advanced one pass,                  36457
+github.tar,                         level 9 with dict dms,              advanced one pass,                  36549
+github.tar,                         level 9 with dict dds,              advanced one pass,                  36637
+github.tar,                         level 9 with dict copy,             advanced one pass,                  36457
+github.tar,                         level 9 with dict load,             advanced one pass,                  36350
+github.tar,                         level 12 row 1,                     advanced one pass,                  36435
+github.tar,                         level 12 row 1 with dict dms,       advanced one pass,                  36986
+github.tar,                         level 12 row 1 with dict dds,       advanced one pass,                  36986
+github.tar,                         level 12 row 1 with dict copy,      advanced one pass,                  36609
+github.tar,                         level 12 row 1 with dict load,      advanced one pass,                  36419
+github.tar,                         level 12 row 2,                     advanced one pass,                  36435
+github.tar,                         level 12 row 2 with dict dms,       advanced one pass,                  36986
+github.tar,                         level 12 row 2 with dict dds,       advanced one pass,                  36986
+github.tar,                         level 12 row 2 with dict copy,      advanced one pass,                  36609
+github.tar,                         level 12 row 2 with dict load,      advanced one pass,                  36424
+github.tar,                         level 13,                           advanced one pass,                  35621
+github.tar,                         level 13 with dict,                 advanced one pass,                  38726
+github.tar,                         level 13 with dict dms,             advanced one pass,                  38903
+github.tar,                         level 13 with dict dds,             advanced one pass,                  38903
+github.tar,                         level 13 with dict copy,            advanced one pass,                  38726
+github.tar,                         level 13 with dict load,            advanced one pass,                  36372
+github.tar,                         level 16,                           advanced one pass,                  40255
+github.tar,                         level 16 with dict,                 advanced one pass,                  33639
+github.tar,                         level 16 with dict dms,             advanced one pass,                  33544
+github.tar,                         level 16 with dict dds,             advanced one pass,                  33544
+github.tar,                         level 16 with dict copy,            advanced one pass,                  33639
+github.tar,                         level 16 with dict load,            advanced one pass,                  39353
+github.tar,                         level 19,                           advanced one pass,                  32837
+github.tar,                         level 19 with dict,                 advanced one pass,                  32895
+github.tar,                         level 19 with dict dms,             advanced one pass,                  32672
+github.tar,                         level 19 with dict dds,             advanced one pass,                  32672
+github.tar,                         level 19 with dict copy,            advanced one pass,                  32895
+github.tar,                         level 19 with dict load,            advanced one pass,                  32676
+github.tar,                         no source size,                     advanced one pass,                  38441
+github.tar,                         no source size with dict,           advanced one pass,                  37995
+github.tar,                         long distance mode,                 advanced one pass,                  39757
+github.tar,                         multithreaded,                      advanced one pass,                  38441
+github.tar,                         multithreaded long distance mode,   advanced one pass,                  39726
+github.tar,                         small window log,                   advanced one pass,                  198540
+github.tar,                         small hash log,                     advanced one pass,                  129870
+github.tar,                         small chain log,                    advanced one pass,                  41669
+github.tar,                         explicit params,                    advanced one pass,                  41227
+github.tar,                         uncompressed literals,              advanced one pass,                  41122
+github.tar,                         uncompressed literals optimal,      advanced one pass,                  35388
+github.tar,                         huffman literals,                   advanced one pass,                  38777
+github.tar,                         multithreaded with advanced params, advanced one pass,                  41122
 silesia,                            level -5,                           advanced one pass small out,        6737607
 silesia,                            level -3,                           advanced one pass small out,        6444677
 silesia,                            level -1,                           advanced one pass small out,        6178460
@@ -269,23 +529,29 @@
 silesia,                            level 1,                            advanced one pass small out,        5313204
 silesia,                            level 3,                            advanced one pass small out,        4849552
 silesia,                            level 4,                            advanced one pass small out,        4786970
-silesia,                            level 5,                            advanced one pass small out,        4710236
-silesia,                            level 6,                            advanced one pass small out,        4660056
-silesia,                            level 7,                            advanced one pass small out,        4596296
-silesia,                            level 9,                            advanced one pass small out,        4543925
+silesia,                            level 5 row 1,                      advanced one pass small out,        4710236
+silesia,                            level 5 row 2,                      advanced one pass small out,        4707794
+silesia,                            level 5,                            advanced one pass small out,        4707794
+silesia,                            level 6,                            advanced one pass small out,        4666383
+silesia,                            level 7 row 1,                      advanced one pass small out,        4596296
+silesia,                            level 7 row 2,                      advanced one pass small out,        4603381
+silesia,                            level 7,                            advanced one pass small out,        4603381
+silesia,                            level 9,                            advanced one pass small out,        4546001
+silesia,                            level 12 row 1,                     advanced one pass small out,        4519288
+silesia,                            level 12 row 2,                     advanced one pass small out,        4521397
 silesia,                            level 13,                           advanced one pass small out,        4482135
-silesia,                            level 16,                           advanced one pass small out,        4377465
-silesia,                            level 19,                           advanced one pass small out,        4293330
+silesia,                            level 16,                           advanced one pass small out,        4360251
+silesia,                            level 19,                           advanced one pass small out,        4283237
 silesia,                            no source size,                     advanced one pass small out,        4849552
-silesia,                            long distance mode,                 advanced one pass small out,        4839708
+silesia,                            long distance mode,                 advanced one pass small out,        4840738
 silesia,                            multithreaded,                      advanced one pass small out,        4849552
-silesia,                            multithreaded long distance mode,   advanced one pass small out,        4839708
+silesia,                            multithreaded long distance mode,   advanced one pass small out,        4840758
 silesia,                            small window log,                   advanced one pass small out,        7095919
-silesia,                            small hash log,                     advanced one pass small out,        6555021
-silesia,                            small chain log,                    advanced one pass small out,        4931148
-silesia,                            explicit params,                    advanced one pass small out,        4797095
+silesia,                            small hash log,                     advanced one pass small out,        6526141
+silesia,                            small chain log,                    advanced one pass small out,        4912197
+silesia,                            explicit params,                    advanced one pass small out,        4795856
 silesia,                            uncompressed literals,              advanced one pass small out,        5127982
-silesia,                            uncompressed literals optimal,      advanced one pass small out,        4325472
+silesia,                            uncompressed literals optimal,      advanced one pass small out,        4317896
 silesia,                            huffman literals,                   advanced one pass small out,        5326268
 silesia,                            multithreaded with advanced params, advanced one pass small out,        5127982
 silesia.tar,                        level -5,                           advanced one pass small out,        6738593
@@ -295,23 +561,29 @@
 silesia.tar,                        level 1,                            advanced one pass small out,        5334885
 silesia.tar,                        level 3,                            advanced one pass small out,        4861425
 silesia.tar,                        level 4,                            advanced one pass small out,        4799630
-silesia.tar,                        level 5,                            advanced one pass small out,        4722324
-silesia.tar,                        level 6,                            advanced one pass small out,        4672279
-silesia.tar,                        level 7,                            advanced one pass small out,        4606715
-silesia.tar,                        level 9,                            advanced one pass small out,        4554147
+silesia.tar,                        level 5 row 1,                      advanced one pass small out,        4722324
+silesia.tar,                        level 5 row 2,                      advanced one pass small out,        4719256
+silesia.tar,                        level 5,                            advanced one pass small out,        4719256
+silesia.tar,                        level 6,                            advanced one pass small out,        4677721
+silesia.tar,                        level 7 row 1,                      advanced one pass small out,        4606715
+silesia.tar,                        level 7 row 2,                      advanced one pass small out,        4613541
+silesia.tar,                        level 7,                            advanced one pass small out,        4613541
+silesia.tar,                        level 9,                            advanced one pass small out,        4555426
+silesia.tar,                        level 12 row 1,                     advanced one pass small out,        4529459
+silesia.tar,                        level 12 row 2,                     advanced one pass small out,        4530256
 silesia.tar,                        level 13,                           advanced one pass small out,        4491764
-silesia.tar,                        level 16,                           advanced one pass small out,        4381332
-silesia.tar,                        level 19,                           advanced one pass small out,        4281605
+silesia.tar,                        level 16,                           advanced one pass small out,        4356827
+silesia.tar,                        level 19,                           advanced one pass small out,        4264487
 silesia.tar,                        no source size,                     advanced one pass small out,        4861425
-silesia.tar,                        long distance mode,                 advanced one pass small out,        4848098
+silesia.tar,                        long distance mode,                 advanced one pass small out,        4847754
 silesia.tar,                        multithreaded,                      advanced one pass small out,        4861508
-silesia.tar,                        multithreaded long distance mode,   advanced one pass small out,        4853186
+silesia.tar,                        multithreaded long distance mode,   advanced one pass small out,        4853222
 silesia.tar,                        small window log,                   advanced one pass small out,        7101530
-silesia.tar,                        small hash log,                     advanced one pass small out,        6587951
-silesia.tar,                        small chain log,                    advanced one pass small out,        4943307
-silesia.tar,                        explicit params,                    advanced one pass small out,        4808589
+silesia.tar,                        small hash log,                     advanced one pass small out,        6529232
+silesia.tar,                        small chain log,                    advanced one pass small out,        4917041
+silesia.tar,                        explicit params,                    advanced one pass small out,        4807380
 silesia.tar,                        uncompressed literals,              advanced one pass small out,        5129458
-silesia.tar,                        uncompressed literals optimal,      advanced one pass small out,        4320927
+silesia.tar,                        uncompressed literals optimal,      advanced one pass small out,        4307453
 silesia.tar,                        huffman literals,                   advanced one pass small out,        5347335
 silesia.tar,                        multithreaded with advanced params, advanced one pass small out,        5129555
 github,                             level -5,                           advanced one pass small out,        205285
@@ -322,27 +594,102 @@
 github,                             level -1 with dict,                 advanced one pass small out,        43170
 github,                             level 0,                            advanced one pass small out,        136335
 github,                             level 0 with dict,                  advanced one pass small out,        41148
+github,                             level 0 with dict dms,              advanced one pass small out,        41148
+github,                             level 0 with dict dds,              advanced one pass small out,        41148
+github,                             level 0 with dict copy,             advanced one pass small out,        41124
+github,                             level 0 with dict load,             advanced one pass small out,        42252
 github,                             level 1,                            advanced one pass small out,        142465
 github,                             level 1 with dict,                  advanced one pass small out,        41682
+github,                             level 1 with dict dms,              advanced one pass small out,        41682
+github,                             level 1 with dict dds,              advanced one pass small out,        41682
+github,                             level 1 with dict copy,             advanced one pass small out,        41674
+github,                             level 1 with dict load,             advanced one pass small out,        43755
 github,                             level 3,                            advanced one pass small out,        136335
 github,                             level 3 with dict,                  advanced one pass small out,        41148
+github,                             level 3 with dict dms,              advanced one pass small out,        41148
+github,                             level 3 with dict dds,              advanced one pass small out,        41148
+github,                             level 3 with dict copy,             advanced one pass small out,        41124
+github,                             level 3 with dict load,             advanced one pass small out,        42252
 github,                             level 4,                            advanced one pass small out,        136199
 github,                             level 4 with dict,                  advanced one pass small out,        41251
+github,                             level 4 with dict dms,              advanced one pass small out,        41251
+github,                             level 4 with dict dds,              advanced one pass small out,        41251
+github,                             level 4 with dict copy,             advanced one pass small out,        41216
+github,                             level 4 with dict load,             advanced one pass small out,        41159
+github,                             level 5 row 1,                      advanced one pass small out,        135121
+github,                             level 5 row 1 with dict dms,        advanced one pass small out,        38938
+github,                             level 5 row 1 with dict dds,        advanced one pass small out,        38732
+github,                             level 5 row 1 with dict copy,       advanced one pass small out,        38934
+github,                             level 5 row 1 with dict load,       advanced one pass small out,        40725
+github,                             level 5 row 2,                      advanced one pass small out,        134584
+github,                             level 5 row 2 with dict dms,        advanced one pass small out,        38758
+github,                             level 5 row 2 with dict dds,        advanced one pass small out,        38728
+github,                             level 5 row 2 with dict copy,       advanced one pass small out,        38759
+github,                             level 5 row 2 with dict load,       advanced one pass small out,        41518
 github,                             level 5,                            advanced one pass small out,        135121
-github,                             level 5 with dict,                  advanced one pass small out,        38938
+github,                             level 5 with dict,                  advanced one pass small out,        38758
+github,                             level 5 with dict dms,              advanced one pass small out,        38758
+github,                             level 5 with dict dds,              advanced one pass small out,        38728
+github,                             level 5 with dict copy,             advanced one pass small out,        38759
+github,                             level 5 with dict load,             advanced one pass small out,        40725
 github,                             level 6,                            advanced one pass small out,        135122
-github,                             level 6 with dict,                  advanced one pass small out,        38632
+github,                             level 6 with dict,                  advanced one pass small out,        38671
+github,                             level 6 with dict dms,              advanced one pass small out,        38671
+github,                             level 6 with dict dds,              advanced one pass small out,        38630
+github,                             level 6 with dict copy,             advanced one pass small out,        38669
+github,                             level 6 with dict load,             advanced one pass small out,        40695
+github,                             level 7 row 1,                      advanced one pass small out,        135122
+github,                             level 7 row 1 with dict dms,        advanced one pass small out,        38771
+github,                             level 7 row 1 with dict dds,        advanced one pass small out,        38771
+github,                             level 7 row 1 with dict copy,       advanced one pass small out,        38745
+github,                             level 7 row 1 with dict load,       advanced one pass small out,        40695
+github,                             level 7 row 2,                      advanced one pass small out,        134584
+github,                             level 7 row 2 with dict dms,        advanced one pass small out,        38758
+github,                             level 7 row 2 with dict dds,        advanced one pass small out,        38747
+github,                             level 7 row 2 with dict copy,       advanced one pass small out,        38755
+github,                             level 7 row 2 with dict load,       advanced one pass small out,        41030
 github,                             level 7,                            advanced one pass small out,        135122
-github,                             level 7 with dict,                  advanced one pass small out,        38771
+github,                             level 7 with dict,                  advanced one pass small out,        38758
+github,                             level 7 with dict dms,              advanced one pass small out,        38758
+github,                             level 7 with dict dds,              advanced one pass small out,        38747
+github,                             level 7 with dict copy,             advanced one pass small out,        38755
+github,                             level 7 with dict load,             advanced one pass small out,        40695
 github,                             level 9,                            advanced one pass small out,        135122
-github,                             level 9 with dict,                  advanced one pass small out,        39332
+github,                             level 9 with dict,                  advanced one pass small out,        39437
+github,                             level 9 with dict dms,              advanced one pass small out,        39437
+github,                             level 9 with dict dds,              advanced one pass small out,        39338
+github,                             level 9 with dict copy,             advanced one pass small out,        39398
+github,                             level 9 with dict load,             advanced one pass small out,        41710
+github,                             level 12 row 1,                     advanced one pass small out,        134180
+github,                             level 12 row 1 with dict dms,       advanced one pass small out,        39677
+github,                             level 12 row 1 with dict dds,       advanced one pass small out,        39677
+github,                             level 12 row 1 with dict copy,      advanced one pass small out,        39677
+github,                             level 12 row 1 with dict load,      advanced one pass small out,        41166
+github,                             level 12 row 2,                     advanced one pass small out,        134180
+github,                             level 12 row 2 with dict dms,       advanced one pass small out,        39677
+github,                             level 12 row 2 with dict dds,       advanced one pass small out,        39677
+github,                             level 12 row 2 with dict copy,      advanced one pass small out,        39677
+github,                             level 12 row 2 with dict load,      advanced one pass small out,        41166
 github,                             level 13,                           advanced one pass small out,        134064
 github,                             level 13 with dict,                 advanced one pass small out,        39743
+github,                             level 13 with dict dms,             advanced one pass small out,        39743
+github,                             level 13 with dict dds,             advanced one pass small out,        39743
+github,                             level 13 with dict copy,            advanced one pass small out,        39948
+github,                             level 13 with dict load,            advanced one pass small out,        42626
 github,                             level 16,                           advanced one pass small out,        134064
 github,                             level 16 with dict,                 advanced one pass small out,        37577
+github,                             level 16 with dict dms,             advanced one pass small out,        37577
+github,                             level 16 with dict dds,             advanced one pass small out,        37577
+github,                             level 16 with dict copy,            advanced one pass small out,        37568
+github,                             level 16 with dict load,            advanced one pass small out,        42340
 github,                             level 19,                           advanced one pass small out,        134064
 github,                             level 19 with dict,                 advanced one pass small out,        37576
+github,                             level 19 with dict dms,             advanced one pass small out,        37576
+github,                             level 19 with dict dds,             advanced one pass small out,        37576
+github,                             level 19 with dict copy,            advanced one pass small out,        37567
+github,                             level 19 with dict load,            advanced one pass small out,        39613
 github,                             no source size,                     advanced one pass small out,        136335
+github,                             no source size with dict,           advanced one pass small out,        41148
 github,                             long distance mode,                 advanced one pass small out,        136335
 github,                             multithreaded,                      advanced one pass small out,        136335
 github,                             multithreaded long distance mode,   advanced one pass small out,        136335
@@ -354,6 +701,121 @@
 github,                             uncompressed literals optimal,      advanced one pass small out,        157227
 github,                             huffman literals,                   advanced one pass small out,        142465
 github,                             multithreaded with advanced params, advanced one pass small out,        165915
+github.tar,                         level -5,                           advanced one pass small out,        46856
+github.tar,                         level -5 with dict,                 advanced one pass small out,        44571
+github.tar,                         level -3,                           advanced one pass small out,        43754
+github.tar,                         level -3 with dict,                 advanced one pass small out,        41447
+github.tar,                         level -1,                           advanced one pass small out,        42490
+github.tar,                         level -1 with dict,                 advanced one pass small out,        41131
+github.tar,                         level 0,                            advanced one pass small out,        38441
+github.tar,                         level 0 with dict,                  advanced one pass small out,        37995
+github.tar,                         level 0 with dict dms,              advanced one pass small out,        38003
+github.tar,                         level 0 with dict dds,              advanced one pass small out,        38003
+github.tar,                         level 0 with dict copy,             advanced one pass small out,        37995
+github.tar,                         level 0 with dict load,             advanced one pass small out,        37956
+github.tar,                         level 1,                            advanced one pass small out,        39265
+github.tar,                         level 1 with dict,                  advanced one pass small out,        38280
+github.tar,                         level 1 with dict dms,              advanced one pass small out,        38290
+github.tar,                         level 1 with dict dds,              advanced one pass small out,        38290
+github.tar,                         level 1 with dict copy,             advanced one pass small out,        38280
+github.tar,                         level 1 with dict load,             advanced one pass small out,        38729
+github.tar,                         level 3,                            advanced one pass small out,        38441
+github.tar,                         level 3 with dict,                  advanced one pass small out,        37995
+github.tar,                         level 3 with dict dms,              advanced one pass small out,        38003
+github.tar,                         level 3 with dict dds,              advanced one pass small out,        38003
+github.tar,                         level 3 with dict copy,             advanced one pass small out,        37995
+github.tar,                         level 3 with dict load,             advanced one pass small out,        37956
+github.tar,                         level 4,                            advanced one pass small out,        38467
+github.tar,                         level 4 with dict,                  advanced one pass small out,        37948
+github.tar,                         level 4 with dict dms,              advanced one pass small out,        37954
+github.tar,                         level 4 with dict dds,              advanced one pass small out,        37954
+github.tar,                         level 4 with dict copy,             advanced one pass small out,        37948
+github.tar,                         level 4 with dict load,             advanced one pass small out,        37927
+github.tar,                         level 5 row 1,                      advanced one pass small out,        39788
+github.tar,                         level 5 row 1 with dict dms,        advanced one pass small out,        39365
+github.tar,                         level 5 row 1 with dict dds,        advanced one pass small out,        39233
+github.tar,                         level 5 row 1 with dict copy,       advanced one pass small out,        39715
+github.tar,                         level 5 row 1 with dict load,       advanced one pass small out,        39209
+github.tar,                         level 5 row 2,                      advanced one pass small out,        39693
+github.tar,                         level 5 row 2 with dict dms,        advanced one pass small out,        39024
+github.tar,                         level 5 row 2 with dict dds,        advanced one pass small out,        39028
+github.tar,                         level 5 row 2 with dict copy,       advanced one pass small out,        39040
+github.tar,                         level 5 row 2 with dict load,       advanced one pass small out,        39037
+github.tar,                         level 5,                            advanced one pass small out,        39693
+github.tar,                         level 5 with dict,                  advanced one pass small out,        39040
+github.tar,                         level 5 with dict dms,              advanced one pass small out,        39024
+github.tar,                         level 5 with dict dds,              advanced one pass small out,        39028
+github.tar,                         level 5 with dict copy,             advanced one pass small out,        39040
+github.tar,                         level 5 with dict load,             advanced one pass small out,        39037
+github.tar,                         level 6,                            advanced one pass small out,        39621
+github.tar,                         level 6 with dict,                  advanced one pass small out,        38622
+github.tar,                         level 6 with dict dms,              advanced one pass small out,        38608
+github.tar,                         level 6 with dict dds,              advanced one pass small out,        38610
+github.tar,                         level 6 with dict copy,             advanced one pass small out,        38622
+github.tar,                         level 6 with dict load,             advanced one pass small out,        38962
+github.tar,                         level 7 row 1,                      advanced one pass small out,        39206
+github.tar,                         level 7 row 1 with dict dms,        advanced one pass small out,        37954
+github.tar,                         level 7 row 1 with dict dds,        advanced one pass small out,        37954
+github.tar,                         level 7 row 1 with dict copy,       advanced one pass small out,        38071
+github.tar,                         level 7 row 1 with dict load,       advanced one pass small out,        38584
+github.tar,                         level 7 row 2,                      advanced one pass small out,        39213
+github.tar,                         level 7 row 2 with dict dms,        advanced one pass small out,        37848
+github.tar,                         level 7 row 2 with dict dds,        advanced one pass small out,        37867
+github.tar,                         level 7 row 2 with dict copy,       advanced one pass small out,        37848
+github.tar,                         level 7 row 2 with dict load,       advanced one pass small out,        38582
+github.tar,                         level 7,                            advanced one pass small out,        39213
+github.tar,                         level 7 with dict,                  advanced one pass small out,        37848
+github.tar,                         level 7 with dict dms,              advanced one pass small out,        37848
+github.tar,                         level 7 with dict dds,              advanced one pass small out,        37867
+github.tar,                         level 7 with dict copy,             advanced one pass small out,        37848
+github.tar,                         level 7 with dict load,             advanced one pass small out,        38582
+github.tar,                         level 9,                            advanced one pass small out,        36758
+github.tar,                         level 9 with dict,                  advanced one pass small out,        36457
+github.tar,                         level 9 with dict dms,              advanced one pass small out,        36549
+github.tar,                         level 9 with dict dds,              advanced one pass small out,        36637
+github.tar,                         level 9 with dict copy,             advanced one pass small out,        36457
+github.tar,                         level 9 with dict load,             advanced one pass small out,        36350
+github.tar,                         level 12 row 1,                     advanced one pass small out,        36435
+github.tar,                         level 12 row 1 with dict dms,       advanced one pass small out,        36986
+github.tar,                         level 12 row 1 with dict dds,       advanced one pass small out,        36986
+github.tar,                         level 12 row 1 with dict copy,      advanced one pass small out,        36609
+github.tar,                         level 12 row 1 with dict load,      advanced one pass small out,        36419
+github.tar,                         level 12 row 2,                     advanced one pass small out,        36435
+github.tar,                         level 12 row 2 with dict dms,       advanced one pass small out,        36986
+github.tar,                         level 12 row 2 with dict dds,       advanced one pass small out,        36986
+github.tar,                         level 12 row 2 with dict copy,      advanced one pass small out,        36609
+github.tar,                         level 12 row 2 with dict load,      advanced one pass small out,        36424
+github.tar,                         level 13,                           advanced one pass small out,        35621
+github.tar,                         level 13 with dict,                 advanced one pass small out,        38726
+github.tar,                         level 13 with dict dms,             advanced one pass small out,        38903
+github.tar,                         level 13 with dict dds,             advanced one pass small out,        38903
+github.tar,                         level 13 with dict copy,            advanced one pass small out,        38726
+github.tar,                         level 13 with dict load,            advanced one pass small out,        36372
+github.tar,                         level 16,                           advanced one pass small out,        40255
+github.tar,                         level 16 with dict,                 advanced one pass small out,        33639
+github.tar,                         level 16 with dict dms,             advanced one pass small out,        33544
+github.tar,                         level 16 with dict dds,             advanced one pass small out,        33544
+github.tar,                         level 16 with dict copy,            advanced one pass small out,        33639
+github.tar,                         level 16 with dict load,            advanced one pass small out,        39353
+github.tar,                         level 19,                           advanced one pass small out,        32837
+github.tar,                         level 19 with dict,                 advanced one pass small out,        32895
+github.tar,                         level 19 with dict dms,             advanced one pass small out,        32672
+github.tar,                         level 19 with dict dds,             advanced one pass small out,        32672
+github.tar,                         level 19 with dict copy,            advanced one pass small out,        32895
+github.tar,                         level 19 with dict load,            advanced one pass small out,        32676
+github.tar,                         no source size,                     advanced one pass small out,        38441
+github.tar,                         no source size with dict,           advanced one pass small out,        37995
+github.tar,                         long distance mode,                 advanced one pass small out,        39757
+github.tar,                         multithreaded,                      advanced one pass small out,        38441
+github.tar,                         multithreaded long distance mode,   advanced one pass small out,        39726
+github.tar,                         small window log,                   advanced one pass small out,        198540
+github.tar,                         small hash log,                     advanced one pass small out,        129870
+github.tar,                         small chain log,                    advanced one pass small out,        41669
+github.tar,                         explicit params,                    advanced one pass small out,        41227
+github.tar,                         uncompressed literals,              advanced one pass small out,        41122
+github.tar,                         uncompressed literals optimal,      advanced one pass small out,        35388
+github.tar,                         huffman literals,                   advanced one pass small out,        38777
+github.tar,                         multithreaded with advanced params, advanced one pass small out,        41122
 silesia,                            level -5,                           advanced streaming,                 6882505
 silesia,                            level -3,                           advanced streaming,                 6568376
 silesia,                            level -1,                           advanced streaming,                 6183403
@@ -361,23 +823,29 @@
 silesia,                            level 1,                            advanced streaming,                 5314162
 silesia,                            level 3,                            advanced streaming,                 4849552
 silesia,                            level 4,                            advanced streaming,                 4786970
-silesia,                            level 5,                            advanced streaming,                 4710236
-silesia,                            level 6,                            advanced streaming,                 4660056
-silesia,                            level 7,                            advanced streaming,                 4596296
-silesia,                            level 9,                            advanced streaming,                 4543925
+silesia,                            level 5 row 1,                      advanced streaming,                 4710236
+silesia,                            level 5 row 2,                      advanced streaming,                 4707794
+silesia,                            level 5,                            advanced streaming,                 4707794
+silesia,                            level 6,                            advanced streaming,                 4666383
+silesia,                            level 7 row 1,                      advanced streaming,                 4596296
+silesia,                            level 7 row 2,                      advanced streaming,                 4603381
+silesia,                            level 7,                            advanced streaming,                 4603381
+silesia,                            level 9,                            advanced streaming,                 4546001
+silesia,                            level 12 row 1,                     advanced streaming,                 4519288
+silesia,                            level 12 row 2,                     advanced streaming,                 4521397
 silesia,                            level 13,                           advanced streaming,                 4482135
-silesia,                            level 16,                           advanced streaming,                 4377465
-silesia,                            level 19,                           advanced streaming,                 4293330
+silesia,                            level 16,                           advanced streaming,                 4360251
+silesia,                            level 19,                           advanced streaming,                 4283237
 silesia,                            no source size,                     advanced streaming,                 4849516
-silesia,                            long distance mode,                 advanced streaming,                 4839708
+silesia,                            long distance mode,                 advanced streaming,                 4840738
 silesia,                            multithreaded,                      advanced streaming,                 4849552
-silesia,                            multithreaded long distance mode,   advanced streaming,                 4839708
+silesia,                            multithreaded long distance mode,   advanced streaming,                 4840758
 silesia,                            small window log,                   advanced streaming,                 7112062
-silesia,                            small hash log,                     advanced streaming,                 6555021
-silesia,                            small chain log,                    advanced streaming,                 4931148
-silesia,                            explicit params,                    advanced streaming,                 4797112
+silesia,                            small hash log,                     advanced streaming,                 6526141
+silesia,                            small chain log,                    advanced streaming,                 4912197
+silesia,                            explicit params,                    advanced streaming,                 4795887
 silesia,                            uncompressed literals,              advanced streaming,                 5127982
-silesia,                            uncompressed literals optimal,      advanced streaming,                 4325472
+silesia,                            uncompressed literals optimal,      advanced streaming,                 4317896
 silesia,                            huffman literals,                   advanced streaming,                 5331168
 silesia,                            multithreaded with advanced params, advanced streaming,                 5127982
 silesia.tar,                        level -5,                           advanced streaming,                 6982759
@@ -387,23 +855,29 @@
 silesia.tar,                        level 1,                            advanced streaming,                 5336939
 silesia.tar,                        level 3,                            advanced streaming,                 4861427
 silesia.tar,                        level 4,                            advanced streaming,                 4799630
-silesia.tar,                        level 5,                            advanced streaming,                 4722329
-silesia.tar,                        level 6,                            advanced streaming,                 4672288
-silesia.tar,                        level 7,                            advanced streaming,                 4606715
-silesia.tar,                        level 9,                            advanced streaming,                 4554154
+silesia.tar,                        level 5 row 1,                      advanced streaming,                 4722329
+silesia.tar,                        level 5 row 2,                      advanced streaming,                 4719261
+silesia.tar,                        level 5,                            advanced streaming,                 4719261
+silesia.tar,                        level 6,                            advanced streaming,                 4677729
+silesia.tar,                        level 7 row 1,                      advanced streaming,                 4606715
+silesia.tar,                        level 7 row 2,                      advanced streaming,                 4613544
+silesia.tar,                        level 7,                            advanced streaming,                 4613544
+silesia.tar,                        level 9,                            advanced streaming,                 4555432
+silesia.tar,                        level 12 row 1,                     advanced streaming,                 4529459
+silesia.tar,                        level 12 row 2,                     advanced streaming,                 4530258
 silesia.tar,                        level 13,                           advanced streaming,                 4491765
-silesia.tar,                        level 16,                           advanced streaming,                 4381350
-silesia.tar,                        level 19,                           advanced streaming,                 4281562
+silesia.tar,                        level 16,                           advanced streaming,                 4356834
+silesia.tar,                        level 19,                           advanced streaming,                 4264392
 silesia.tar,                        no source size,                     advanced streaming,                 4861423
-silesia.tar,                        long distance mode,                 advanced streaming,                 4848098
+silesia.tar,                        long distance mode,                 advanced streaming,                 4847754
 silesia.tar,                        multithreaded,                      advanced streaming,                 4861508
-silesia.tar,                        multithreaded long distance mode,   advanced streaming,                 4853186
+silesia.tar,                        multithreaded long distance mode,   advanced streaming,                 4853222
 silesia.tar,                        small window log,                   advanced streaming,                 7118769
-silesia.tar,                        small hash log,                     advanced streaming,                 6587952
-silesia.tar,                        small chain log,                    advanced streaming,                 4943312
-silesia.tar,                        explicit params,                    advanced streaming,                 4808618
+silesia.tar,                        small hash log,                     advanced streaming,                 6529235
+silesia.tar,                        small chain log,                    advanced streaming,                 4917021
+silesia.tar,                        explicit params,                    advanced streaming,                 4807401
 silesia.tar,                        uncompressed literals,              advanced streaming,                 5129461
-silesia.tar,                        uncompressed literals optimal,      advanced streaming,                 4320858
+silesia.tar,                        uncompressed literals optimal,      advanced streaming,                 4307400
 silesia.tar,                        huffman literals,                   advanced streaming,                 5352360
 silesia.tar,                        multithreaded with advanced params, advanced streaming,                 5129555
 github,                             level -5,                           advanced streaming,                 205285
@@ -414,27 +888,102 @@
 github,                             level -1 with dict,                 advanced streaming,                 43170
 github,                             level 0,                            advanced streaming,                 136335
 github,                             level 0 with dict,                  advanced streaming,                 41148
+github,                             level 0 with dict dms,              advanced streaming,                 41148
+github,                             level 0 with dict dds,              advanced streaming,                 41148
+github,                             level 0 with dict copy,             advanced streaming,                 41124
+github,                             level 0 with dict load,             advanced streaming,                 42252
 github,                             level 1,                            advanced streaming,                 142465
 github,                             level 1 with dict,                  advanced streaming,                 41682
+github,                             level 1 with dict dms,              advanced streaming,                 41682
+github,                             level 1 with dict dds,              advanced streaming,                 41682
+github,                             level 1 with dict copy,             advanced streaming,                 41674
+github,                             level 1 with dict load,             advanced streaming,                 43755
 github,                             level 3,                            advanced streaming,                 136335
 github,                             level 3 with dict,                  advanced streaming,                 41148
+github,                             level 3 with dict dms,              advanced streaming,                 41148
+github,                             level 3 with dict dds,              advanced streaming,                 41148
+github,                             level 3 with dict copy,             advanced streaming,                 41124
+github,                             level 3 with dict load,             advanced streaming,                 42252
 github,                             level 4,                            advanced streaming,                 136199
 github,                             level 4 with dict,                  advanced streaming,                 41251
+github,                             level 4 with dict dms,              advanced streaming,                 41251
+github,                             level 4 with dict dds,              advanced streaming,                 41251
+github,                             level 4 with dict copy,             advanced streaming,                 41216
+github,                             level 4 with dict load,             advanced streaming,                 41159
+github,                             level 5 row 1,                      advanced streaming,                 135121
+github,                             level 5 row 1 with dict dms,        advanced streaming,                 38938
+github,                             level 5 row 1 with dict dds,        advanced streaming,                 38732
+github,                             level 5 row 1 with dict copy,       advanced streaming,                 38934
+github,                             level 5 row 1 with dict load,       advanced streaming,                 40725
+github,                             level 5 row 2,                      advanced streaming,                 134584
+github,                             level 5 row 2 with dict dms,        advanced streaming,                 38758
+github,                             level 5 row 2 with dict dds,        advanced streaming,                 38728
+github,                             level 5 row 2 with dict copy,       advanced streaming,                 38759
+github,                             level 5 row 2 with dict load,       advanced streaming,                 41518
 github,                             level 5,                            advanced streaming,                 135121
-github,                             level 5 with dict,                  advanced streaming,                 38938
+github,                             level 5 with dict,                  advanced streaming,                 38758
+github,                             level 5 with dict dms,              advanced streaming,                 38758
+github,                             level 5 with dict dds,              advanced streaming,                 38728
+github,                             level 5 with dict copy,             advanced streaming,                 38759
+github,                             level 5 with dict load,             advanced streaming,                 40725
 github,                             level 6,                            advanced streaming,                 135122
-github,                             level 6 with dict,                  advanced streaming,                 38632
+github,                             level 6 with dict,                  advanced streaming,                 38671
+github,                             level 6 with dict dms,              advanced streaming,                 38671
+github,                             level 6 with dict dds,              advanced streaming,                 38630
+github,                             level 6 with dict copy,             advanced streaming,                 38669
+github,                             level 6 with dict load,             advanced streaming,                 40695
+github,                             level 7 row 1,                      advanced streaming,                 135122
+github,                             level 7 row 1 with dict dms,        advanced streaming,                 38771
+github,                             level 7 row 1 with dict dds,        advanced streaming,                 38771
+github,                             level 7 row 1 with dict copy,       advanced streaming,                 38745
+github,                             level 7 row 1 with dict load,       advanced streaming,                 40695
+github,                             level 7 row 2,                      advanced streaming,                 134584
+github,                             level 7 row 2 with dict dms,        advanced streaming,                 38758
+github,                             level 7 row 2 with dict dds,        advanced streaming,                 38747
+github,                             level 7 row 2 with dict copy,       advanced streaming,                 38755
+github,                             level 7 row 2 with dict load,       advanced streaming,                 41030
 github,                             level 7,                            advanced streaming,                 135122
-github,                             level 7 with dict,                  advanced streaming,                 38771
+github,                             level 7 with dict,                  advanced streaming,                 38758
+github,                             level 7 with dict dms,              advanced streaming,                 38758
+github,                             level 7 with dict dds,              advanced streaming,                 38747
+github,                             level 7 with dict copy,             advanced streaming,                 38755
+github,                             level 7 with dict load,             advanced streaming,                 40695
 github,                             level 9,                            advanced streaming,                 135122
-github,                             level 9 with dict,                  advanced streaming,                 39332
+github,                             level 9 with dict,                  advanced streaming,                 39437
+github,                             level 9 with dict dms,              advanced streaming,                 39437
+github,                             level 9 with dict dds,              advanced streaming,                 39338
+github,                             level 9 with dict copy,             advanced streaming,                 39398
+github,                             level 9 with dict load,             advanced streaming,                 41710
+github,                             level 12 row 1,                     advanced streaming,                 134180
+github,                             level 12 row 1 with dict dms,       advanced streaming,                 39677
+github,                             level 12 row 1 with dict dds,       advanced streaming,                 39677
+github,                             level 12 row 1 with dict copy,      advanced streaming,                 39677
+github,                             level 12 row 1 with dict load,      advanced streaming,                 41166
+github,                             level 12 row 2,                     advanced streaming,                 134180
+github,                             level 12 row 2 with dict dms,       advanced streaming,                 39677
+github,                             level 12 row 2 with dict dds,       advanced streaming,                 39677
+github,                             level 12 row 2 with dict copy,      advanced streaming,                 39677
+github,                             level 12 row 2 with dict load,      advanced streaming,                 41166
 github,                             level 13,                           advanced streaming,                 134064
 github,                             level 13 with dict,                 advanced streaming,                 39743
+github,                             level 13 with dict dms,             advanced streaming,                 39743
+github,                             level 13 with dict dds,             advanced streaming,                 39743
+github,                             level 13 with dict copy,            advanced streaming,                 39948
+github,                             level 13 with dict load,            advanced streaming,                 42626
 github,                             level 16,                           advanced streaming,                 134064
 github,                             level 16 with dict,                 advanced streaming,                 37577
+github,                             level 16 with dict dms,             advanced streaming,                 37577
+github,                             level 16 with dict dds,             advanced streaming,                 37577
+github,                             level 16 with dict copy,            advanced streaming,                 37568
+github,                             level 16 with dict load,            advanced streaming,                 42340
 github,                             level 19,                           advanced streaming,                 134064
 github,                             level 19 with dict,                 advanced streaming,                 37576
+github,                             level 19 with dict dms,             advanced streaming,                 37576
+github,                             level 19 with dict dds,             advanced streaming,                 37576
+github,                             level 19 with dict copy,            advanced streaming,                 37567
+github,                             level 19 with dict load,            advanced streaming,                 39613
 github,                             no source size,                     advanced streaming,                 136335
+github,                             no source size with dict,           advanced streaming,                 41148
 github,                             long distance mode,                 advanced streaming,                 136335
 github,                             multithreaded,                      advanced streaming,                 136335
 github,                             multithreaded long distance mode,   advanced streaming,                 136335
@@ -446,6 +995,121 @@
 github,                             uncompressed literals optimal,      advanced streaming,                 157227
 github,                             huffman literals,                   advanced streaming,                 142465
 github,                             multithreaded with advanced params, advanced streaming,                 165915
+github.tar,                         level -5,                           advanced streaming,                 46747
+github.tar,                         level -5 with dict,                 advanced streaming,                 44440
+github.tar,                         level -3,                           advanced streaming,                 43537
+github.tar,                         level -3 with dict,                 advanced streaming,                 41112
+github.tar,                         level -1,                           advanced streaming,                 42465
+github.tar,                         level -1 with dict,                 advanced streaming,                 41196
+github.tar,                         level 0,                            advanced streaming,                 38441
+github.tar,                         level 0 with dict,                  advanced streaming,                 37995
+github.tar,                         level 0 with dict dms,              advanced streaming,                 38003
+github.tar,                         level 0 with dict dds,              advanced streaming,                 38003
+github.tar,                         level 0 with dict copy,             advanced streaming,                 37995
+github.tar,                         level 0 with dict load,             advanced streaming,                 37956
+github.tar,                         level 1,                            advanced streaming,                 39342
+github.tar,                         level 1 with dict,                  advanced streaming,                 38293
+github.tar,                         level 1 with dict dms,              advanced streaming,                 38303
+github.tar,                         level 1 with dict dds,              advanced streaming,                 38303
+github.tar,                         level 1 with dict copy,             advanced streaming,                 38293
+github.tar,                         level 1 with dict load,             advanced streaming,                 38766
+github.tar,                         level 3,                            advanced streaming,                 38441
+github.tar,                         level 3 with dict,                  advanced streaming,                 37995
+github.tar,                         level 3 with dict dms,              advanced streaming,                 38003
+github.tar,                         level 3 with dict dds,              advanced streaming,                 38003
+github.tar,                         level 3 with dict copy,             advanced streaming,                 37995
+github.tar,                         level 3 with dict load,             advanced streaming,                 37956
+github.tar,                         level 4,                            advanced streaming,                 38467
+github.tar,                         level 4 with dict,                  advanced streaming,                 37948
+github.tar,                         level 4 with dict dms,              advanced streaming,                 37954
+github.tar,                         level 4 with dict dds,              advanced streaming,                 37954
+github.tar,                         level 4 with dict copy,             advanced streaming,                 37948
+github.tar,                         level 4 with dict load,             advanced streaming,                 37927
+github.tar,                         level 5 row 1,                      advanced streaming,                 39788
+github.tar,                         level 5 row 1 with dict dms,        advanced streaming,                 39365
+github.tar,                         level 5 row 1 with dict dds,        advanced streaming,                 39233
+github.tar,                         level 5 row 1 with dict copy,       advanced streaming,                 39715
+github.tar,                         level 5 row 1 with dict load,       advanced streaming,                 39209
+github.tar,                         level 5 row 2,                      advanced streaming,                 39693
+github.tar,                         level 5 row 2 with dict dms,        advanced streaming,                 39024
+github.tar,                         level 5 row 2 with dict dds,        advanced streaming,                 39028
+github.tar,                         level 5 row 2 with dict copy,       advanced streaming,                 39040
+github.tar,                         level 5 row 2 with dict load,       advanced streaming,                 39037
+github.tar,                         level 5,                            advanced streaming,                 39693
+github.tar,                         level 5 with dict,                  advanced streaming,                 39040
+github.tar,                         level 5 with dict dms,              advanced streaming,                 39024
+github.tar,                         level 5 with dict dds,              advanced streaming,                 39028
+github.tar,                         level 5 with dict copy,             advanced streaming,                 39040
+github.tar,                         level 5 with dict load,             advanced streaming,                 39037
+github.tar,                         level 6,                            advanced streaming,                 39621
+github.tar,                         level 6 with dict,                  advanced streaming,                 38622
+github.tar,                         level 6 with dict dms,              advanced streaming,                 38608
+github.tar,                         level 6 with dict dds,              advanced streaming,                 38610
+github.tar,                         level 6 with dict copy,             advanced streaming,                 38622
+github.tar,                         level 6 with dict load,             advanced streaming,                 38962
+github.tar,                         level 7 row 1,                      advanced streaming,                 39206
+github.tar,                         level 7 row 1 with dict dms,        advanced streaming,                 37954
+github.tar,                         level 7 row 1 with dict dds,        advanced streaming,                 37954
+github.tar,                         level 7 row 1 with dict copy,       advanced streaming,                 38071
+github.tar,                         level 7 row 1 with dict load,       advanced streaming,                 38584
+github.tar,                         level 7 row 2,                      advanced streaming,                 39213
+github.tar,                         level 7 row 2 with dict dms,        advanced streaming,                 37848
+github.tar,                         level 7 row 2 with dict dds,        advanced streaming,                 37867
+github.tar,                         level 7 row 2 with dict copy,       advanced streaming,                 37848
+github.tar,                         level 7 row 2 with dict load,       advanced streaming,                 38582
+github.tar,                         level 7,                            advanced streaming,                 39213
+github.tar,                         level 7 with dict,                  advanced streaming,                 37848
+github.tar,                         level 7 with dict dms,              advanced streaming,                 37848
+github.tar,                         level 7 with dict dds,              advanced streaming,                 37867
+github.tar,                         level 7 with dict copy,             advanced streaming,                 37848
+github.tar,                         level 7 with dict load,             advanced streaming,                 38582
+github.tar,                         level 9,                            advanced streaming,                 36758
+github.tar,                         level 9 with dict,                  advanced streaming,                 36457
+github.tar,                         level 9 with dict dms,              advanced streaming,                 36549
+github.tar,                         level 9 with dict dds,              advanced streaming,                 36637
+github.tar,                         level 9 with dict copy,             advanced streaming,                 36457
+github.tar,                         level 9 with dict load,             advanced streaming,                 36350
+github.tar,                         level 12 row 1,                     advanced streaming,                 36435
+github.tar,                         level 12 row 1 with dict dms,       advanced streaming,                 36986
+github.tar,                         level 12 row 1 with dict dds,       advanced streaming,                 36986
+github.tar,                         level 12 row 1 with dict copy,      advanced streaming,                 36609
+github.tar,                         level 12 row 1 with dict load,      advanced streaming,                 36419
+github.tar,                         level 12 row 2,                     advanced streaming,                 36435
+github.tar,                         level 12 row 2 with dict dms,       advanced streaming,                 36986
+github.tar,                         level 12 row 2 with dict dds,       advanced streaming,                 36986
+github.tar,                         level 12 row 2 with dict copy,      advanced streaming,                 36609
+github.tar,                         level 12 row 2 with dict load,      advanced streaming,                 36424
+github.tar,                         level 13,                           advanced streaming,                 35621
+github.tar,                         level 13 with dict,                 advanced streaming,                 38726
+github.tar,                         level 13 with dict dms,             advanced streaming,                 38903
+github.tar,                         level 13 with dict dds,             advanced streaming,                 38903
+github.tar,                         level 13 with dict copy,            advanced streaming,                 38726
+github.tar,                         level 13 with dict load,            advanced streaming,                 36372
+github.tar,                         level 16,                           advanced streaming,                 40255
+github.tar,                         level 16 with dict,                 advanced streaming,                 33639
+github.tar,                         level 16 with dict dms,             advanced streaming,                 33544
+github.tar,                         level 16 with dict dds,             advanced streaming,                 33544
+github.tar,                         level 16 with dict copy,            advanced streaming,                 33639
+github.tar,                         level 16 with dict load,            advanced streaming,                 39353
+github.tar,                         level 19,                           advanced streaming,                 32837
+github.tar,                         level 19 with dict,                 advanced streaming,                 32895
+github.tar,                         level 19 with dict dms,             advanced streaming,                 32672
+github.tar,                         level 19 with dict dds,             advanced streaming,                 32672
+github.tar,                         level 19 with dict copy,            advanced streaming,                 32895
+github.tar,                         level 19 with dict load,            advanced streaming,                 32676
+github.tar,                         no source size,                     advanced streaming,                 38438
+github.tar,                         no source size with dict,           advanced streaming,                 38000
+github.tar,                         long distance mode,                 advanced streaming,                 39757
+github.tar,                         multithreaded,                      advanced streaming,                 38441
+github.tar,                         multithreaded long distance mode,   advanced streaming,                 39726
+github.tar,                         small window log,                   advanced streaming,                 199558
+github.tar,                         small hash log,                     advanced streaming,                 129870
+github.tar,                         small chain log,                    advanced streaming,                 41669
+github.tar,                         explicit params,                    advanced streaming,                 41227
+github.tar,                         uncompressed literals,              advanced streaming,                 41122
+github.tar,                         uncompressed literals optimal,      advanced streaming,                 35388
+github.tar,                         huffman literals,                   advanced streaming,                 38800
+github.tar,                         multithreaded with advanced params, advanced streaming,                 41122
 silesia,                            level -5,                           old streaming,                      6882505
 silesia,                            level -3,                           old streaming,                      6568376
 silesia,                            level -1,                           old streaming,                      6183403
@@ -453,16 +1117,16 @@
 silesia,                            level 1,                            old streaming,                      5314162
 silesia,                            level 3,                            old streaming,                      4849552
 silesia,                            level 4,                            old streaming,                      4786970
-silesia,                            level 5,                            old streaming,                      4710236
-silesia,                            level 6,                            old streaming,                      4660056
-silesia,                            level 7,                            old streaming,                      4596296
-silesia,                            level 9,                            old streaming,                      4543925
+silesia,                            level 5,                            old streaming,                      4707794
+silesia,                            level 6,                            old streaming,                      4666383
+silesia,                            level 7,                            old streaming,                      4603381
+silesia,                            level 9,                            old streaming,                      4546001
 silesia,                            level 13,                           old streaming,                      4482135
-silesia,                            level 16,                           old streaming,                      4377465
-silesia,                            level 19,                           old streaming,                      4293330
+silesia,                            level 16,                           old streaming,                      4360251
+silesia,                            level 19,                           old streaming,                      4283237
 silesia,                            no source size,                     old streaming,                      4849516
 silesia,                            uncompressed literals,              old streaming,                      4849552
-silesia,                            uncompressed literals optimal,      old streaming,                      4293330
+silesia,                            uncompressed literals optimal,      old streaming,                      4283237
 silesia,                            huffman literals,                   old streaming,                      6183403
 silesia.tar,                        level -5,                           old streaming,                      6982759
 silesia.tar,                        level -3,                           old streaming,                      6641283
@@ -471,16 +1135,16 @@
 silesia.tar,                        level 1,                            old streaming,                      5336939
 silesia.tar,                        level 3,                            old streaming,                      4861427
 silesia.tar,                        level 4,                            old streaming,                      4799630
-silesia.tar,                        level 5,                            old streaming,                      4722329
-silesia.tar,                        level 6,                            old streaming,                      4672288
-silesia.tar,                        level 7,                            old streaming,                      4606715
-silesia.tar,                        level 9,                            old streaming,                      4554154
+silesia.tar,                        level 5,                            old streaming,                      4719261
+silesia.tar,                        level 6,                            old streaming,                      4677729
+silesia.tar,                        level 7,                            old streaming,                      4613544
+silesia.tar,                        level 9,                            old streaming,                      4555432
 silesia.tar,                        level 13,                           old streaming,                      4491765
-silesia.tar,                        level 16,                           old streaming,                      4381350
-silesia.tar,                        level 19,                           old streaming,                      4281562
+silesia.tar,                        level 16,                           old streaming,                      4356834
+silesia.tar,                        level 19,                           old streaming,                      4264392
 silesia.tar,                        no source size,                     old streaming,                      4861423
 silesia.tar,                        uncompressed literals,              old streaming,                      4861427
-silesia.tar,                        uncompressed literals optimal,      old streaming,                      4281562
+silesia.tar,                        uncompressed literals optimal,      old streaming,                      4264392
 silesia.tar,                        huffman literals,                   old streaming,                      6190795
 github,                             level -5,                           old streaming,                      205285
 github,                             level -5 with dict,                 old streaming,                      46718
@@ -497,13 +1161,13 @@
 github,                             level 4,                            old streaming,                      136199
 github,                             level 4 with dict,                  old streaming,                      41251
 github,                             level 5,                            old streaming,                      135121
-github,                             level 5 with dict,                  old streaming,                      38938
+github,                             level 5 with dict,                  old streaming,                      38758
 github,                             level 6,                            old streaming,                      135122
-github,                             level 6 with dict,                  old streaming,                      38632
+github,                             level 6 with dict,                  old streaming,                      38671
 github,                             level 7,                            old streaming,                      135122
-github,                             level 7 with dict,                  old streaming,                      38771
+github,                             level 7 with dict,                  old streaming,                      38758
 github,                             level 9,                            old streaming,                      135122
-github,                             level 9 with dict,                  old streaming,                      39332
+github,                             level 9 with dict,                  old streaming,                      39437
 github,                             level 13,                           old streaming,                      134064
 github,                             level 13 with dict,                 old streaming,                      39743
 github,                             level 16,                           old streaming,                      134064
@@ -511,9 +1175,43 @@
 github,                             level 19,                           old streaming,                      134064
 github,                             level 19 with dict,                 old streaming,                      37576
 github,                             no source size,                     old streaming,                      140632
+github,                             no source size with dict,           old streaming,                      40654
 github,                             uncompressed literals,              old streaming,                      136335
 github,                             uncompressed literals optimal,      old streaming,                      134064
 github,                             huffman literals,                   old streaming,                      175568
+github.tar,                         level -5,                           old streaming,                      46747
+github.tar,                         level -5 with dict,                 old streaming,                      44440
+github.tar,                         level -3,                           old streaming,                      43537
+github.tar,                         level -3 with dict,                 old streaming,                      41112
+github.tar,                         level -1,                           old streaming,                      42465
+github.tar,                         level -1 with dict,                 old streaming,                      41196
+github.tar,                         level 0,                            old streaming,                      38441
+github.tar,                         level 0 with dict,                  old streaming,                      37995
+github.tar,                         level 1,                            old streaming,                      39342
+github.tar,                         level 1 with dict,                  old streaming,                      38293
+github.tar,                         level 3,                            old streaming,                      38441
+github.tar,                         level 3 with dict,                  old streaming,                      37995
+github.tar,                         level 4,                            old streaming,                      38467
+github.tar,                         level 4 with dict,                  old streaming,                      37948
+github.tar,                         level 5,                            old streaming,                      39693
+github.tar,                         level 5 with dict,                  old streaming,                      39040
+github.tar,                         level 6,                            old streaming,                      39621
+github.tar,                         level 6 with dict,                  old streaming,                      38622
+github.tar,                         level 7,                            old streaming,                      39213
+github.tar,                         level 7 with dict,                  old streaming,                      37848
+github.tar,                         level 9,                            old streaming,                      36758
+github.tar,                         level 9 with dict,                  old streaming,                      36457
+github.tar,                         level 13,                           old streaming,                      35621
+github.tar,                         level 13 with dict,                 old streaming,                      38726
+github.tar,                         level 16,                           old streaming,                      40255
+github.tar,                         level 16 with dict,                 old streaming,                      33639
+github.tar,                         level 19,                           old streaming,                      32837
+github.tar,                         level 19 with dict,                 old streaming,                      32895
+github.tar,                         no source size,                     old streaming,                      38438
+github.tar,                         no source size with dict,           old streaming,                      38000
+github.tar,                         uncompressed literals,              old streaming,                      38441
+github.tar,                         uncompressed literals optimal,      old streaming,                      32837
+github.tar,                         huffman literals,                   old streaming,                      42465
 silesia,                            level -5,                           old streaming advanced,             6882505
 silesia,                            level -3,                           old streaming advanced,             6568376
 silesia,                            level -1,                           old streaming advanced,             6183403
@@ -521,23 +1219,23 @@
 silesia,                            level 1,                            old streaming advanced,             5314162
 silesia,                            level 3,                            old streaming advanced,             4849552
 silesia,                            level 4,                            old streaming advanced,             4786970
-silesia,                            level 5,                            old streaming advanced,             4710236
-silesia,                            level 6,                            old streaming advanced,             4660056
-silesia,                            level 7,                            old streaming advanced,             4596296
-silesia,                            level 9,                            old streaming advanced,             4543925
+silesia,                            level 5,                            old streaming advanced,             4707794
+silesia,                            level 6,                            old streaming advanced,             4666383
+silesia,                            level 7,                            old streaming advanced,             4603381
+silesia,                            level 9,                            old streaming advanced,             4546001
 silesia,                            level 13,                           old streaming advanced,             4482135
-silesia,                            level 16,                           old streaming advanced,             4377465
-silesia,                            level 19,                           old streaming advanced,             4293330
+silesia,                            level 16,                           old streaming advanced,             4360251
+silesia,                            level 19,                           old streaming advanced,             4283237
 silesia,                            no source size,                     old streaming advanced,             4849516
 silesia,                            long distance mode,                 old streaming advanced,             4849552
 silesia,                            multithreaded,                      old streaming advanced,             4849552
 silesia,                            multithreaded long distance mode,   old streaming advanced,             4849552
 silesia,                            small window log,                   old streaming advanced,             7112062
-silesia,                            small hash log,                     old streaming advanced,             6555021
-silesia,                            small chain log,                    old streaming advanced,             4931148
-silesia,                            explicit params,                    old streaming advanced,             4797112
+silesia,                            small hash log,                     old streaming advanced,             6526141
+silesia,                            small chain log,                    old streaming advanced,             4912197
+silesia,                            explicit params,                    old streaming advanced,             4795887
 silesia,                            uncompressed literals,              old streaming advanced,             4849552
-silesia,                            uncompressed literals optimal,      old streaming advanced,             4293330
+silesia,                            uncompressed literals optimal,      old streaming advanced,             4283237
 silesia,                            huffman literals,                   old streaming advanced,             6183403
 silesia,                            multithreaded with advanced params, old streaming advanced,             4849552
 silesia.tar,                        level -5,                           old streaming advanced,             6982759
@@ -547,23 +1245,23 @@
 silesia.tar,                        level 1,                            old streaming advanced,             5336939
 silesia.tar,                        level 3,                            old streaming advanced,             4861427
 silesia.tar,                        level 4,                            old streaming advanced,             4799630
-silesia.tar,                        level 5,                            old streaming advanced,             4722329
-silesia.tar,                        level 6,                            old streaming advanced,             4672288
-silesia.tar,                        level 7,                            old streaming advanced,             4606715
-silesia.tar,                        level 9,                            old streaming advanced,             4554154
+silesia.tar,                        level 5,                            old streaming advanced,             4719261
+silesia.tar,                        level 6,                            old streaming advanced,             4677729
+silesia.tar,                        level 7,                            old streaming advanced,             4613544
+silesia.tar,                        level 9,                            old streaming advanced,             4555432
 silesia.tar,                        level 13,                           old streaming advanced,             4491765
-silesia.tar,                        level 16,                           old streaming advanced,             4381350
-silesia.tar,                        level 19,                           old streaming advanced,             4281562
+silesia.tar,                        level 16,                           old streaming advanced,             4356834
+silesia.tar,                        level 19,                           old streaming advanced,             4264392
 silesia.tar,                        no source size,                     old streaming advanced,             4861423
 silesia.tar,                        long distance mode,                 old streaming advanced,             4861427
 silesia.tar,                        multithreaded,                      old streaming advanced,             4861427
 silesia.tar,                        multithreaded long distance mode,   old streaming advanced,             4861427
 silesia.tar,                        small window log,                   old streaming advanced,             7118772
-silesia.tar,                        small hash log,                     old streaming advanced,             6587952
-silesia.tar,                        small chain log,                    old streaming advanced,             4943312
-silesia.tar,                        explicit params,                    old streaming advanced,             4808618
+silesia.tar,                        small hash log,                     old streaming advanced,             6529235
+silesia.tar,                        small chain log,                    old streaming advanced,             4917021
+silesia.tar,                        explicit params,                    old streaming advanced,             4807401
 silesia.tar,                        uncompressed literals,              old streaming advanced,             4861427
-silesia.tar,                        uncompressed literals optimal,      old streaming advanced,             4281562
+silesia.tar,                        uncompressed literals optimal,      old streaming advanced,             4264392
 silesia.tar,                        huffman literals,                   old streaming advanced,             6190795
 silesia.tar,                        multithreaded with advanced params, old streaming advanced,             4861427
 github,                             level -5,                           old streaming advanced,             216734
@@ -581,13 +1279,13 @@
 github,                             level 4,                            old streaming advanced,             141104
 github,                             level 4 with dict,                  old streaming advanced,             41084
 github,                             level 5,                            old streaming advanced,             139399
-github,                             level 5 with dict,                  old streaming advanced,             39159
+github,                             level 5 with dict,                  old streaming advanced,             38633
 github,                             level 6,                            old streaming advanced,             139402
-github,                             level 6 with dict,                  old streaming advanced,             38749
+github,                             level 6 with dict,                  old streaming advanced,             38723
 github,                             level 7,                            old streaming advanced,             138676
-github,                             level 7 with dict,                  old streaming advanced,             38746
+github,                             level 7 with dict,                  old streaming advanced,             38744
 github,                             level 9,                            old streaming advanced,             138676
-github,                             level 9 with dict,                  old streaming advanced,             38993
+github,                             level 9 with dict,                  old streaming advanced,             38981
 github,                             level 13,                           old streaming advanced,             138676
 github,                             level 13 with dict,                 old streaming advanced,             39731
 github,                             level 16,                           old streaming advanced,             138676
@@ -595,6 +1293,7 @@
 github,                             level 19,                           old streaming advanced,             134064
 github,                             level 19 with dict,                 old streaming advanced,             37576
 github,                             no source size,                     old streaming advanced,             140632
+github,                             no source size with dict,           old streaming advanced,             40608
 github,                             long distance mode,                 old streaming advanced,             141104
 github,                             multithreaded,                      old streaming advanced,             141104
 github,                             multithreaded long distance mode,   old streaming advanced,             141104
@@ -606,20 +1305,77 @@
 github,                             uncompressed literals optimal,      old streaming advanced,             134064
 github,                             huffman literals,                   old streaming advanced,             181108
 github,                             multithreaded with advanced params, old streaming advanced,             141104
-github,                             level -5 with dict,                 old streaming cdcit,                46718
-github,                             level -3 with dict,                 old streaming cdcit,                45395
-github,                             level -1 with dict,                 old streaming cdcit,                43170
-github,                             level 0 with dict,                  old streaming cdcit,                41148
-github,                             level 1 with dict,                  old streaming cdcit,                41682
-github,                             level 3 with dict,                  old streaming cdcit,                41148
-github,                             level 4 with dict,                  old streaming cdcit,                41251
-github,                             level 5 with dict,                  old streaming cdcit,                38938
-github,                             level 6 with dict,                  old streaming cdcit,                38632
-github,                             level 7 with dict,                  old streaming cdcit,                38771
-github,                             level 9 with dict,                  old streaming cdcit,                39332
-github,                             level 13 with dict,                 old streaming cdcit,                39743
-github,                             level 16 with dict,                 old streaming cdcit,                37577
-github,                             level 19 with dict,                 old streaming cdcit,                37576
+github.tar,                         level -5,                           old streaming advanced,             46747
+github.tar,                         level -5 with dict,                 old streaming advanced,             44824
+github.tar,                         level -3,                           old streaming advanced,             43537
+github.tar,                         level -3 with dict,                 old streaming advanced,             41800
+github.tar,                         level -1,                           old streaming advanced,             42465
+github.tar,                         level -1 with dict,                 old streaming advanced,             41471
+github.tar,                         level 0,                            old streaming advanced,             38441
+github.tar,                         level 0 with dict,                  old streaming advanced,             38013
+github.tar,                         level 1,                            old streaming advanced,             39342
+github.tar,                         level 1 with dict,                  old streaming advanced,             38940
+github.tar,                         level 3,                            old streaming advanced,             38441
+github.tar,                         level 3 with dict,                  old streaming advanced,             38013
+github.tar,                         level 4,                            old streaming advanced,             38467
+github.tar,                         level 4 with dict,                  old streaming advanced,             38063
+github.tar,                         level 5,                            old streaming advanced,             39693
+github.tar,                         level 5 with dict,                  old streaming advanced,             39049
+github.tar,                         level 6,                            old streaming advanced,             39621
+github.tar,                         level 6 with dict,                  old streaming advanced,             38959
+github.tar,                         level 7,                            old streaming advanced,             39213
+github.tar,                         level 7 with dict,                  old streaming advanced,             38573
+github.tar,                         level 9,                            old streaming advanced,             36758
+github.tar,                         level 9 with dict,                  old streaming advanced,             36233
+github.tar,                         level 13,                           old streaming advanced,             35621
+github.tar,                         level 13 with dict,                 old streaming advanced,             36035
+github.tar,                         level 16,                           old streaming advanced,             40255
+github.tar,                         level 16 with dict,                 old streaming advanced,             38736
+github.tar,                         level 19,                           old streaming advanced,             32837
+github.tar,                         level 19 with dict,                 old streaming advanced,             32876
+github.tar,                         no source size,                     old streaming advanced,             38438
+github.tar,                         no source size with dict,           old streaming advanced,             38015
+github.tar,                         long distance mode,                 old streaming advanced,             38441
+github.tar,                         multithreaded,                      old streaming advanced,             38441
+github.tar,                         multithreaded long distance mode,   old streaming advanced,             38441
+github.tar,                         small window log,                   old streaming advanced,             199561
+github.tar,                         small hash log,                     old streaming advanced,             129870
+github.tar,                         small chain log,                    old streaming advanced,             41669
+github.tar,                         explicit params,                    old streaming advanced,             41227
+github.tar,                         uncompressed literals,              old streaming advanced,             38441
+github.tar,                         uncompressed literals optimal,      old streaming advanced,             32837
+github.tar,                         huffman literals,                   old streaming advanced,             42465
+github.tar,                         multithreaded with advanced params, old streaming advanced,             38441
+github,                             level -5 with dict,                 old streaming cdict,                46718
+github,                             level -3 with dict,                 old streaming cdict,                45395
+github,                             level -1 with dict,                 old streaming cdict,                43170
+github,                             level 0 with dict,                  old streaming cdict,                41148
+github,                             level 1 with dict,                  old streaming cdict,                41682
+github,                             level 3 with dict,                  old streaming cdict,                41148
+github,                             level 4 with dict,                  old streaming cdict,                41251
+github,                             level 5 with dict,                  old streaming cdict,                38758
+github,                             level 6 with dict,                  old streaming cdict,                38671
+github,                             level 7 with dict,                  old streaming cdict,                38758
+github,                             level 9 with dict,                  old streaming cdict,                39437
+github,                             level 13 with dict,                 old streaming cdict,                39743
+github,                             level 16 with dict,                 old streaming cdict,                37577
+github,                             level 19 with dict,                 old streaming cdict,                37576
+github,                             no source size with dict,           old streaming cdict,                40654
+github.tar,                         level -5 with dict,                 old streaming cdict,                45018
+github.tar,                         level -3 with dict,                 old streaming cdict,                41886
+github.tar,                         level -1 with dict,                 old streaming cdict,                41636
+github.tar,                         level 0 with dict,                  old streaming cdict,                37956
+github.tar,                         level 1 with dict,                  old streaming cdict,                38766
+github.tar,                         level 3 with dict,                  old streaming cdict,                37956
+github.tar,                         level 4 with dict,                  old streaming cdict,                37927
+github.tar,                         level 5 with dict,                  old streaming cdict,                39037
+github.tar,                         level 6 with dict,                  old streaming cdict,                38962
+github.tar,                         level 7 with dict,                  old streaming cdict,                38582
+github.tar,                         level 9 with dict,                  old streaming cdict,                36350
+github.tar,                         level 13 with dict,                 old streaming cdict,                36372
+github.tar,                         level 16 with dict,                 old streaming cdict,                39353
+github.tar,                         level 19 with dict,                 old streaming cdict,                32676
+github.tar,                         no source size with dict,           old streaming cdict,                38000
 github,                             level -5 with dict,                 old streaming advanced cdict,       49562
 github,                             level -3 with dict,                 old streaming advanced cdict,       44956
 github,                             level -1 with dict,                 old streaming advanced cdict,       42383
@@ -627,10 +1383,26 @@
 github,                             level 1 with dict,                  old streaming advanced cdict,       42430
 github,                             level 3 with dict,                  old streaming advanced cdict,       41113
 github,                             level 4 with dict,                  old streaming advanced cdict,       41084
-github,                             level 5 with dict,                  old streaming advanced cdict,       39159
-github,                             level 6 with dict,                  old streaming advanced cdict,       38749
-github,                             level 7 with dict,                  old streaming advanced cdict,       38746
-github,                             level 9 with dict,                  old streaming advanced cdict,       38993
+github,                             level 5 with dict,                  old streaming advanced cdict,       38633
+github,                             level 6 with dict,                  old streaming advanced cdict,       38723
+github,                             level 7 with dict,                  old streaming advanced cdict,       38744
+github,                             level 9 with dict,                  old streaming advanced cdict,       38981
 github,                             level 13 with dict,                 old streaming advanced cdict,       39731
 github,                             level 16 with dict,                 old streaming advanced cdict,       40789
 github,                             level 19 with dict,                 old streaming advanced cdict,       37576
+github,                             no source size with dict,           old streaming advanced cdict,       40608
+github.tar,                         level -5 with dict,                 old streaming advanced cdict,       44307
+github.tar,                         level -3 with dict,                 old streaming advanced cdict,       41359
+github.tar,                         level -1 with dict,                 old streaming advanced cdict,       41322
+github.tar,                         level 0 with dict,                  old streaming advanced cdict,       38013
+github.tar,                         level 1 with dict,                  old streaming advanced cdict,       39002
+github.tar,                         level 3 with dict,                  old streaming advanced cdict,       38013
+github.tar,                         level 4 with dict,                  old streaming advanced cdict,       38063
+github.tar,                         level 5 with dict,                  old streaming advanced cdict,       39049
+github.tar,                         level 6 with dict,                  old streaming advanced cdict,       38959
+github.tar,                         level 7 with dict,                  old streaming advanced cdict,       38573
+github.tar,                         level 9 with dict,                  old streaming advanced cdict,       36233
+github.tar,                         level 13 with dict,                 old streaming advanced cdict,       36035
+github.tar,                         level 16 with dict,                 old streaming advanced cdict,       38736
+github.tar,                         level 19 with dict,                 old streaming advanced cdict,       32876
+github.tar,                         no source size with dict,           old streaming advanced cdict,       38015
diff --git a/tests/regression/test.c b/tests/regression/test.c
index ff2cdba..1de6be8 100644
--- a/tests/regression/test.c
+++ b/tests/regression/test.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/roundTripCrash.c b/tests/roundTripCrash.c
index c117d2c..9aa208c 100644
--- a/tests/roundTripCrash.c
+++ b/tests/roundTripCrash.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/seqgen.c b/tests/seqgen.c
index 29c0c40..1e340c8 100644
--- a/tests/seqgen.c
+++ b/tests/seqgen.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/seqgen.h b/tests/seqgen.h
index 808099b..cea3f55 100644
--- a/tests/seqgen.h
+++ b/tests/seqgen.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/test-license.py b/tests/test-license.py
index 522884d..2247765 100755
--- a/tests/test-license.py
+++ b/tests/test-license.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 # ################################################################
-# Copyright (c) 2016-2020, Facebook, Inc.
+# Copyright (c) Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
@@ -10,16 +10,12 @@
 # You may select, at your option, one of the above-listed licenses.
 # ################################################################
 
-import datetime
 import enum
 import glob
 import os
+import re
 import sys
 
-YEAR = datetime.datetime.now().year
-
-YEAR_STR = str(YEAR)
-
 ROOT = os.path.join(os.path.dirname(__file__), "..")
 
 RELDIRS = [
@@ -28,22 +24,26 @@
     "lib",
     "programs",
     "tests",
+    "contrib/linux-kernel",
 ]
 
-DIRS = [os.path.join(ROOT, d) for d in RELDIRS]
+REL_EXCLUDES = [
+    "contrib/linux-kernel/test/include",
+]
 
-class File(enum.Enum):
-    C = 1
-    H = 2
-    MAKE = 3
-    PY = 4
+def to_abs(d):
+    return os.path.normpath(os.path.join(ROOT, d)) + "/"
 
-SUFFIX = {
-    File.C: ".c",
-    File.H: ".h",
-    File.MAKE: "Makefile",
-    File.PY: ".py",
-}
+DIRS = [to_abs(d) for d in RELDIRS]
+EXCLUDES = [to_abs(d) for d in REL_EXCLUDES]
+
+SUFFIXES = [
+    ".c",
+    ".h",
+    "Makefile",
+    ".mk",
+    ".py",
+]
 
 # License should certainly be in the first 10 KB.
 MAX_BYTES = 10000
@@ -69,10 +69,13 @@
     # From divsufsort
     "divsufsort.c",
     "divsufsort.h",
+    # License is slightly different because it references GitHub
+    "linux_zstd.h",
 }
 
 
 def valid_copyright(lines):
+    YEAR_REGEX = re.compile("\d\d\d\d|present")
     for line in lines:
         line = line.strip()
         if "Copyright" not in line:
@@ -81,8 +84,9 @@
             return (False, f"Copyright line '{line}' contains 'present'!")
         if "Facebook, Inc" not in line:
             return (False, f"Copyright line '{line}' does not contain 'Facebook, Inc'")
-        if YEAR_STR not in line:
-            return (False, f"Copyright line '{line}' does not contain {YEAR}")
+        year = YEAR_REGEX.search(line)
+        if year is not None:
+            return (False, f"Copyright line '{line}' contains {year.group(0)}; it should be yearless")
         if " (c) " not in line:
             return (False, f"Copyright line '{line}' does not contain ' (c) '!")
         return (True, "")
@@ -107,35 +111,45 @@
     with open(filename, "r") as f:
         lines = f.readlines(MAX_BYTES)
     lines = lines[:min(len(lines), MAX_LINES)]
-                
+
     ok = True
     if os.path.basename(filename) not in COPYRIGHT_EXCEPTIONS:
         c_ok, c_msg = valid_copyright(lines)
         if not c_ok:
-            print(f"{filename}: {c_msg}")
+            print(f"{filename}: {c_msg}", file=sys.stderr)
             ok = False
     if os.path.basename(filename) not in LICENSE_EXCEPTIONS:
         l_ok, l_msg = valid_license(lines)
         if not l_ok:
-            print(f"{filename}: {l_msg}")
+            print(f"{filename}: {l_msg}", file=sys.stderr)
             ok = False
     return ok
 
 
+def exclude(filename):
+    for x in EXCLUDES:
+        if filename.startswith(x):
+            return True
+    return False
+
 def main():
     invalid_files = []
     for directory in DIRS:
-        for suffix in SUFFIX.values():
-            files = set(glob.glob(f"{directory}/*{suffix}"))
-            files |= set(glob.glob(f"{directory}/**/*{suffix}"))
+        for suffix in SUFFIXES:
+            files = set(glob.glob(f"{directory}/**/*{suffix}", recursive=True))
             for filename in files:
+                if exclude(filename):
+                    continue
                 if not valid_file(filename):
                     invalid_files.append(filename)
     if len(invalid_files) > 0:
-        print(f"Invalid files: {invalid_files}")
+        print("Fail!", file=sys.stderr)
+        for f in invalid_files:
+            print(f)
+        return 1
     else:
-        print("Pass!")
-    return len(invalid_files)
+        print("Pass!", file=sys.stderr)
+        return 0
 
 if __name__ == "__main__":
-    sys.exit(main())
\ No newline at end of file
+    sys.exit(main())
diff --git a/tests/test-zstd-versions.py b/tests/test-zstd-versions.py
index fa21717..c86af7d 100755
--- a/tests/test-zstd-versions.py
+++ b/tests/test-zstd-versions.py
@@ -2,7 +2,7 @@
 """Test zstd interoperability between versions"""
 
 # ################################################################
-# Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+# Copyright (c) Yann Collet, Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/zbufftest.c b/tests/zbufftest.c
deleted file mode 100644
index cd3706a..0000000
--- a/tests/zbufftest.c
+++ /dev/null
@@ -1,625 +0,0 @@
-/*
- * Copyright (c) 2015-2020, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-
-/*-************************************
-*  Compiler specific
-**************************************/
-#ifdef _MSC_VER    /* Visual Studio */
-#  define _CRT_SECURE_NO_WARNINGS     /* fgets */
-#  pragma warning(disable : 4127)     /* disable: C4127: conditional expression is constant */
-#  pragma warning(disable : 4146)     /* disable: C4146: minus unsigned expression */
-#endif
-
-
-/*-************************************
-*  Includes
-**************************************/
-#include <stdlib.h>       /* free */
-#include <stdio.h>        /* fgets, sscanf */
-#include <string.h>       /* strcmp */
-#include "timefn.h"       /* UTIL_time_t */
-#include "mem.h"
-#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_maxCLevel */
-#include "zstd.h"         /* ZSTD_compressBound */
-#define ZBUFF_STATIC_LINKING_ONLY  /* ZBUFF_createCCtx_advanced */
-#include "zbuff.h"        /* ZBUFF_isError */
-#include "datagen.h"      /* RDG_genBuffer */
-#define XXH_STATIC_LINKING_ONLY
-#include "xxhash.h"       /* XXH64_* */
-#include "util.h"
-#include "assert.h"
-
-
-/*-************************************
-*  Constants
-**************************************/
-#define KB *(1U<<10)
-#define MB *(1U<<20)
-#define GB *(1U<<30)
-
-static const U32 nbTestsDefault = 10000;
-#define COMPRESSIBLE_NOISE_LENGTH (10 MB)
-#define FUZ_COMPRESSIBILITY_DEFAULT 50
-static const U32 prime1 = 2654435761U;
-static const U32 prime2 = 2246822519U;
-
-
-
-/*-************************************
-*  Display Macros
-**************************************/
-#define DISPLAY(...)          fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...)  if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-static U32 g_displayLevel = 2;
-
-static const U64 g_refreshRate = SEC_TO_MICRO / 6;
-static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
-
-#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
-            if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
-            { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
-            if (g_displayLevel>=4) fflush(stderr); } }
-
-static U64 g_clockTime = 0;
-
-
-/*-*******************************************************
-*  Fuzzer functions
-*********************************************************/
-#undef MIN
-#undef MAX
-#define MIN(a,b) ((a)<(b)?(a):(b))
-#define MAX(a,b) ((a)>(b)?(a):(b))
-/*! FUZ_rand() :
-    @return : a 27 bits random value, from a 32-bits `seed`.
-    `seed` is also modified */
-#  define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
-static unsigned int FUZ_rand(unsigned int* seedPtr)
-{
-    U32 rand32 = *seedPtr;
-    rand32 *= prime1;
-    rand32 += prime2;
-    rand32  = FUZ_rotl32(rand32, 13);
-    *seedPtr = rand32;
-    return rand32 >> 5;
-}
-
-
-/*
-static unsigned FUZ_highbit32(U32 v32)
-{
-    unsigned nbBits = 0;
-    if (v32==0) return 0;
-    for ( ; v32 ; v32>>=1) nbBits++;
-    return nbBits;
-}
-*/
-
-static void* ZBUFF_allocFunction(void* opaque, size_t size)
-{
-    void* address = malloc(size);
-    (void)opaque;
-    /* DISPLAYLEVEL(4, "alloc %p, %d opaque=%p \n", address, (int)size, opaque); */
-    return address;
-}
-
-static void ZBUFF_freeFunction(void* opaque, void* address)
-{
-    (void)opaque;
-    /* if (address) DISPLAYLEVEL(4, "free %p opaque=%p \n", address, opaque); */
-    free(address);
-}
-
-static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem customMem)
-{
-    int testResult = 0;
-    size_t CNBufferSize = COMPRESSIBLE_NOISE_LENGTH;
-    void* CNBuffer = malloc(CNBufferSize);
-    size_t const skippableFrameSize = 11;
-    size_t const compressedBufferSize = (8 + skippableFrameSize) + ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH);
-    void* compressedBuffer = malloc(compressedBufferSize);
-    size_t const decodedBufferSize = CNBufferSize;
-    void* decodedBuffer = malloc(decodedBufferSize);
-    size_t cSize, readSize, readSkipSize, genSize;
-    U32 testNb=0;
-    ZBUFF_CCtx* zc = ZBUFF_createCCtx_advanced(customMem);
-    ZBUFF_DCtx* zd = ZBUFF_createDCtx_advanced(customMem);
-
-    /* Create compressible test buffer */
-    if (!CNBuffer || !compressedBuffer || !decodedBuffer || !zc || !zd) {
-        DISPLAY("Not enough memory, aborting\n");
-        goto _output_error;
-    }
-    RDG_genBuffer(CNBuffer, CNBufferSize, compressibility, 0., seed);
-
-    /* generate skippable frame */
-    MEM_writeLE32(compressedBuffer, ZSTD_MAGIC_SKIPPABLE_START);
-    MEM_writeLE32(((char*)compressedBuffer)+4, (U32)skippableFrameSize);
-    cSize = skippableFrameSize + 8;
-
-    /* Basic compression test */
-    DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
-    ZBUFF_compressInitDictionary(zc, CNBuffer, 128 KB, 1);
-    readSize = CNBufferSize;
-    genSize = compressedBufferSize;
-    { size_t const r = ZBUFF_compressContinue(zc, ((char*)compressedBuffer)+cSize, &genSize, CNBuffer, &readSize);
-      if (ZBUFF_isError(r)) goto _output_error; }
-    if (readSize != CNBufferSize) goto _output_error;   /* entire input should be consumed */
-    cSize += genSize;
-    genSize = compressedBufferSize - cSize;
-    { size_t const r = ZBUFF_compressEnd(zc, ((char*)compressedBuffer)+cSize, &genSize);
-      if (r != 0) goto _output_error; }  /* error, or some data not flushed */
-    cSize += genSize;
-    DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100);
-
-    /* skippable frame test */
-    DISPLAYLEVEL(4, "test%3i : decompress skippable frame : ", testNb++);
-    ZBUFF_decompressInitDictionary(zd, CNBuffer, 128 KB);
-    readSkipSize = cSize;
-    genSize = CNBufferSize;
-    { size_t const r = ZBUFF_decompressContinue(zd, decodedBuffer, &genSize, compressedBuffer, &readSkipSize);
-      if (r != 0) goto _output_error; }
-    if (genSize != 0) goto _output_error;   /* skippable frame len is 0 */
-    DISPLAYLEVEL(4, "OK \n");
-
-    /* Basic decompression test */
-    DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
-    ZBUFF_decompressInitDictionary(zd, CNBuffer, 128 KB);
-    readSize = cSize - readSkipSize;
-    genSize = CNBufferSize;
-    { size_t const r = ZBUFF_decompressContinue(zd, decodedBuffer, &genSize, ((char*)compressedBuffer)+readSkipSize, &readSize);
-      if (r != 0) goto _output_error; }  /* should reach end of frame == 0; otherwise, some data left, or an error */
-    if (genSize != CNBufferSize) goto _output_error;   /* should regenerate the same amount */
-    if (readSize+readSkipSize != cSize) goto _output_error;   /* should have read the entire frame */
-    DISPLAYLEVEL(4, "OK \n");
-
-    DISPLAYLEVEL(4, "test%3i : ZBUFF_recommendedCInSize : ", testNb++); { assert(ZBUFF_recommendedCInSize() != 0); } DISPLAYLEVEL(4, "OK \n");
-    DISPLAYLEVEL(4, "test%3i : ZBUFF_recommendedCOutSize : ", testNb++); { assert(ZBUFF_recommendedCOutSize() != 0); } DISPLAYLEVEL(4, "OK \n");
-    DISPLAYLEVEL(4, "test%3i : ZBUFF_recommendedDInSize : ", testNb++); { assert(ZBUFF_recommendedDInSize() != 0); } DISPLAYLEVEL(4, "OK \n");
-    DISPLAYLEVEL(4, "test%3i : ZBUFF_recommendedDOutSize : ", testNb++); { assert(ZBUFF_recommendedDOutSize() != 0); } DISPLAYLEVEL(4, "OK \n");
-
-    /* check regenerated data is byte exact */
-    DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++);
-    {   size_t i;
-        for (i=0; i<CNBufferSize; i++) {
-            if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;
-    }   }
-    DISPLAYLEVEL(4, "OK \n");
-
-    /* Byte-by-byte decompression test */
-    DISPLAYLEVEL(4, "test%3i : decompress byte-by-byte : ", testNb++);
-    {   size_t r, pIn=0, pOut=0;
-        do
-        {   ZBUFF_decompressInitDictionary(zd, CNBuffer, 128 KB);
-            r = 1;
-            while (r) {
-                size_t inS = 1;
-                size_t outS = 1;
-                r = ZBUFF_decompressContinue(zd, ((BYTE*)decodedBuffer)+pOut, &outS, ((BYTE*)compressedBuffer)+pIn, &inS);
-                pIn += inS;
-                pOut += outS;
-            }
-            readSize = pIn;
-            genSize = pOut;
-        } while (genSize==0);
-    }
-    if (genSize != CNBufferSize) goto _output_error;   /* should regenerate the same amount */
-    if (readSize != cSize) goto _output_error;   /* should have read the entire frame */
-    DISPLAYLEVEL(4, "OK \n");
-
-    /* check regenerated data is byte exact */
-    DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++);
-    {   size_t i;
-        for (i=0; i<CNBufferSize; i++) {
-            if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;
-    }   }
-    DISPLAYLEVEL(4, "OK \n");
-
-_end:
-    ZBUFF_freeCCtx(zc);
-    ZBUFF_freeDCtx(zd);
-    free(CNBuffer);
-    free(compressedBuffer);
-    free(decodedBuffer);
-    return testResult;
-
-_output_error:
-    testResult = 1;
-    DISPLAY("Error detected in Unit tests ! \n");
-    goto _end;
-}
-
-
-static size_t findDiff(const void* buf1, const void* buf2, size_t max)
-{
-    const BYTE* b1 = (const BYTE*)buf1;
-    const BYTE* b2 = (const BYTE*)buf2;
-    size_t u;
-    for (u=0; u<max; u++) {
-        if (b1[u] != b2[u]) break;
-    }
-    return u;
-}
-
-static size_t FUZ_rLogLength(U32* seed, U32 logLength)
-{
-    size_t const lengthMask = ((size_t)1 << logLength) - 1;
-    return (lengthMask+1) + (FUZ_rand(seed) & lengthMask);
-}
-
-static size_t FUZ_randomLength(U32* seed, U32 maxLog)
-{
-    U32 const logLength = FUZ_rand(seed) % maxLog;
-    return FUZ_rLogLength(seed, logLength);
-}
-
-#define CHECK(cond, ...) if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \
-                         DISPLAY(" (seed %u, test nb %u)  \n", seed, testNb); goto _output_error; }
-
-static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility)
-{
-    static const U32 maxSrcLog = 24;
-    static const U32 maxSampleLog = 19;
-    BYTE* cNoiseBuffer[5];
-    size_t const srcBufferSize = (size_t)1<<maxSrcLog;
-    BYTE* copyBuffer;
-    size_t const copyBufferSize= srcBufferSize + (1<<maxSampleLog);
-    BYTE* cBuffer;
-    size_t const cBufferSize   = ZSTD_compressBound(srcBufferSize);
-    BYTE* dstBuffer;
-    size_t dstBufferSize = srcBufferSize;
-    U32 result = 0;
-    U32 testNb = 0;
-    U32 coreSeed = seed;
-    ZBUFF_CCtx* zc;
-    ZBUFF_DCtx* zd;
-    UTIL_time_t startClock = UTIL_getTime();
-
-    /* allocations */
-    zc = ZBUFF_createCCtx();
-    zd = ZBUFF_createDCtx();
-    cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize);
-    cNoiseBuffer[1] = (BYTE*)malloc (srcBufferSize);
-    cNoiseBuffer[2] = (BYTE*)malloc (srcBufferSize);
-    cNoiseBuffer[3] = (BYTE*)malloc (srcBufferSize);
-    cNoiseBuffer[4] = (BYTE*)malloc (srcBufferSize);
-    copyBuffer= (BYTE*)malloc (copyBufferSize);
-    dstBuffer = (BYTE*)malloc (dstBufferSize);
-    cBuffer   = (BYTE*)malloc (cBufferSize);
-    CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !cNoiseBuffer[3] || !cNoiseBuffer[4] ||
-           !copyBuffer || !dstBuffer || !cBuffer || !zc || !zd,
-           "Not enough memory, fuzzer tests cancelled");
-
-    /* Create initial samples */
-    RDG_genBuffer(cNoiseBuffer[0], srcBufferSize, 0.00, 0., coreSeed);    /* pure noise */
-    RDG_genBuffer(cNoiseBuffer[1], srcBufferSize, 0.05, 0., coreSeed);    /* barely compressible */
-    RDG_genBuffer(cNoiseBuffer[2], srcBufferSize, compressibility, 0., coreSeed);
-    RDG_genBuffer(cNoiseBuffer[3], srcBufferSize, 0.95, 0., coreSeed);    /* highly compressible */
-    RDG_genBuffer(cNoiseBuffer[4], srcBufferSize, 1.00, 0., coreSeed);    /* sparse content */
-    memset(copyBuffer, 0x65, copyBufferSize);                             /* make copyBuffer considered initialized */
-
-    /* catch up testNb */
-    for (testNb=1; testNb < startTest; testNb++)
-        FUZ_rand(&coreSeed);
-
-    /* test loop */
-    for ( ; (testNb <= nbTests) || (UTIL_clockSpanMicro(startClock) < g_clockTime) ; testNb++ ) {
-        U32 lseed;
-        const BYTE* srcBuffer;
-        const BYTE* dict;
-        size_t maxTestSize, dictSize;
-        size_t cSize, totalTestSize, totalCSize, totalGenSize;
-        size_t errorCode;
-        U32 n, nbChunks;
-        XXH64_state_t xxhState;
-        U64 crcOrig;
-
-        /* init */
-        DISPLAYUPDATE(2, "\r%6u", testNb);
-        if (nbTests >= testNb) DISPLAYUPDATE(2, "/%6u   ", nbTests);
-        FUZ_rand(&coreSeed);
-        lseed = coreSeed ^ prime1;
-
-        /* states full reset (unsynchronized) */
-        /* some issues only happen when reusing states in a specific sequence of parameters */
-        if ((FUZ_rand(&lseed) & 0xFF) == 131) { ZBUFF_freeCCtx(zc); zc = ZBUFF_createCCtx(); }
-        if ((FUZ_rand(&lseed) & 0xFF) == 132) { ZBUFF_freeDCtx(zd); zd = ZBUFF_createDCtx(); }
-
-        /* srcBuffer selection [0-4] */
-        {   U32 buffNb = FUZ_rand(&lseed) & 0x7F;
-            if (buffNb & 7) buffNb=2;   /* most common : compressible (P) */
-            else {
-                buffNb >>= 3;
-                if (buffNb & 7) {
-                    const U32 tnb[2] = { 1, 3 };   /* barely/highly compressible */
-                    buffNb = tnb[buffNb >> 3];
-                } else {
-                    const U32 tnb[2] = { 0, 4 };   /* not compressible / sparse */
-                    buffNb = tnb[buffNb >> 3];
-            }   }
-            srcBuffer = cNoiseBuffer[buffNb];
-        }
-
-        /* compression init */
-        {   U32 const testLog = FUZ_rand(&lseed) % maxSrcLog;
-            U32 const cLevel = (FUZ_rand(&lseed) % (ZSTD_maxCLevel() - (testLog/3))) + 1;
-            maxTestSize = FUZ_rLogLength(&lseed, testLog);
-            dictSize  = (FUZ_rand(&lseed)==1) ? FUZ_randomLength(&lseed, maxSampleLog) : 0;
-            /* random dictionary selection */
-            {   size_t const dictStart = FUZ_rand(&lseed) % (srcBufferSize - dictSize);
-                dict = srcBuffer + dictStart;
-            }
-            {   ZSTD_parameters params = ZSTD_getParams(cLevel, 0, dictSize);
-                params.fParams.checksumFlag = FUZ_rand(&lseed) & 1;
-                params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1;
-                {   size_t const initError = ZBUFF_compressInit_advanced(zc, dict, dictSize, params, ZSTD_CONTENTSIZE_UNKNOWN);
-                    CHECK (ZBUFF_isError(initError),"init error : %s", ZBUFF_getErrorName(initError));
-        }   }   }
-
-        /* multi-segments compression test */
-        XXH64_reset(&xxhState, 0);
-        nbChunks    = (FUZ_rand(&lseed) & 127) + 2;
-        for (n=0, cSize=0, totalTestSize=0 ; (n<nbChunks) && (totalTestSize < maxTestSize) ; n++) {
-            /* compress random chunk into random size dst buffer */
-            {   size_t readChunkSize = FUZ_randomLength(&lseed, maxSampleLog);
-                size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
-                size_t dstBuffSize = MIN(cBufferSize - cSize, randomDstSize);
-                size_t const srcStart = FUZ_rand(&lseed) % (srcBufferSize - readChunkSize);
-
-                size_t const compressionError = ZBUFF_compressContinue(zc, cBuffer+cSize, &dstBuffSize, srcBuffer+srcStart, &readChunkSize);
-                CHECK (ZBUFF_isError(compressionError), "compression error : %s", ZBUFF_getErrorName(compressionError));
-
-                XXH64_update(&xxhState, srcBuffer+srcStart, readChunkSize);
-                memcpy(copyBuffer+totalTestSize, srcBuffer+srcStart, readChunkSize);
-                cSize += dstBuffSize;
-                totalTestSize += readChunkSize;
-            }
-
-            /* random flush operation, to mess around */
-            if ((FUZ_rand(&lseed) & 15) == 0) {
-                size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
-                size_t dstBuffSize = MIN(cBufferSize - cSize, randomDstSize);
-                size_t const flushError = ZBUFF_compressFlush(zc, cBuffer+cSize, &dstBuffSize);
-                CHECK (ZBUFF_isError(flushError), "flush error : %s", ZBUFF_getErrorName(flushError));
-                cSize += dstBuffSize;
-        }   }
-
-        /* final frame epilogue */
-        {   size_t remainingToFlush = (size_t)(-1);
-            while (remainingToFlush) {
-                size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
-                size_t dstBuffSize = MIN(cBufferSize - cSize, randomDstSize);
-                U32 const enoughDstSize = dstBuffSize >= remainingToFlush;
-                remainingToFlush = ZBUFF_compressEnd(zc, cBuffer+cSize, &dstBuffSize);
-                CHECK (ZBUFF_isError(remainingToFlush), "flush error : %s", ZBUFF_getErrorName(remainingToFlush));
-                CHECK (enoughDstSize && remainingToFlush, "ZBUFF_compressEnd() not fully flushed (%u remaining), but enough space available", (U32)remainingToFlush);
-                cSize += dstBuffSize;
-        }   }
-        crcOrig = XXH64_digest(&xxhState);
-
-        /* multi - fragments decompression test */
-        ZBUFF_decompressInitDictionary(zd, dict, dictSize);
-        errorCode = 1;
-        for (totalCSize = 0, totalGenSize = 0 ; errorCode ; ) {
-            size_t readCSrcSize = FUZ_randomLength(&lseed, maxSampleLog);
-            size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
-            size_t dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize);
-            errorCode = ZBUFF_decompressContinue(zd, dstBuffer+totalGenSize, &dstBuffSize, cBuffer+totalCSize, &readCSrcSize);
-            CHECK (ZBUFF_isError(errorCode), "decompression error : %s", ZBUFF_getErrorName(errorCode));
-            totalGenSize += dstBuffSize;
-            totalCSize += readCSrcSize;
-        }
-        CHECK (errorCode != 0, "frame not fully decoded");
-        CHECK (totalGenSize != totalTestSize, "decompressed data : wrong size")
-        CHECK (totalCSize != cSize, "compressed data should be fully read")
-        { U64 const crcDest = XXH64(dstBuffer, totalTestSize, 0);
-          if (crcDest!=crcOrig) findDiff(copyBuffer, dstBuffer, totalTestSize);
-          CHECK (crcDest!=crcOrig, "decompressed data corrupted"); }
-
-        /*=====   noisy/erroneous src decompression test   =====*/
-
-        /* add some noise */
-        {   U32 const nbNoiseChunks = (FUZ_rand(&lseed) & 7) + 2;
-            U32 nn; for (nn=0; nn<nbNoiseChunks; nn++) {
-                size_t const randomNoiseSize = FUZ_randomLength(&lseed, maxSampleLog);
-                size_t const noiseSize  = MIN((cSize/3) , randomNoiseSize);
-                size_t const noiseStart = FUZ_rand(&lseed) % (srcBufferSize - noiseSize);
-                size_t const cStart = FUZ_rand(&lseed) % (cSize - noiseSize);
-                memcpy(cBuffer+cStart, srcBuffer+noiseStart, noiseSize);
-        }   }
-
-        /* try decompression on noisy data */
-        ZBUFF_decompressInit(zd);
-        totalCSize = 0;
-        totalGenSize = 0;
-        while ( (totalCSize < cSize) && (totalGenSize < dstBufferSize) ) {
-            size_t readCSrcSize = FUZ_randomLength(&lseed, maxSampleLog);
-            size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
-            size_t dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize);
-            size_t const decompressError = ZBUFF_decompressContinue(zd, dstBuffer+totalGenSize, &dstBuffSize, cBuffer+totalCSize, &readCSrcSize);
-            if (ZBUFF_isError(decompressError)) break;   /* error correctly detected */
-            totalGenSize += dstBuffSize;
-            totalCSize += readCSrcSize;
-    }   }
-    DISPLAY("\r%u fuzzer tests completed   \n", testNb);
-
-_cleanup:
-    ZBUFF_freeCCtx(zc);
-    ZBUFF_freeDCtx(zd);
-    free(cNoiseBuffer[0]);
-    free(cNoiseBuffer[1]);
-    free(cNoiseBuffer[2]);
-    free(cNoiseBuffer[3]);
-    free(cNoiseBuffer[4]);
-    free(copyBuffer);
-    free(cBuffer);
-    free(dstBuffer);
-    return result;
-
-_output_error:
-    result = 1;
-    goto _cleanup;
-}
-
-
-/*-*******************************************************
-*  Command line
-*********************************************************/
-static int FUZ_usage(const char* programName)
-{
-    DISPLAY( "Usage :\n");
-    DISPLAY( "      %s [args]\n", programName);
-    DISPLAY( "\n");
-    DISPLAY( "Arguments :\n");
-    DISPLAY( " -i#    : Nb of tests (default:%u) \n", nbTestsDefault);
-    DISPLAY( " -s#    : Select seed (default:prompt user)\n");
-    DISPLAY( " -t#    : Select starting test number (default:0)\n");
-    DISPLAY( " -P#    : Select compressibility in %% (default:%i%%)\n", FUZ_COMPRESSIBILITY_DEFAULT);
-    DISPLAY( " -v     : verbose\n");
-    DISPLAY( " -p     : pause at the end\n");
-    DISPLAY( " -h     : display help and exit\n");
-    return 0;
-}
-
-
-int main(int argc, const char** argv)
-{
-    U32 seed=0;
-    int seedset=0;
-    int argNb;
-    int nbTests = nbTestsDefault;
-    int testNb = 0;
-    int proba = FUZ_COMPRESSIBILITY_DEFAULT;
-    int result=0;
-    U32 mainPause = 0;
-    const char* programName = argv[0];
-    ZSTD_customMem customMem = { ZBUFF_allocFunction, ZBUFF_freeFunction, NULL };
-    ZSTD_customMem customNULL = { NULL, NULL, NULL };
-
-    /* Check command line */
-    for(argNb=1; argNb<argc; argNb++) {
-        const char* argument = argv[argNb];
-        if(!argument) continue;   /* Protection if argument empty */
-
-        /* Parsing commands. Aggregated commands are allowed */
-        if (argument[0]=='-') {
-            argument++;
-
-            while (*argument!=0) {
-                switch(*argument)
-                {
-                case 'h':
-                    return FUZ_usage(programName);
-                case 'v':
-                    argument++;
-                    g_displayLevel=4;
-                    break;
-                case 'q':
-                    argument++;
-                    g_displayLevel--;
-                    break;
-                case 'p': /* pause at the end */
-                    argument++;
-                    mainPause = 1;
-                    break;
-
-                case 'i':
-                    argument++;
-                    nbTests=0; g_clockTime=0;
-                    while ((*argument>='0') && (*argument<='9')) {
-                        nbTests *= 10;
-                        nbTests += *argument - '0';
-                        argument++;
-                    }
-                    break;
-
-                case 'T':
-                    argument++;
-                    nbTests=0; g_clockTime=0;
-                    while ((*argument>='0') && (*argument<='9')) {
-                        g_clockTime *= 10;
-                        g_clockTime += *argument - '0';
-                        argument++;
-                    }
-                    if (*argument=='m') g_clockTime *=60, argument++;
-                    if (*argument=='n') argument++;
-                    g_clockTime *= SEC_TO_MICRO;
-                    break;
-
-                case 's':
-                    argument++;
-                    seed=0;
-                    seedset=1;
-                    while ((*argument>='0') && (*argument<='9')) {
-                        seed *= 10;
-                        seed += *argument - '0';
-                        argument++;
-                    }
-                    break;
-
-                case 't':
-                    argument++;
-                    testNb=0;
-                    while ((*argument>='0') && (*argument<='9')) {
-                        testNb *= 10;
-                        testNb += *argument - '0';
-                        argument++;
-                    }
-                    break;
-
-                case 'P':   /* compressibility % */
-                    argument++;
-                    proba=0;
-                    while ((*argument>='0') && (*argument<='9')) {
-                        proba *= 10;
-                        proba += *argument - '0';
-                        argument++;
-                    }
-                    if (proba<0) proba=0;
-                    if (proba>100) proba=100;
-                    break;
-
-                default:
-                    return FUZ_usage(programName);
-                }
-    }   }   }   /* for(argNb=1; argNb<argc; argNb++) */
-
-    /* Get Seed */
-    DISPLAY("Starting zstd_buffered tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), ZSTD_VERSION_STRING);
-
-    if (!seedset) {
-        time_t const t = time(NULL);
-        U32 const h = XXH32(&t, sizeof(t), 1);
-        seed = h % 10000;
-    }
-    DISPLAY("Seed = %u\n", seed);
-    if (proba!=FUZ_COMPRESSIBILITY_DEFAULT) DISPLAY("Compressibility : %i%%\n", proba);
-
-    if (nbTests<=0) nbTests=1;
-
-    if (testNb==0) {
-        result = basicUnitTests(0, ((double)proba) / 100, customNULL);  /* constant seed for predictability */
-        if (!result) {
-            DISPLAYLEVEL(4, "Unit tests using customMem :\n")
-            result = basicUnitTests(0, ((double)proba) / 100, customMem);  /* use custom memory allocation functions */
-    }   }
-
-    if (!result)
-        result = fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100);
-
-    if (mainPause) {
-        int unused;
-        DISPLAY("Press Enter \n");
-        unused = getchar();
-        (void)unused;
-    }
-    return result;
-}
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index fa18ea4..bbef903 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -28,6 +28,7 @@
 #include <assert.h>       /* assert */
 #include "timefn.h"       /* UTIL_time_t, UTIL_getTime */
 #include "mem.h"
+#define ZSTD_DISABLE_DEPRECATE_WARNINGS /* No deprecation warnings, we still test some deprecated functions */
 #define ZSTD_STATIC_LINKING_ONLY  /* ZSTD_maxCLevel, ZSTD_customMem, ZSTD_getDictID_fromFrame */
 #include "zstd.h"         /* ZSTD_compressBound */
 #include "zstd_errors.h"  /* ZSTD_error_srcSize_wrong */
@@ -321,7 +322,9 @@
 
     /* Basic compression test using dict */
     DISPLAYLEVEL(3, "test%3i : skipframe + compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
-    CHECK_Z( ZSTD_initCStream_usingDict(zc, CNBuffer, dictSize, 1 /* cLevel */) );
+    CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+    CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_compressionLevel, 1) );
+    CHECK_Z( ZSTD_CCtx_loadDictionary(zc, CNBuffer, dictSize) );
     outBuff.dst = (char*)(compressedBuffer)+cSize;
     assert(compressedBufferSize > cSize);
     outBuff.size = compressedBufferSize - cSize;
@@ -368,7 +371,7 @@
     }
 
     /* Attempt bad compression parameters */
-    DISPLAYLEVEL(3, "test%3i : use bad compression parameters : ", testNb++);
+    DISPLAYLEVEL(3, "test%3i : use bad compression parameters with ZSTD_initCStream_advanced : ", testNb++);
     {   size_t r;
         ZSTD_parameters params = ZSTD_getParams(1, 0, 0);
         params.cParams.minMatch = 2;
@@ -539,7 +542,10 @@
     DISPLAYLEVEL(3, "OK\n");
     /* _srcSize compression test */
     DISPLAYLEVEL(3, "test%3i : compress_srcSize %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
-    CHECK_Z( ZSTD_initCStream_srcSize(zc, 1, CNBufferSize) );
+    CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+    CHECK_Z( ZSTD_CCtx_refCDict(zc, NULL) );
+    CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_compressionLevel, 1) );
+    CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, CNBufferSize) );
     outBuff.dst = (char*)(compressedBuffer);
     outBuff.size = compressedBufferSize;
     outBuff.pos = 0;
@@ -559,7 +565,10 @@
 
     /* wrong _srcSize compression test */
     DISPLAYLEVEL(3, "test%3i : too large srcSize : %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH-1);
-    ZSTD_initCStream_srcSize(zc, 1, CNBufferSize+1);
+    CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+    CHECK_Z( ZSTD_CCtx_refCDict(zc, NULL) );
+    CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_compressionLevel, 1) );
+    CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, CNBufferSize+1) );
     outBuff.dst = (char*)(compressedBuffer);
     outBuff.size = compressedBufferSize;
     outBuff.pos = 0;
@@ -574,7 +583,10 @@
 
     /* wrong _srcSize compression test */
     DISPLAYLEVEL(3, "test%3i : too small srcSize : %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH-1);
-    ZSTD_initCStream_srcSize(zc, 1, CNBufferSize-1);
+    CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+    CHECK_Z( ZSTD_CCtx_refCDict(zc, NULL) );
+    CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_compressionLevel, 1) );
+    CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, CNBufferSize-1) );
     outBuff.dst = (char*)(compressedBuffer);
     outBuff.size = compressedBufferSize;
     outBuff.pos = 0;
@@ -587,9 +599,9 @@
     }
 
     DISPLAYLEVEL(3, "test%3i : wrong srcSize !contentSizeFlag : %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH-1);
-    {   ZSTD_parameters params = ZSTD_getParams(1, CNBufferSize, 0);
-        params.fParams.contentSizeFlag = 0;
-        CHECK_Z(ZSTD_initCStream_advanced(zc, NULL, 0, params, CNBufferSize - MIN(CNBufferSize, 200 KB)));
+    {   CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+        CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_contentSizeFlag, 0) );
+        CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, CNBufferSize - MIN(CNBufferSize, 200 KB)) );
         outBuff.dst = (char*)compressedBuffer;
         outBuff.size = compressedBufferSize;
         outBuff.pos = 0;
@@ -609,7 +621,9 @@
     /* use 1 */
     {   size_t const inSize = 513;
         DISPLAYLEVEL(5, "use1 ");
-        ZSTD_initCStream_advanced(zc, NULL, 0, ZSTD_getParams(19, inSize, 0), inSize);   /* needs btopt + search3 to trigger hashLog3 */
+        CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+        CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_compressionLevel, 19) );
+        CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, inSize) );
         inBuff.src = CNBuffer;
         inBuff.size = inSize;
         inBuff.pos = 0;
@@ -626,7 +640,9 @@
     /* use 2 */
     {   size_t const inSize = 1025;   /* will not continue, because tables auto-adjust and are therefore different size */
         DISPLAYLEVEL(5, "use2 ");
-        ZSTD_initCStream_advanced(zc, NULL, 0, ZSTD_getParams(19, inSize, 0), inSize);   /* needs btopt + search3 to trigger hashLog3 */
+        CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+        CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_compressionLevel, 19) );
+        CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, inSize) );
         inBuff.src = CNBuffer;
         inBuff.size = inSize;
         inBuff.pos = 0;
@@ -672,7 +688,7 @@
     cSize = ZSTD_compress(compressedBuffer, compressedBufferSize, CNBuffer, CNBufferSize, 1);
     CHECK_Z(cSize);
     {   ZSTD_DCtx* dctx = ZSTD_createDCtx();
-        size_t const dctxSize0 = ZSTD_sizeof_DCtx(dctx);        
+        size_t const dctxSize0 = ZSTD_sizeof_DCtx(dctx);
         size_t dctxSize1;
         CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_stableOutBuffer, 1));
 
@@ -735,7 +751,7 @@
             CHECK(ZSTD_getErrorCode(r) != ZSTD_error_dstBuffer_wrong, "Must error but got %s", ZSTD_getErrorName(r));
         }
         DISPLAYLEVEL(3, "OK \n");
-        
+
         DISPLAYLEVEL(3, "test%3i : ZSTD_decompressStream() buffered output : ", testNb++);
         ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only);
         CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_stableOutBuffer, 0));
@@ -1274,7 +1290,7 @@
     if (ZSTD_findDecompressedSize(compressedBuffer, cSize) != 0) goto _output_error;
     DISPLAYLEVEL(3, "OK \n");
 
-    DISPLAYLEVEL(3, "test%3i : pledgedSrcSize == 0 behaves properly : ", testNb++);
+    DISPLAYLEVEL(3, "test%3i : pledgedSrcSize == 0 behaves properly with ZSTD_initCStream_advanced : ", testNb++);
     {   ZSTD_parameters params = ZSTD_getParams(5, 0, 0);
         params.fParams.contentSizeFlag = 1;
         CHECK_Z( ZSTD_initCStream_advanced(zc, NULL, 0, params, 0) );
@@ -1290,7 +1306,8 @@
     cSize = outBuff.pos;
     if (ZSTD_findDecompressedSize(compressedBuffer, cSize) != 0) goto _output_error;
 
-    ZSTD_resetCStream(zc, 0); /* resetCStream should treat 0 as unknown */
+    CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+    CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, ZSTD_CONTENTSIZE_UNKNOWN) );
     outBuff.dst = compressedBuffer;
     outBuff.size = compressedBufferSize;
     outBuff.pos = 0;
@@ -1434,7 +1451,8 @@
         CHECK_Z(ZSTD_initCStream_srcSize(zc, 11, ZSTD_CONTENTSIZE_UNKNOWN));
         CHECK_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_compressionLevel, &level));
         CHECK(level != 11, "Compression level does not match");
-        ZSTD_resetCStream(zc, ZSTD_CONTENTSIZE_UNKNOWN);
+        CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+        CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, ZSTD_CONTENTSIZE_UNKNOWN) );
         CHECK_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_compressionLevel, &level));
         CHECK(level != 11, "Compression level does not match");
     }
@@ -1444,7 +1462,8 @@
     {   ZSTD_parameters const params = ZSTD_getParams(9, 0, 0);
         CHECK_Z(ZSTD_initCStream_advanced(zc, NULL, 0, params, ZSTD_CONTENTSIZE_UNKNOWN));
         CHECK(badParameters(zc, params), "Compression parameters do not match");
-        ZSTD_resetCStream(zc, ZSTD_CONTENTSIZE_UNKNOWN);
+        CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+        CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, ZSTD_CONTENTSIZE_UNKNOWN) );
         CHECK(badParameters(zc, params), "Compression parameters do not match");
     }
     DISPLAYLEVEL(3, "OK \n");
@@ -1836,8 +1855,9 @@
             && oldTestLog /* at least one test happened */ && resetAllowed) {
             maxTestSize = FUZ_randomLength(&lseed, oldTestLog+2);
             maxTestSize = MIN(maxTestSize, srcBufferSize-16);
-            {   U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? 0 : maxTestSize;
-                CHECK_Z( ZSTD_resetCStream(zc, pledgedSrcSize) );
+            {   U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? ZSTD_CONTENTSIZE_UNKNOWN : maxTestSize; 
+                CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+                CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, pledgedSrcSize) );
             }
         } else {
             U32 const testLog = FUZ_rand(&lseed) % maxSrcLog;
@@ -1855,11 +1875,13 @@
                 dict = srcBuffer + dictStart;
             }
             {   U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? ZSTD_CONTENTSIZE_UNKNOWN : maxTestSize;
-                ZSTD_parameters params = ZSTD_getParams(cLevel, pledgedSrcSize, dictSize);
-                params.fParams.checksumFlag = FUZ_rand(&lseed) & 1;
-                params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1;
-                params.fParams.contentSizeFlag = FUZ_rand(&lseed) & 1;
-                CHECK_Z ( ZSTD_initCStream_advanced(zc, dict, dictSize, params, pledgedSrcSize) );
+                CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+                CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_compressionLevel, cLevel) );
+                CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_checksumFlag, FUZ_rand(&lseed) & 1) );
+                CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_contentSizeFlag, FUZ_rand(&lseed) & 1) );
+                CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_dictIDFlag, FUZ_rand(&lseed) & 1) );
+                CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, pledgedSrcSize) );
+                CHECK_Z( ZSTD_CCtx_loadDictionary(zc, dict, dictSize) );
         }   }
 
         /* multi-segments compression test */
@@ -2215,6 +2237,7 @@
                 }
 
                 if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_forceMaxWindow, FUZ_rand(&lseed) & 1, opaqueAPI) );
+                if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_deterministicRefPrefix, FUZ_rand(&lseed) & 1, opaqueAPI) );
 
                 /* Apply parameters */
                 if (opaqueAPI) {
diff --git a/zlibWrapper/Makefile b/zlibWrapper/Makefile
index d74c41b..6fd5ac3 100644
--- a/zlibWrapper/Makefile
+++ b/zlibWrapper/Makefile
@@ -6,7 +6,7 @@
 
 
 # Paths to static and dynamic zlib and zstd libraries
-# Use "make ZLIB_PATH=path/to/zlib ZLIB_LIBRARY=path/to/libz.a" to select a path to library
+# Use "make ZLIB_PATH=path/to/zlib ZLIB_LIBRARY=path/to/libz.so" to select a path to library
 ZLIB_LIBRARY ?= -lz
 ZLIB_PATH ?= .
 
@@ -18,7 +18,8 @@
 PROGRAMS_PATH = ../programs
 TEST_FILE = ../doc/zstd_compression_format.md
 
-VPATH = $(PROGRAMS_PATH)
+vpath %.c $(PROGRAMS_PATH) $(EXAMPLE_PATH) $(ZLIBWRAPPER_PATH)
+
 
 CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ -I$(ZLIB_PATH) -I$(PROGRAMS_PATH)       \
             -I$(ZSTDLIBDIR) -I$(ZSTDLIBDIR)/common -I$(ZLIBWRAPPER_PATH)
@@ -28,7 +29,9 @@
             -Wdeclaration-after-statement -Wstrict-prototypes -Wundef     \
             -Wstrict-aliasing=1
 CFLAGS   ?= -O3
-CFLAGS   += $(STDFLAGS) $(DEBUGFLAGS) $(MOREFLAGS)
+CFLAGS   += $(STDFLAGS) $(DEBUGFLAGS)
+CPPFLAGS += $(MOREFLAGS)
+LDLIBS   += $(ZLIB_LIBRARY)
 
 # Define *.exe as extension for Windows systems
 ifneq (,$(filter Windows%,$(OS)))
@@ -61,7 +64,7 @@
 	./minigzip_zstd -d example$(EXT).gz
 	@echo ---- minigzip end ----
 	./zwrapbench -qi1b3B1K $(TEST_FILE)
-	./zwrapbench -rqi1b1e5 ../lib ../programs ../tests
+	./zwrapbench -rqi1b1e3 ../lib
 
 #valgrindTest: ZSTDLIBRARY = $(ZSTDLIBDIR)/libzstd.so
 valgrindTest: VALGRIND = LD_LIBRARY_PATH=$(ZSTDLIBDIR) valgrind --track-origins=yes --leak-check=full --error-exitcode=1
@@ -79,35 +82,32 @@
 #.c.o:
 #	$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@
 
-minigzip: $(EXAMPLE_PATH)/minigzip.o zstd_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY)
-	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZSTDLIBRARY) $(ZLIB_LIBRARY) -o $@
+minigzip: minigzip.o zstd_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY)
 
-minigzip_zstd: $(EXAMPLE_PATH)/minigzip.o zstdTurnedOn_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY)
-	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZSTDLIBRARY) $(ZLIB_LIBRARY) -o $@
+minigzip_zstd: minigzip.o zstdTurnedOn_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY)
+	$(LINK.o) $^ $(LDLIBS) $(OUTPUT_OPTION)
 
-example: $(EXAMPLE_PATH)/example.o zstd_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY)
-	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZLIB_LIBRARY) -o $@
+example: example.o zstd_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY)
 
-example_zstd: $(EXAMPLE_PATH)/example.o zstdTurnedOn_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY)
-	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZLIB_LIBRARY) -o $@
+example_zstd: example.o zstdTurnedOn_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY)
+	$(LINK.o) $^ $(LDLIBS) $(OUTPUT_OPTION)
 
-fitblk: $(EXAMPLE_PATH)/fitblk.o zstd_zlibwrapper.o $(ZSTDLIBRARY)
-	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZLIB_LIBRARY) -o $@
+fitblk: fitblk.o zstd_zlibwrapper.o $(ZSTDLIBRARY)
 
-fitblk_zstd: $(EXAMPLE_PATH)/fitblk.o zstdTurnedOn_zlibwrapper.o $(ZSTDLIBRARY)
-	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZLIB_LIBRARY) -o $@
+fitblk_zstd: fitblk.o zstdTurnedOn_zlibwrapper.o $(ZSTDLIBRARY)
+	$(LINK.o) $^ $(LDLIBS) $(OUTPUT_OPTION)
 
-zwrapbench: $(EXAMPLE_PATH)/zwrapbench.o zstd_zlibwrapper.o util.o timefn.o datagen.o $(ZSTDLIBRARY)
-	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZLIB_LIBRARY) -o $@
+zwrapbench: zwrapbench.o zstd_zlibwrapper.o util.o timefn.o datagen.o $(ZSTDLIBRARY)
 
 
-zstd_zlibwrapper.o: $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.c $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.h
+zstd_zlibwrapper.o: zstd_zlibwrapper.h
 
 zstdTurnedOn_zlibwrapper.o: CPPFLAGS += -DZWRAP_USE_ZSTD=1
-zstdTurnedOn_zlibwrapper.o: $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.c $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.h
-	$(CC) $(CPPFLAGS) $(CFLAGS) $< -c -o $@
+zstdTurnedOn_zlibwrapper.o: zstd_zlibwrapper.c zstd_zlibwrapper.h
+	$(COMPILE.c) $< $(OUTPUT_OPTION)
 
-$(ZSTDLIBDIR)/libzstd.a:
+
+$(ZSTDLIBRARY):
 	$(MAKE) -C $(ZSTDLIBDIR) libzstd.a
 
 $(ZSTDLIBDIR)/libzstd.so:
diff --git a/zlibWrapper/examples/zwrapbench.c b/zlibWrapper/examples/zwrapbench.c
index f30cad4..5993e51 100644
--- a/zlibWrapper/examples/zwrapbench.c
+++ b/zlibWrapper/examples/zwrapbench.c
@@ -264,14 +264,29 @@
                     ZSTD_outBuffer outBuffer;
                     ZSTD_CStream* zbc = ZSTD_createCStream();
                     size_t rSize;
+                    ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams();
+
+                    if (!cctxParams) EXM_THROW(1, "ZSTD_createCCtxParams() allocation failure");
                     if (zbc == NULL) EXM_THROW(1, "ZSTD_createCStream() allocation failure");
-                    rSize = ZSTD_initCStream_advanced(zbc, dictBuffer, dictBufferSize, zparams, avgSize);
-                    if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_initCStream_advanced() failed : %s", ZSTD_getErrorName(rSize));
+
+                    {   int initErr = 0;
+                        initErr |= ZSTD_isError(ZSTD_CCtx_reset(zbc, ZSTD_reset_session_only));
+                        initErr |= ZSTD_isError(ZSTD_CCtxParams_init_advanced(cctxParams, zparams));
+                        initErr |= ZSTD_isError(ZSTD_CCtx_setParametersUsingCCtxParams(zbc, cctxParams));
+                        initErr |= ZSTD_isError(ZSTD_CCtx_setPledgedSrcSize(zbc, avgSize));
+                        initErr |= ZSTD_isError(ZSTD_CCtx_loadDictionary(zbc, dictBuffer, dictBufferSize));
+
+                        ZSTD_freeCCtxParams(cctxParams);
+                        if (initErr) EXM_THROW(1, "CCtx init failed!");
+                    }
+
                     do {
                         U32 blockNb;
                         for (blockNb=0; blockNb<nbBlocks; blockNb++) {
-                            rSize = ZSTD_resetCStream(zbc, blockTable[blockNb].srcSize);
-                            if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_resetCStream() failed : %s", ZSTD_getErrorName(rSize));
+                            rSize = ZSTD_CCtx_reset(zbc, ZSTD_reset_session_only);
+                            if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_CCtx_reset() failed : %s", ZSTD_getErrorName(rSize));
+                            rSize = ZSTD_CCtx_setPledgedSrcSize(zbc, blockTable[blockNb].srcSize);
+                            if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_CCtx_setPledgedSrcSize() failed : %s", ZSTD_getErrorName(rSize));
                             inBuffer.src = blockTable[blockNb].srcPtr;
                             inBuffer.size = blockTable[blockNb].srcSize;
                             inBuffer.pos = 0;
@@ -418,8 +433,8 @@
                     do {
                         U32 blockNb;
                         for (blockNb=0; blockNb<nbBlocks; blockNb++) {
-                            rSize = ZSTD_resetDStream(zbd);
-                            if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_resetDStream() failed : %s", ZSTD_getErrorName(rSize));
+                            rSize = ZSTD_DCtx_reset(zbd, ZSTD_reset_session_only);
+                            if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_DCtx_reset() failed : %s", ZSTD_getErrorName(rSize));
                             inBuffer.src = blockTable[blockNb].cPtr;
                             inBuffer.size = blockTable[blockNb].cSize;
                             inBuffer.pos = 0;
diff --git a/zlibWrapper/gzcompatibility.h b/zlibWrapper/gzcompatibility.h
index 394648a..c1aa2b8 100644
--- a/zlibWrapper/gzcompatibility.h
+++ b/zlibWrapper/gzcompatibility.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/zlibWrapper/zstd_zlibwrapper.c b/zlibWrapper/zstd_zlibwrapper.c
index 0ae5012..ceb2393 100644
--- a/zlibWrapper/zstd_zlibwrapper.c
+++ b/zlibWrapper/zstd_zlibwrapper.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -205,12 +205,21 @@
     if (zwc == NULL || zwc->zbc == NULL) return Z_STREAM_ERROR;
 
     if (!pledgedSrcSize) pledgedSrcSize = zwc->pledgedSrcSize;
-    {   ZSTD_parameters const params = ZSTD_getParams(zwc->compressionLevel, pledgedSrcSize, dictSize);
-        size_t initErr;
+    {   unsigned initErr = 0;
+        ZSTD_parameters const params = ZSTD_getParams(zwc->compressionLevel, pledgedSrcSize, dictSize);
+        ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams();
+        if (!cctxParams) return Z_STREAM_ERROR;
         LOG_WRAPPERC("pledgedSrcSize=%d windowLog=%d chainLog=%d hashLog=%d searchLog=%d minMatch=%d strategy=%d\n",
                     (int)pledgedSrcSize, params.cParams.windowLog, params.cParams.chainLog, params.cParams.hashLog, params.cParams.searchLog, params.cParams.minMatch, params.cParams.strategy);
-        initErr = ZSTD_initCStream_advanced(zwc->zbc, dict, dictSize, params, pledgedSrcSize);
-        if (ZSTD_isError(initErr)) return Z_STREAM_ERROR;
+
+        initErr |= ZSTD_isError(ZSTD_CCtx_reset(zwc->zbc, ZSTD_reset_session_only));
+        initErr |= ZSTD_isError(ZSTD_CCtxParams_init_advanced(cctxParams, params));
+        initErr |= ZSTD_isError(ZSTD_CCtx_setParametersUsingCCtxParams(zwc->zbc, cctxParams));
+        initErr |= ZSTD_isError(ZSTD_CCtx_setPledgedSrcSize(zwc->zbc, pledgedSrcSize));
+        initErr |= ZSTD_isError(ZSTD_CCtx_loadDictionary(zwc->zbc, dict, dictSize));
+
+        ZSTD_freeCCtxParams(cctxParams);
+        if (initErr) return Z_STREAM_ERROR;
     }
 
     return Z_OK;
@@ -372,9 +381,15 @@
     } else {
         if (zwc->totalInBytes == 0) {
             if (zwc->comprState == ZWRAP_useReset) {
-                size_t const resetErr = ZSTD_resetCStream(zwc->zbc, (flush == Z_FINISH) ? strm->avail_in : zwc->pledgedSrcSize);
+                size_t resetErr = ZSTD_CCtx_reset(zwc->zbc, ZSTD_reset_session_only);
                 if (ZSTD_isError(resetErr)) {
-                    LOG_WRAPPERC("ERROR: ZSTD_resetCStream errorCode=%s\n",
+                    LOG_WRAPPERC("ERROR: ZSTD_CCtx_reset errorCode=%s\n",
+                                ZSTD_getErrorName(resetErr));
+                    return ZWRAPC_finishWithError(zwc, strm, 0);
+                }
+                resetErr = ZSTD_CCtx_setPledgedSrcSize(zwc->zbc, (flush == Z_FINISH) ? strm->avail_in : zwc->pledgedSrcSize);
+                if (ZSTD_isError(resetErr)) {
+                    LOG_WRAPPERC("ERROR: ZSTD_CCtx_setPledgedSrcSize errorCode=%s\n",
                                 ZSTD_getErrorName(resetErr));
                     return ZWRAPC_finishWithError(zwc, strm, 0);
                 }
@@ -829,7 +844,7 @@
                     goto error;
                 }
             } else {
-                size_t const resetErr = ZSTD_resetDStream(zwd->zbd);
+                size_t const resetErr = ZSTD_DCtx_reset(zwd->zbd, ZSTD_reset_session_only);
                 if (ZSTD_isError(resetErr)) goto error;
             }
         } else {
@@ -849,7 +864,7 @@
                     goto error;
                 }
             } else {
-                size_t const resetErr = ZSTD_resetDStream(zwd->zbd);
+                size_t const resetErr = ZSTD_DCtx_reset(zwd->zbd, ZSTD_reset_session_only);
                 if (ZSTD_isError(resetErr)) goto error;
             }
 
diff --git a/zlibWrapper/zstd_zlibwrapper.h b/zlibWrapper/zstd_zlibwrapper.h
index e791043..042ab9f 100644
--- a/zlibWrapper/zstd_zlibwrapper.h
+++ b/zlibWrapper/zstd_zlibwrapper.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the