Merge android-4.9 into android-msm-bluecross-4.9-lts Merge android-4.9 common kernel into B1/C1 kernel LTS staging branch. Since android-msm-bluecross-4.9-lts is currently merged to LTS 4.9.150, I deliberately chose to merge only up to: commit ca975794ea76 ("ANDROID: cuttlefish: enable CONFIG_NET_CLS_BPF=y") which is the last commit on android-4.9 before LTS 4.9.151 was merged in. drivers/android/binder.c reviewed by tkjos@ drivers/block/zram/ resolved and reviewed by minchan@ drivers/cpufreq/cpufreq_times.c reviewed by cfries@ drivers/staging/android/ion/ reviewed by pgynther@ fs/crypto/ resolved and reviewed by ebiggers@ and jaegeuk@ fs/f2fs/ resolved and reviewed by jaegeuk@ fs/squashfs/ resolved with 'git checkout --theirs' (take android-4.9 as-is) kernel/sched/ resolved and reviewed by tkjos@ Bug: 115649324 Test: Manual testing Change-Id: Ib374f720a7ab4cf4146177584e486124eff75de3 Signed-off-by: Petri Gynther <pgynther@google.com>

commit: 82fb8eb1d4f8b41cc76b59fc567f77b6e9c4ffd8 [log] [tgz]
author: Petri Gynther <pgynther@google.com> Thu Jan 24 16:29:12 2019 -0800
committer: Petri Gynther <pgynther@google.com> Thu Jan 24 23:35:45 2019 -0800
tree: a30c69324a01e9cf76542e2122235779c2a2750b
parent: ab281c5bf4753b1048d8a059342f89a0d24e9169 [diff]
parent: ca975794ea765c52ccf59f30197d9677ec036418 [diff]
diff --git a/Documentation/ABI/testing/procfs-concurrent_time b/Documentation/ABI/testing/procfs-concurrent_time
new file mode 100644
index 0000000..55b4142
--- /dev/null
+++ b/Documentation/ABI/testing/procfs-concurrent_time

@@ -0,0 +1,16 @@
+What:		/proc/uid_concurrent_active_time
+Date:		December 2018
+Contact:	Connor O'Brien <connoro@google.com>
+Description:
+	The /proc/uid_concurrent_active_time file displays aggregated cputime
+	numbers for each uid, broken down by the total number of cores that were
+	active while the uid's task was running.
+
+What:		/proc/uid_concurrent_policy_time
+Date:		December 2018
+Contact:	Connor O'Brien <connoro@google.com>
+Description:
+	The /proc/uid_concurrent_policy_time file displays aggregated cputime
+	numbers for each uid, broken down based on the cpufreq policy
+	of the core used by the uid's task and the number of cores associated
+	with that policy that were active while the uid's task was running.

diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt
index 692171f..ae9a8f7 100644
--- a/Documentation/device-mapper/dm-crypt.txt
+++ b/Documentation/device-mapper/dm-crypt.txt

@@ -76,6 +76,20 @@
     thread because it benefits CFQ to have writes submitted using the
     same context.
 
+sector_size:<bytes>
+    Use <bytes> as the encryption unit instead of 512 bytes sectors.
+    This option can be in range 512 - 4096 bytes and must be power of two.
+    Virtual device will announce this size as a minimal IO and logical sector.
+
+iv_large_sectors
+   IV generators will use sector number counted in <sector_size> units
+   instead of default 512 bytes sectors.
+
+   For example, if <sector_size> is 4096 bytes, plain64 IV for the second
+   sector will be 8 (without flag) and 1 if iv_large_sectors is present.
+   The <iv_offset> must be multiple of <sector_size> (in 512 bytes units)
+   if this flag is specified.
+
 Example scripts
 ===============
 LUKS (Linux Unified Key Setup) is now the preferred way to set up disk

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 86addee..cc7568a 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt

@@ -3988,6 +3988,14 @@
 			last alloc / free. For more information see
 			Documentation/vm/slub.txt.
 
+	slub_memcg_sysfs=       [MM, SLUB]
+			Determines whether to enable sysfs directories for
+			memory cgroup sub-caches. 1 to enable, 0 to disable.
+			The default is determined by CONFIG_SLUB_MEMCG_SYSFS_ON.
+			Enabling this can lead to a very high number of debug
+			directories and files being created under
+			/sys/kernel/slub.
+
 	slub_max_order= [MM, SLUB]
 			Determines the maximum allowed order for slabs.
 			A high setting may cause OOMs due to memory

diff --git a/Makefile b/Makefile
index b34e371..fc02246 100644
--- a/Makefile
+++ b/Makefile

@@ -520,6 +520,9 @@
 ifneq ($(CROSS_COMPILE),)
 CLANG_TRIPLE	?= $(CROSS_COMPILE)
 CLANG_TARGET	:= --target=$(notdir $(CLANG_TRIPLE:%-=%))
+ifeq ($(shell $(srctree)/scripts/clang-android.sh $(CC) $(CLANG_TARGET)), y)
+$(error "Clang with Android --target detected. Did you specify CLANG_TRIPLE?")
+endif
 GCC_TOOLCHAIN_DIR := $(dir $(shell which $(LD)))
 CLANG_PREFIX	:= --prefix=$(GCC_TOOLCHAIN_DIR)
 GCC_TOOLCHAIN	:= $(realpath $(GCC_TOOLCHAIN_DIR)/..)

diff --git a/arch/Kconfig b/arch/Kconfig
index 364d5da..eab393f 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig

@@ -530,6 +530,7 @@
 	bool "Use clang Link Time Optimization (LTO) (EXPERIMENTAL)"
 	depends on ARCH_SUPPORTS_LTO_CLANG
 	depends on !FTRACE_MCOUNT_RECORD || HAVE_C_RECORDMCOUNT
+	depends on !KASAN
 	select LTO
 	select THIN_ARCHIVES
 	select LD_DEAD_CODE_DATA_ELIMINATION

diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 72aa5ec..27e2309 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig

@@ -62,34 +62,25 @@
 	  using optimized ARM assembler and NEON, when available.
 
 config CRYPTO_AES_ARM
-	tristate "AES cipher algorithms (ARM-asm)"
-	depends on ARM
+	tristate "Scalar AES cipher for ARM"
 	select CRYPTO_ALGAPI
 	select CRYPTO_AES
 	help
 	  Use optimized AES assembler routines for ARM platforms.
 
-	  AES cipher algorithms (FIPS-197). AES uses the Rijndael
-	  algorithm.
+	  On ARM processors without the Crypto Extensions, this is the
+	  fastest AES implementation for single blocks.  For multiple
+	  blocks, the NEON bit-sliced implementation is usually faster.
 
-	  Rijndael appears to be consistently a very good performer in
-	  both hardware and software across a wide range of computing
-	  environments regardless of its use in feedback or non-feedback
-	  modes. Its key setup time is excellent, and its key agility is
-	  good. Rijndael's very low memory requirements make it very well
-	  suited for restricted-space environments, in which it also
-	  demonstrates excellent performance. Rijndael's operations are
-	  among the easiest to defend against power and timing attacks.
-
-	  The AES specifies three key sizes: 128, 192 and 256 bits
-
-	  See <http://csrc.nist.gov/encryption/aes/> for more information.
+	  This implementation may be vulnerable to cache timing attacks,
+	  since it uses lookup tables.  However, as countermeasures it
+	  disables IRQs and preloads the tables; it is hoped this makes
+	  such attacks very difficult.
 
 config CRYPTO_AES_ARM_BS
 	tristate "Bit sliced AES using NEON instructions"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_ALGAPI
-	select CRYPTO_AES_ARM
 	select CRYPTO_ABLK_HELPER
 	help
 	  Use a faster and more secure NEON based implementation of AES in CBC,
@@ -120,11 +111,15 @@
 	  that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
 	  that is part of the ARMv8 Crypto Extensions
 
-config CRYPTO_SPECK_NEON
-	tristate "NEON accelerated Speck cipher algorithms"
+config CRYPTO_CHACHA20_NEON
+	tristate "NEON accelerated ChaCha stream cipher algorithms"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_GF128MUL
-	select CRYPTO_SPECK
+	select CRYPTO_CHACHA20
+
+config CRYPTO_NHPOLY1305_NEON
+	tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_NHPOLY1305
 
 endif

diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 1d0448a..ae2480b 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile

@@ -8,7 +8,8 @@
 obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
 obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
-obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
+obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
+obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
 
 ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
 ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
@@ -25,8 +26,8 @@
 endif
 endif
 
-aes-arm-y	:= aes-armv4.o aes_glue.o
-aes-arm-bs-y	:= aesbs-core.o aesbs-glue.o
+aes-arm-y	:= aes-cipher-core.o aes-cipher-glue.o
+aes-arm-bs-y	:= aes-armv4.o aesbs-core.o aesbs-glue.o
 sha1-arm-y	:= sha1-armv4-large.o sha1_glue.o
 sha1-arm-neon-y	:= sha1-armv7-neon.o sha1_neon_glue.o
 sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
@@ -37,7 +38,8 @@
 sha2-arm-ce-y	:= sha2-ce-core.o sha2-ce-glue.o
 aes-arm-ce-y	:= aes-ce-core.o aes-ce-glue.o
 ghash-arm-ce-y	:= ghash-ce-core.o ghash-ce-glue.o
-speck-neon-y := speck-neon-core.o speck-neon-glue.o
+chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
+nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
 
 quiet_cmd_perl = PERL    $@
       cmd_perl = $(PERL) $(<) > $(@)

diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S
new file mode 100644
index 0000000..f2d67c0
--- /dev/null
+++ b/arch/arm/crypto/aes-cipher-core.S

@@ -0,0 +1,264 @@
+/*
+ * Scalar AES core transform
+ *
+ * Copyright (C) 2017 Linaro Ltd.
+ * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/cache.h>
+
+	.text
+	.align		5
+
+	rk		.req	r0
+	rounds		.req	r1
+	in		.req	r2
+	out		.req	r3
+	ttab		.req	ip
+
+	t0		.req	lr
+	t1		.req	r2
+	t2		.req	r3
+
+	.macro		__select, out, in, idx
+	.if		__LINUX_ARM_ARCH__ < 7
+	and		\out, \in, #0xff << (8 * \idx)
+	.else
+	ubfx		\out, \in, #(8 * \idx), #8
+	.endif
+	.endm
+
+	.macro		__load, out, in, idx, sz, op
+	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
+	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
+	.else
+	ldr\op		\out, [ttab, \in, lsl #\sz]
+	.endif
+	.endm
+
+	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
+	__select	\out0, \in0, 0
+	__select	t0, \in1, 1
+	__load		\out0, \out0, 0, \sz, \op
+	__load		t0, t0, 1, \sz, \op
+
+	.if		\enc
+	__select	\out1, \in1, 0
+	__select	t1, \in2, 1
+	.else
+	__select	\out1, \in3, 0
+	__select	t1, \in0, 1
+	.endif
+	__load		\out1, \out1, 0, \sz, \op
+	__select	t2, \in2, 2
+	__load		t1, t1, 1, \sz, \op
+	__load		t2, t2, 2, \sz, \op
+
+	eor		\out0, \out0, t0, ror #24
+
+	__select	t0, \in3, 3
+	.if		\enc
+	__select	\t3, \in3, 2
+	__select	\t4, \in0, 3
+	.else
+	__select	\t3, \in1, 2
+	__select	\t4, \in2, 3
+	.endif
+	__load		\t3, \t3, 2, \sz, \op
+	__load		t0, t0, 3, \sz, \op
+	__load		\t4, \t4, 3, \sz, \op
+
+	.ifnb		\oldcpsr
+	/*
+	 * This is the final round and we're done with all data-dependent table
+	 * lookups, so we can safely re-enable interrupts.
+	 */
+	restore_irqs	\oldcpsr
+	.endif
+
+	eor		\out1, \out1, t1, ror #24
+	eor		\out0, \out0, t2, ror #16
+	ldm		rk!, {t1, t2}
+	eor		\out1, \out1, \t3, ror #16
+	eor		\out0, \out0, t0, ror #8
+	eor		\out1, \out1, \t4, ror #8
+	eor		\out0, \out0, t1
+	eor		\out1, \out1, t2
+	.endm
+
+	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
+	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
+	.endm
+
+	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
+	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
+	.endm
+
+	.macro		__rev, out, in
+	.if		__LINUX_ARM_ARCH__ < 6
+	lsl		t0, \in, #24
+	and		t1, \in, #0xff00
+	and		t2, \in, #0xff0000
+	orr		\out, t0, \in, lsr #24
+	orr		\out, \out, t1, lsl #8
+	orr		\out, \out, t2, lsr #8
+	.else
+	rev		\out, \in
+	.endif
+	.endm
+
+	.macro		__adrl, out, sym, c
+	.if		__LINUX_ARM_ARCH__ < 7
+	ldr\c		\out, =\sym
+	.else
+	movw\c		\out, #:lower16:\sym
+	movt\c		\out, #:upper16:\sym
+	.endif
+	.endm
+
+	.macro		do_crypt, round, ttab, ltab, bsz
+	push		{r3-r11, lr}
+
+	// Load keys first, to reduce latency in case they're not cached yet.
+	ldm		rk!, {r8-r11}
+
+	ldr		r4, [in]
+	ldr		r5, [in, #4]
+	ldr		r6, [in, #8]
+	ldr		r7, [in, #12]
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	__rev		r4, r4
+	__rev		r5, r5
+	__rev		r6, r6
+	__rev		r7, r7
+#endif
+
+	eor		r4, r4, r8
+	eor		r5, r5, r9
+	eor		r6, r6, r10
+	eor		r7, r7, r11
+
+	__adrl		ttab, \ttab
+	/*
+	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
+	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
+	 * intended to make cache-timing attacks more difficult.  They may not
+	 * be fully prevented, however; see the paper
+	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
+	 * ("Cache-timing attacks on AES") for a discussion of the many
+	 * difficulties involved in writing truly constant-time AES software.
+	 */
+	 save_and_disable_irqs	t0
+	.set		i, 0
+	.rept		1024 / 128
+	ldr		r8, [ttab, #i + 0]
+	ldr		r9, [ttab, #i + 32]
+	ldr		r10, [ttab, #i + 64]
+	ldr		r11, [ttab, #i + 96]
+	.set		i, i + 128
+	.endr
+	push		{t0}		// oldcpsr
+
+	tst		rounds, #2
+	bne		1f
+
+0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
+	\round		r4, r5, r6, r7, r8, r9, r10, r11
+
+1:	subs		rounds, rounds, #4
+	\round		r8, r9, r10, r11, r4, r5, r6, r7
+	bls		2f
+	\round		r4, r5, r6, r7, r8, r9, r10, r11
+	b		0b
+
+2:	.ifb		\ltab
+	add		ttab, ttab, #1
+	.else
+	__adrl		ttab, \ltab
+	// Prefetch inverse S-box for final round; see explanation above
+	.set		i, 0
+	.rept		256 / 64
+	ldr		t0, [ttab, #i + 0]
+	ldr		t1, [ttab, #i + 32]
+	.set		i, i + 64
+	.endr
+	.endif
+
+	pop		{rounds}	// oldcpsr
+	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	__rev		r4, r4
+	__rev		r5, r5
+	__rev		r6, r6
+	__rev		r7, r7
+#endif
+
+	ldr		out, [sp]
+
+	str		r4, [out]
+	str		r5, [out, #4]
+	str		r6, [out, #8]
+	str		r7, [out, #12]
+
+	pop		{r3-r11, pc}
+
+	.align		3
+	.ltorg
+	.endm
+
+ENTRY(__aes_arm_encrypt)
+	do_crypt	fround, crypto_ft_tab,, 2
+ENDPROC(__aes_arm_encrypt)
+
+	.align		5
+ENTRY(__aes_arm_decrypt)
+	do_crypt	iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
+ENDPROC(__aes_arm_decrypt)
+
+	.section	".rodata", "a"
+	.align		L1_CACHE_SHIFT
+	.type		__aes_arm_inverse_sbox, %object
+__aes_arm_inverse_sbox:
+	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+	.size		__aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox

diff --git a/arch/arm/crypto/aes-cipher-glue.c b/arch/arm/crypto/aes-cipher-glue.c
new file mode 100644
index 0000000..c222f6e
--- /dev/null
+++ b/arch/arm/crypto/aes-cipher-glue.c

@@ -0,0 +1,74 @@
+/*
+ * Scalar AES core transform
+ *
+ * Copyright (C) 2017 Linaro Ltd.
+ * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/aes.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
+EXPORT_SYMBOL(__aes_arm_encrypt);
+
+asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
+EXPORT_SYMBOL(__aes_arm_decrypt);
+
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	int rounds = 6 + ctx->key_length / 4;
+
+	__aes_arm_encrypt(ctx->key_enc, rounds, in, out);
+}
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	int rounds = 6 + ctx->key_length / 4;
+
+	__aes_arm_decrypt(ctx->key_dec, rounds, in, out);
+}
+
+static struct crypto_alg aes_alg = {
+	.cra_name			= "aes",
+	.cra_driver_name		= "aes-arm",
+	.cra_priority			= 200,
+	.cra_flags			= CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize			= AES_BLOCK_SIZE,
+	.cra_ctxsize			= sizeof(struct crypto_aes_ctx),
+	.cra_module			= THIS_MODULE,
+
+	.cra_cipher.cia_min_keysize	= AES_MIN_KEY_SIZE,
+	.cra_cipher.cia_max_keysize	= AES_MAX_KEY_SIZE,
+	.cra_cipher.cia_setkey		= crypto_aes_set_key,
+	.cra_cipher.cia_encrypt		= aes_encrypt,
+	.cra_cipher.cia_decrypt		= aes_decrypt,
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	.cra_alignmask			= 3,
+#endif
+};
+
+static int __init aes_init(void)
+{
+	return crypto_register_alg(&aes_alg);
+}
+
+static void __exit aes_fini(void)
+{
+	crypto_unregister_alg(&aes_alg);
+}
+
+module_init(aes_init);
+module_exit(aes_fini);
+
+MODULE_DESCRIPTION("Scalar AES cipher for ARM");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("aes");

diff --git a/arch/arm/crypto/aes_glue.c b/arch/arm/crypto/aes_glue.c
deleted file mode 100644
index 0409b8f..0000000
--- a/arch/arm/crypto/aes_glue.c
+++ /dev/null

@@ -1,98 +0,0 @@
-/*
- * Glue Code for the asm optimized version of the AES Cipher Algorithm
- */
-
-#include <linux/module.h>
-#include <linux/crypto.h>
-#include <crypto/aes.h>
-
-#include "aes_glue.h"
-
-EXPORT_SYMBOL(AES_encrypt);
-EXPORT_SYMBOL(AES_decrypt);
-EXPORT_SYMBOL(private_AES_set_encrypt_key);
-EXPORT_SYMBOL(private_AES_set_decrypt_key);
-
-static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-	struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
-	AES_encrypt(src, dst, &ctx->enc_key);
-}
-
-static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-	struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
-	AES_decrypt(src, dst, &ctx->dec_key);
-}
-
-static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-		unsigned int key_len)
-{
-	struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
-
-	switch (key_len) {
-	case AES_KEYSIZE_128:
-		key_len = 128;
-		break;
-	case AES_KEYSIZE_192:
-		key_len = 192;
-		break;
-	case AES_KEYSIZE_256:
-		key_len = 256;
-		break;
-	default:
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
-
-	if (private_AES_set_encrypt_key(in_key, key_len, &ctx->enc_key) == -1) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
-	/* private_AES_set_decrypt_key expects an encryption key as input */
-	ctx->dec_key = ctx->enc_key;
-	if (private_AES_set_decrypt_key(in_key, key_len, &ctx->dec_key) == -1) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static struct crypto_alg aes_alg = {
-	.cra_name		= "aes",
-	.cra_driver_name	= "aes-asm",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct AES_CTX),
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(aes_alg.cra_list),
-	.cra_u	= {
-		.cipher	= {
-			.cia_min_keysize	= AES_MIN_KEY_SIZE,
-			.cia_max_keysize	= AES_MAX_KEY_SIZE,
-			.cia_setkey		= aes_set_key,
-			.cia_encrypt		= aes_encrypt,
-			.cia_decrypt		= aes_decrypt
-		}
-	}
-};
-
-static int __init aes_init(void)
-{
-	return crypto_register_alg(&aes_alg);
-}
-
-static void __exit aes_fini(void)
-{
-	crypto_unregister_alg(&aes_alg);
-}
-
-module_init(aes_init);
-module_exit(aes_fini);
-
-MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm (ASM)");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_CRYPTO("aes");
-MODULE_ALIAS_CRYPTO("aes-asm");
-MODULE_AUTHOR("David McCullough <ucdevel@gmail.com>");

diff --git a/arch/arm/crypto/chacha-neon-core.S b/arch/arm/crypto/chacha-neon-core.S
new file mode 100644
index 0000000..eb22926
--- /dev/null
+++ b/arch/arm/crypto/chacha-neon-core.S

@@ -0,0 +1,560 @@
+/*
+ * ChaCha/XChaCha NEON helper functions
+ *
+ * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on:
+ * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSE3 functions
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+ /*
+  * NEON doesn't have a rotate instruction.  The alternatives are, more or less:
+  *
+  * (a)  vshl.u32 + vsri.u32		(needs temporary register)
+  * (b)  vshl.u32 + vshr.u32 + vorr	(needs temporary register)
+  * (c)  vrev32.16			(16-bit rotations only)
+  * (d)  vtbl.8 + vtbl.8		(multiple of 8 bits rotations only,
+  *					 needs index vector)
+  *
+  * ChaCha has 16, 12, 8, and 7-bit rotations.  For the 12 and 7-bit rotations,
+  * the only choices are (a) and (b).  We use (a) since it takes two-thirds the
+  * cycles of (b) on both Cortex-A7 and Cortex-A53.
+  *
+  * For the 16-bit rotation, we use vrev32.16 since it's consistently fastest
+  * and doesn't need a temporary register.
+  *
+  * For the 8-bit rotation, we use vtbl.8 + vtbl.8.  On Cortex-A7, this sequence
+  * is twice as fast as (a), even when doing (a) on multiple registers
+  * simultaneously to eliminate the stall between vshl and vsri.  Also, it
+  * parallelizes better when temporary registers are scarce.
+  *
+  * A disadvantage is that on Cortex-A53, the vtbl sequence is the same speed as
+  * (a), so the need to load the rotation table actually makes the vtbl method
+  * slightly slower overall on that CPU (~1.3% slower ChaCha20).  Still, it
+  * seems to be a good compromise to get a more significant speed boost on some
+  * CPUs, e.g. ~4.8% faster ChaCha20 on Cortex-A7.
+  */
+
+#include <linux/linkage.h>
+
+	.text
+	.fpu		neon
+	.align		5
+
+/*
+ * chacha_permute - permute one block
+ *
+ * Permute one 64-byte block where the state matrix is stored in the four NEON
+ * registers q0-q3.  It performs matrix operations on four words in parallel,
+ * but requires shuffling to rearrange the words after each round.
+ *
+ * The round count is given in r3.
+ *
+ * Clobbers: r3, ip, q4-q5
+ */
+chacha_permute:
+
+	adr		ip, .Lrol8_table
+	vld1.8		{d10}, [ip, :64]
+
+.Ldoubleround:
+	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+	vadd.i32	q0, q0, q1
+	veor		q3, q3, q0
+	vrev32.16	q3, q3
+
+	// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+	vadd.i32	q2, q2, q3
+	veor		q4, q1, q2
+	vshl.u32	q1, q4, #12
+	vsri.u32	q1, q4, #20
+
+	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+	vadd.i32	q0, q0, q1
+	veor		q3, q3, q0
+	vtbl.8		d6, {d6}, d10
+	vtbl.8		d7, {d7}, d10
+
+	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+	vadd.i32	q2, q2, q3
+	veor		q4, q1, q2
+	vshl.u32	q1, q4, #7
+	vsri.u32	q1, q4, #25
+
+	// x1 = shuffle32(x1, MASK(0, 3, 2, 1))
+	vext.8		q1, q1, q1, #4
+	// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+	vext.8		q2, q2, q2, #8
+	// x3 = shuffle32(x3, MASK(2, 1, 0, 3))
+	vext.8		q3, q3, q3, #12
+
+	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+	vadd.i32	q0, q0, q1
+	veor		q3, q3, q0
+	vrev32.16	q3, q3
+
+	// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+	vadd.i32	q2, q2, q3
+	veor		q4, q1, q2
+	vshl.u32	q1, q4, #12
+	vsri.u32	q1, q4, #20
+
+	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+	vadd.i32	q0, q0, q1
+	veor		q3, q3, q0
+	vtbl.8		d6, {d6}, d10
+	vtbl.8		d7, {d7}, d10
+
+	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+	vadd.i32	q2, q2, q3
+	veor		q4, q1, q2
+	vshl.u32	q1, q4, #7
+	vsri.u32	q1, q4, #25
+
+	// x1 = shuffle32(x1, MASK(2, 1, 0, 3))
+	vext.8		q1, q1, q1, #12
+	// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+	vext.8		q2, q2, q2, #8
+	// x3 = shuffle32(x3, MASK(0, 3, 2, 1))
+	vext.8		q3, q3, q3, #4
+
+	subs		r3, r3, #2
+	bne		.Ldoubleround
+
+	bx		lr
+ENDPROC(chacha_permute)
+
+ENTRY(chacha_block_xor_neon)
+	// r0: Input state matrix, s
+	// r1: 1 data block output, o
+	// r2: 1 data block input, i
+	// r3: nrounds
+	push		{lr}
+
+	// x0..3 = s0..3
+	add		ip, r0, #0x20
+	vld1.32		{q0-q1}, [r0]
+	vld1.32		{q2-q3}, [ip]
+
+	vmov		q8, q0
+	vmov		q9, q1
+	vmov		q10, q2
+	vmov		q11, q3
+
+	bl		chacha_permute
+
+	add		ip, r2, #0x20
+	vld1.8		{q4-q5}, [r2]
+	vld1.8		{q6-q7}, [ip]
+
+	// o0 = i0 ^ (x0 + s0)
+	vadd.i32	q0, q0, q8
+	veor		q0, q0, q4
+
+	// o1 = i1 ^ (x1 + s1)
+	vadd.i32	q1, q1, q9
+	veor		q1, q1, q5
+
+	// o2 = i2 ^ (x2 + s2)
+	vadd.i32	q2, q2, q10
+	veor		q2, q2, q6
+
+	// o3 = i3 ^ (x3 + s3)
+	vadd.i32	q3, q3, q11
+	veor		q3, q3, q7
+
+	add		ip, r1, #0x20
+	vst1.8		{q0-q1}, [r1]
+	vst1.8		{q2-q3}, [ip]
+
+	pop		{pc}
+ENDPROC(chacha_block_xor_neon)
+
+ENTRY(hchacha_block_neon)
+	// r0: Input state matrix, s
+	// r1: output (8 32-bit words)
+	// r2: nrounds
+	push		{lr}
+
+	vld1.32		{q0-q1}, [r0]!
+	vld1.32		{q2-q3}, [r0]
+
+	mov		r3, r2
+	bl		chacha_permute
+
+	vst1.32		{q0}, [r1]!
+	vst1.32		{q3}, [r1]
+
+	pop		{pc}
+ENDPROC(hchacha_block_neon)
+
+	.align		4
+.Lctrinc:	.word	0, 1, 2, 3
+.Lrol8_table:	.byte	3, 0, 1, 2, 7, 4, 5, 6
+
+	.align		5
+ENTRY(chacha_4block_xor_neon)
+	push		{r4-r5}
+	mov		r4, sp			// preserve the stack pointer
+	sub		ip, sp, #0x20		// allocate a 32 byte buffer
+	bic		ip, ip, #0x1f		// aligned to 32 bytes
+	mov		sp, ip
+
+	// r0: Input state matrix, s
+	// r1: 4 data blocks output, o
+	// r2: 4 data blocks input, i
+	// r3: nrounds
+
+	//
+	// This function encrypts four consecutive ChaCha blocks by loading
+	// the state matrix in NEON registers four times. The algorithm performs
+	// each operation on the corresponding word of each state matrix, hence
+	// requires no word shuffling. The words are re-interleaved before the
+	// final addition of the original state and the XORing step.
+	//
+
+	// x0..15[0-3] = s0..15[0-3]
+	add		ip, r0, #0x20
+	vld1.32		{q0-q1}, [r0]
+	vld1.32		{q2-q3}, [ip]
+
+	adr		r5, .Lctrinc
+	vdup.32		q15, d7[1]
+	vdup.32		q14, d7[0]
+	vld1.32		{q4}, [r5, :128]
+	vdup.32		q13, d6[1]
+	vdup.32		q12, d6[0]
+	vdup.32		q11, d5[1]
+	vdup.32		q10, d5[0]
+	vadd.u32	q12, q12, q4		// x12 += counter values 0-3
+	vdup.32		q9, d4[1]
+	vdup.32		q8, d4[0]
+	vdup.32		q7, d3[1]
+	vdup.32		q6, d3[0]
+	vdup.32		q5, d2[1]
+	vdup.32		q4, d2[0]
+	vdup.32		q3, d1[1]
+	vdup.32		q2, d1[0]
+	vdup.32		q1, d0[1]
+	vdup.32		q0, d0[0]
+
+	adr		ip, .Lrol8_table
+	b		1f
+
+.Ldoubleround4:
+	vld1.32		{q8-q9}, [sp, :256]
+1:
+	// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
+	// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
+	// x2 += x6, x14 = rotl32(x14 ^ x2, 16)
+	// x3 += x7, x15 = rotl32(x15 ^ x3, 16)
+	vadd.i32	q0, q0, q4
+	vadd.i32	q1, q1, q5
+	vadd.i32	q2, q2, q6
+	vadd.i32	q3, q3, q7
+
+	veor		q12, q12, q0
+	veor		q13, q13, q1
+	veor		q14, q14, q2
+	veor		q15, q15, q3
+
+	vrev32.16	q12, q12
+	vrev32.16	q13, q13
+	vrev32.16	q14, q14
+	vrev32.16	q15, q15
+
+	// x8 += x12, x4 = rotl32(x4 ^ x8, 12)
+	// x9 += x13, x5 = rotl32(x5 ^ x9, 12)
+	// x10 += x14, x6 = rotl32(x6 ^ x10, 12)
+	// x11 += x15, x7 = rotl32(x7 ^ x11, 12)
+	vadd.i32	q8, q8, q12
+	vadd.i32	q9, q9, q13
+	vadd.i32	q10, q10, q14
+	vadd.i32	q11, q11, q15
+
+	vst1.32		{q8-q9}, [sp, :256]
+
+	veor		q8, q4, q8
+	veor		q9, q5, q9
+	vshl.u32	q4, q8, #12
+	vshl.u32	q5, q9, #12
+	vsri.u32	q4, q8, #20
+	vsri.u32	q5, q9, #20
+
+	veor		q8, q6, q10
+	veor		q9, q7, q11
+	vshl.u32	q6, q8, #12
+	vshl.u32	q7, q9, #12
+	vsri.u32	q6, q8, #20
+	vsri.u32	q7, q9, #20
+
+	// x0 += x4, x12 = rotl32(x12 ^ x0, 8)
+	// x1 += x5, x13 = rotl32(x13 ^ x1, 8)
+	// x2 += x6, x14 = rotl32(x14 ^ x2, 8)
+	// x3 += x7, x15 = rotl32(x15 ^ x3, 8)
+	vld1.8		{d16}, [ip, :64]
+	vadd.i32	q0, q0, q4
+	vadd.i32	q1, q1, q5
+	vadd.i32	q2, q2, q6
+	vadd.i32	q3, q3, q7
+
+	veor		q12, q12, q0
+	veor		q13, q13, q1
+	veor		q14, q14, q2
+	veor		q15, q15, q3
+
+	vtbl.8		d24, {d24}, d16
+	vtbl.8		d25, {d25}, d16
+	vtbl.8		d26, {d26}, d16
+	vtbl.8		d27, {d27}, d16
+	vtbl.8		d28, {d28}, d16
+	vtbl.8		d29, {d29}, d16
+	vtbl.8		d30, {d30}, d16
+	vtbl.8		d31, {d31}, d16
+
+	vld1.32		{q8-q9}, [sp, :256]
+
+	// x8 += x12, x4 = rotl32(x4 ^ x8, 7)
+	// x9 += x13, x5 = rotl32(x5 ^ x9, 7)
+	// x10 += x14, x6 = rotl32(x6 ^ x10, 7)
+	// x11 += x15, x7 = rotl32(x7 ^ x11, 7)
+	vadd.i32	q8, q8, q12
+	vadd.i32	q9, q9, q13
+	vadd.i32	q10, q10, q14
+	vadd.i32	q11, q11, q15
+
+	vst1.32		{q8-q9}, [sp, :256]
+
+	veor		q8, q4, q8
+	veor		q9, q5, q9
+	vshl.u32	q4, q8, #7
+	vshl.u32	q5, q9, #7
+	vsri.u32	q4, q8, #25
+	vsri.u32	q5, q9, #25
+
+	veor		q8, q6, q10
+	veor		q9, q7, q11
+	vshl.u32	q6, q8, #7
+	vshl.u32	q7, q9, #7
+	vsri.u32	q6, q8, #25
+	vsri.u32	q7, q9, #25
+
+	vld1.32		{q8-q9}, [sp, :256]
+
+	// x0 += x5, x15 = rotl32(x15 ^ x0, 16)
+	// x1 += x6, x12 = rotl32(x12 ^ x1, 16)
+	// x2 += x7, x13 = rotl32(x13 ^ x2, 16)
+	// x3 += x4, x14 = rotl32(x14 ^ x3, 16)
+	vadd.i32	q0, q0, q5
+	vadd.i32	q1, q1, q6
+	vadd.i32	q2, q2, q7
+	vadd.i32	q3, q3, q4
+
+	veor		q15, q15, q0
+	veor		q12, q12, q1
+	veor		q13, q13, q2
+	veor		q14, q14, q3
+
+	vrev32.16	q15, q15
+	vrev32.16	q12, q12
+	vrev32.16	q13, q13
+	vrev32.16	q14, q14
+
+	// x10 += x15, x5 = rotl32(x5 ^ x10, 12)
+	// x11 += x12, x6 = rotl32(x6 ^ x11, 12)
+	// x8 += x13, x7 = rotl32(x7 ^ x8, 12)
+	// x9 += x14, x4 = rotl32(x4 ^ x9, 12)
+	vadd.i32	q10, q10, q15
+	vadd.i32	q11, q11, q12
+	vadd.i32	q8, q8, q13
+	vadd.i32	q9, q9, q14
+
+	vst1.32		{q8-q9}, [sp, :256]
+
+	veor		q8, q7, q8
+	veor		q9, q4, q9
+	vshl.u32	q7, q8, #12
+	vshl.u32	q4, q9, #12
+	vsri.u32	q7, q8, #20
+	vsri.u32	q4, q9, #20
+
+	veor		q8, q5, q10
+	veor		q9, q6, q11
+	vshl.u32	q5, q8, #12
+	vshl.u32	q6, q9, #12
+	vsri.u32	q5, q8, #20
+	vsri.u32	q6, q9, #20
+
+	// x0 += x5, x15 = rotl32(x15 ^ x0, 8)
+	// x1 += x6, x12 = rotl32(x12 ^ x1, 8)
+	// x2 += x7, x13 = rotl32(x13 ^ x2, 8)
+	// x3 += x4, x14 = rotl32(x14 ^ x3, 8)
+	vld1.8		{d16}, [ip, :64]
+	vadd.i32	q0, q0, q5
+	vadd.i32	q1, q1, q6
+	vadd.i32	q2, q2, q7
+	vadd.i32	q3, q3, q4
+
+	veor		q15, q15, q0
+	veor		q12, q12, q1
+	veor		q13, q13, q2
+	veor		q14, q14, q3
+
+	vtbl.8		d30, {d30}, d16
+	vtbl.8		d31, {d31}, d16
+	vtbl.8		d24, {d24}, d16
+	vtbl.8		d25, {d25}, d16
+	vtbl.8		d26, {d26}, d16
+	vtbl.8		d27, {d27}, d16
+	vtbl.8		d28, {d28}, d16
+	vtbl.8		d29, {d29}, d16
+
+	vld1.32		{q8-q9}, [sp, :256]
+
+	// x10 += x15, x5 = rotl32(x5 ^ x10, 7)
+	// x11 += x12, x6 = rotl32(x6 ^ x11, 7)
+	// x8 += x13, x7 = rotl32(x7 ^ x8, 7)
+	// x9 += x14, x4 = rotl32(x4 ^ x9, 7)
+	vadd.i32	q10, q10, q15
+	vadd.i32	q11, q11, q12
+	vadd.i32	q8, q8, q13
+	vadd.i32	q9, q9, q14
+
+	vst1.32		{q8-q9}, [sp, :256]
+
+	veor		q8, q7, q8
+	veor		q9, q4, q9
+	vshl.u32	q7, q8, #7
+	vshl.u32	q4, q9, #7
+	vsri.u32	q7, q8, #25
+	vsri.u32	q4, q9, #25
+
+	veor		q8, q5, q10
+	veor		q9, q6, q11
+	vshl.u32	q5, q8, #7
+	vshl.u32	q6, q9, #7
+	vsri.u32	q5, q8, #25
+	vsri.u32	q6, q9, #25
+
+	subs		r3, r3, #2
+	bne		.Ldoubleround4
+
+	// x0..7[0-3] are in q0-q7, x10..15[0-3] are in q10-q15.
+	// x8..9[0-3] are on the stack.
+
+	// Re-interleave the words in the first two rows of each block (x0..7).
+	// Also add the counter values 0-3 to x12[0-3].
+	  vld1.32	{q8}, [r5, :128]	// load counter values 0-3
+	vzip.32		q0, q1			// => (0 1 0 1) (0 1 0 1)
+	vzip.32		q2, q3			// => (2 3 2 3) (2 3 2 3)
+	vzip.32		q4, q5			// => (4 5 4 5) (4 5 4 5)
+	vzip.32		q6, q7			// => (6 7 6 7) (6 7 6 7)
+	  vadd.u32	q12, q8			// x12 += counter values 0-3
+	vswp		d1, d4
+	vswp		d3, d6
+	  vld1.32	{q8-q9}, [r0]!		// load s0..7
+	vswp		d9, d12
+	vswp		d11, d14
+
+	// Swap q1 and q4 so that we'll free up consecutive registers (q0-q1)
+	// after XORing the first 32 bytes.
+	vswp		q1, q4
+
+	// First two rows of each block are (q0 q1) (q2 q6) (q4 q5) (q3 q7)
+
+	// x0..3[0-3] += s0..3[0-3]	(add orig state to 1st row of each block)
+	vadd.u32	q0, q0, q8
+	vadd.u32	q2, q2, q8
+	vadd.u32	q4, q4, q8
+	vadd.u32	q3, q3, q8
+
+	// x4..7[0-3] += s4..7[0-3]	(add orig state to 2nd row of each block)
+	vadd.u32	q1, q1, q9
+	vadd.u32	q6, q6, q9
+	vadd.u32	q5, q5, q9
+	vadd.u32	q7, q7, q9
+
+	// XOR first 32 bytes using keystream from first two rows of first block
+	vld1.8		{q8-q9}, [r2]!
+	veor		q8, q8, q0
+	veor		q9, q9, q1
+	vst1.8		{q8-q9}, [r1]!
+
+	// Re-interleave the words in the last two rows of each block (x8..15).
+	vld1.32		{q8-q9}, [sp, :256]
+	vzip.32		q12, q13	// => (12 13 12 13) (12 13 12 13)
+	vzip.32		q14, q15	// => (14 15 14 15) (14 15 14 15)
+	vzip.32		q8, q9		// => (8 9 8 9) (8 9 8 9)
+	vzip.32		q10, q11	// => (10 11 10 11) (10 11 10 11)
+	  vld1.32	{q0-q1}, [r0]	// load s8..15
+	vswp		d25, d28
+	vswp		d27, d30
+	vswp		d17, d20
+	vswp		d19, d22
+
+	// Last two rows of each block are (q8 q12) (q10 q14) (q9 q13) (q11 q15)
+
+	// x8..11[0-3] += s8..11[0-3]	(add orig state to 3rd row of each block)
+	vadd.u32	q8,  q8,  q0
+	vadd.u32	q10, q10, q0
+	vadd.u32	q9,  q9,  q0
+	vadd.u32	q11, q11, q0
+
+	// x12..15[0-3] += s12..15[0-3] (add orig state to 4th row of each block)
+	vadd.u32	q12, q12, q1
+	vadd.u32	q14, q14, q1
+	vadd.u32	q13, q13, q1
+	vadd.u32	q15, q15, q1
+
+	// XOR the rest of the data with the keystream
+
+	vld1.8		{q0-q1}, [r2]!
+	veor		q0, q0, q8
+	veor		q1, q1, q12
+	vst1.8		{q0-q1}, [r1]!
+
+	vld1.8		{q0-q1}, [r2]!
+	veor		q0, q0, q2
+	veor		q1, q1, q6
+	vst1.8		{q0-q1}, [r1]!
+
+	vld1.8		{q0-q1}, [r2]!
+	veor		q0, q0, q10
+	veor		q1, q1, q14
+	vst1.8		{q0-q1}, [r1]!
+
+	vld1.8		{q0-q1}, [r2]!
+	veor		q0, q0, q4
+	veor		q1, q1, q5
+	vst1.8		{q0-q1}, [r1]!
+
+	vld1.8		{q0-q1}, [r2]!
+	veor		q0, q0, q9
+	veor		q1, q1, q13
+	vst1.8		{q0-q1}, [r1]!
+
+	vld1.8		{q0-q1}, [r2]!
+	veor		q0, q0, q3
+	veor		q1, q1, q7
+	vst1.8		{q0-q1}, [r1]!
+
+	vld1.8		{q0-q1}, [r2]
+	  mov		sp, r4		// restore original stack pointer
+	veor		q0, q0, q11
+	veor		q1, q1, q15
+	vst1.8		{q0-q1}, [r1]
+
+	pop		{r4-r5}
+	bx		lr
+ENDPROC(chacha_4block_xor_neon)

diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c
new file mode 100644
index 0000000..14cc6b0
--- /dev/null
+++ b/arch/arm/crypto/chacha-neon-glue.c

@@ -0,0 +1,226 @@
+/*
+ * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on:
+ * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/chacha.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+				      int nrounds);
+asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+				       int nrounds);
+asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
+
+static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
+			  unsigned int bytes, int nrounds)
+{
+	u8 buf[CHACHA_BLOCK_SIZE];
+
+	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
+		chacha_4block_xor_neon(state, dst, src, nrounds);
+		bytes -= CHACHA_BLOCK_SIZE * 4;
+		src += CHACHA_BLOCK_SIZE * 4;
+		dst += CHACHA_BLOCK_SIZE * 4;
+		state[12] += 4;
+	}
+	while (bytes >= CHACHA_BLOCK_SIZE) {
+		chacha_block_xor_neon(state, dst, src, nrounds);
+		bytes -= CHACHA_BLOCK_SIZE;
+		src += CHACHA_BLOCK_SIZE;
+		dst += CHACHA_BLOCK_SIZE;
+		state[12]++;
+	}
+	if (bytes) {
+		memcpy(buf, src, bytes);
+		chacha_block_xor_neon(state, buf, buf, nrounds);
+		memcpy(dst, buf, bytes);
+	}
+}
+
+static int chacha_neon_stream_xor(struct blkcipher_desc *desc,
+				  struct scatterlist *dst,
+				  struct scatterlist *src,
+				  unsigned int nbytes,
+				  struct chacha_ctx *ctx, u8 *iv)
+{
+	struct blkcipher_walk walk;
+	u32 state[16];
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt_block(desc, &walk, CHACHA_BLOCK_SIZE);
+
+	crypto_chacha_init(state, ctx, iv);
+
+	while (walk.nbytes >= CHACHA_BLOCK_SIZE) {
+		kernel_neon_begin();
+		chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
+			      rounddown(walk.nbytes, CHACHA_BLOCK_SIZE),
+			      ctx->nrounds);
+		kernel_neon_end();
+		err = blkcipher_walk_done(desc, &walk,
+					  walk.nbytes % CHACHA_BLOCK_SIZE);
+	}
+
+	if (walk.nbytes) {
+		kernel_neon_begin();
+		chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
+			      walk.nbytes, ctx->nrounds);
+		kernel_neon_end();
+		err = blkcipher_walk_done(desc, &walk, 0);
+	}
+	return err;
+}
+
+static int chacha_neon(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct chacha_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	u8 *iv = desc->info;
+
+	if (nbytes <= CHACHA_BLOCK_SIZE || !may_use_simd())
+		return crypto_chacha_crypt(desc, dst, src, nbytes);
+
+	return chacha_neon_stream_xor(desc, dst, src, nbytes, ctx, iv);
+}
+
+static int xchacha_neon(struct blkcipher_desc *desc, struct scatterlist *dst,
+			struct scatterlist *src, unsigned int nbytes)
+{
+	struct chacha_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	u8 *iv = desc->info;
+	struct chacha_ctx subctx;
+	u32 state[16];
+	u8 real_iv[16];
+
+	if (nbytes <= CHACHA_BLOCK_SIZE || !may_use_simd())
+		return crypto_xchacha_crypt(desc, dst, src, nbytes);
+
+	crypto_chacha_init(state, ctx, iv);
+
+	kernel_neon_begin();
+	hchacha_block_neon(state, subctx.key, ctx->nrounds);
+	kernel_neon_end();
+	subctx.nrounds = ctx->nrounds;
+
+	memcpy(&real_iv[0], iv + 24, 8);
+	memcpy(&real_iv[8], iv + 16, 8);
+	return chacha_neon_stream_xor(desc, dst, src, nbytes, &subctx, real_iv);
+}
+
+static struct crypto_alg algs[] = {
+	{
+		.cra_name		= "chacha20",
+		.cra_driver_name	= "chacha20-neon",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+		.cra_blocksize		= 1,
+		.cra_type		= &crypto_blkcipher_type,
+		.cra_ctxsize		= sizeof(struct chacha_ctx),
+		.cra_alignmask		= sizeof(u32) - 1,
+		.cra_module		= THIS_MODULE,
+		.cra_u			= {
+			.blkcipher = {
+				.min_keysize	= CHACHA_KEY_SIZE,
+				.max_keysize	= CHACHA_KEY_SIZE,
+				.ivsize		= CHACHA_IV_SIZE,
+				.geniv		= "seqiv",
+				.setkey		= crypto_chacha20_setkey,
+				.encrypt	= chacha_neon,
+				.decrypt	= chacha_neon,
+			},
+		},
+	}, {
+		.cra_name		= "xchacha20",
+		.cra_driver_name	= "xchacha20-neon",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+		.cra_blocksize		= 1,
+		.cra_type		= &crypto_blkcipher_type,
+		.cra_ctxsize		= sizeof(struct chacha_ctx),
+		.cra_alignmask		= sizeof(u32) - 1,
+		.cra_module		= THIS_MODULE,
+		.cra_u			= {
+			.blkcipher = {
+				.min_keysize	= CHACHA_KEY_SIZE,
+				.max_keysize	= CHACHA_KEY_SIZE,
+				.ivsize		= XCHACHA_IV_SIZE,
+				.geniv		= "seqiv",
+				.setkey		= crypto_chacha20_setkey,
+				.encrypt	= xchacha_neon,
+				.decrypt	= xchacha_neon,
+			},
+		},
+	}, {
+		.cra_name		= "xchacha12",
+		.cra_driver_name	= "xchacha12-neon",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+		.cra_blocksize		= 1,
+		.cra_type		= &crypto_blkcipher_type,
+		.cra_ctxsize		= sizeof(struct chacha_ctx),
+		.cra_alignmask		= sizeof(u32) - 1,
+		.cra_module		= THIS_MODULE,
+		.cra_u			= {
+			.blkcipher = {
+				.min_keysize	= CHACHA_KEY_SIZE,
+				.max_keysize	= CHACHA_KEY_SIZE,
+				.ivsize		= XCHACHA_IV_SIZE,
+				.geniv		= "seqiv",
+				.setkey		= crypto_chacha12_setkey,
+				.encrypt	= xchacha_neon,
+				.decrypt	= xchacha_neon,
+			},
+		},
+	},
+};
+
+static int __init chacha_simd_mod_init(void)
+{
+	if (!(elf_hwcap & HWCAP_NEON))
+		return -ENODEV;
+
+	return crypto_register_algs(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit chacha_simd_mod_fini(void)
+{
+	crypto_unregister_algs(algs, ARRAY_SIZE(algs));
+}
+
+module_init(chacha_simd_mod_init);
+module_exit(chacha_simd_mod_fini);
+
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-neon");

diff --git a/arch/arm/crypto/nh-neon-core.S b/arch/arm/crypto/nh-neon-core.S
new file mode 100644
index 0000000..434d80a
--- /dev/null
+++ b/arch/arm/crypto/nh-neon-core.S

@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * NH - ε-almost-universal hash function, NEON accelerated version
+ *
+ * Copyright 2018 Google LLC
+ *
+ * Author: Eric Biggers <ebiggers@google.com>
+ */
+
+#include <linux/linkage.h>
+
+	.text
+	.fpu		neon
+
+	KEY		.req	r0
+	MESSAGE		.req	r1
+	MESSAGE_LEN	.req	r2
+	HASH		.req	r3
+
+	PASS0_SUMS	.req	q0
+	PASS0_SUM_A	.req	d0
+	PASS0_SUM_B	.req	d1
+	PASS1_SUMS	.req	q1
+	PASS1_SUM_A	.req	d2
+	PASS1_SUM_B	.req	d3
+	PASS2_SUMS	.req	q2
+	PASS2_SUM_A	.req	d4
+	PASS2_SUM_B	.req	d5
+	PASS3_SUMS	.req	q3
+	PASS3_SUM_A	.req	d6
+	PASS3_SUM_B	.req	d7
+	K0		.req	q4
+	K1		.req	q5
+	K2		.req	q6
+	K3		.req	q7
+	T0		.req	q8
+	T0_L		.req	d16
+	T0_H		.req	d17
+	T1		.req	q9
+	T1_L		.req	d18
+	T1_H		.req	d19
+	T2		.req	q10
+	T2_L		.req	d20
+	T2_H		.req	d21
+	T3		.req	q11
+	T3_L		.req	d22
+	T3_H		.req	d23
+
+.macro _nh_stride	k0, k1, k2, k3
+
+	// Load next message stride
+	vld1.8		{T3}, [MESSAGE]!
+
+	// Load next key stride
+	vld1.32		{\k3}, [KEY]!
+
+	// Add message words to key words
+	vadd.u32	T0, T3, \k0
+	vadd.u32	T1, T3, \k1
+	vadd.u32	T2, T3, \k2
+	vadd.u32	T3, T3, \k3
+
+	// Multiply 32x32 => 64 and accumulate
+	vmlal.u32	PASS0_SUMS, T0_L, T0_H
+	vmlal.u32	PASS1_SUMS, T1_L, T1_H
+	vmlal.u32	PASS2_SUMS, T2_L, T2_H
+	vmlal.u32	PASS3_SUMS, T3_L, T3_H
+.endm
+
+/*
+ * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
+ *		u8 hash[NH_HASH_BYTES])
+ *
+ * It's guaranteed that message_len % 16 == 0.
+ */
+ENTRY(nh_neon)
+
+	vld1.32		{K0,K1}, [KEY]!
+	  vmov.u64	PASS0_SUMS, #0
+	  vmov.u64	PASS1_SUMS, #0
+	vld1.32		{K2}, [KEY]!
+	  vmov.u64	PASS2_SUMS, #0
+	  vmov.u64	PASS3_SUMS, #0
+
+	subs		MESSAGE_LEN, MESSAGE_LEN, #64
+	blt		.Lloop4_done
+.Lloop4:
+	_nh_stride	K0, K1, K2, K3
+	_nh_stride	K1, K2, K3, K0
+	_nh_stride	K2, K3, K0, K1
+	_nh_stride	K3, K0, K1, K2
+	subs		MESSAGE_LEN, MESSAGE_LEN, #64
+	bge		.Lloop4
+
+.Lloop4_done:
+	ands		MESSAGE_LEN, MESSAGE_LEN, #63
+	beq		.Ldone
+	_nh_stride	K0, K1, K2, K3
+
+	subs		MESSAGE_LEN, MESSAGE_LEN, #16
+	beq		.Ldone
+	_nh_stride	K1, K2, K3, K0
+
+	subs		MESSAGE_LEN, MESSAGE_LEN, #16
+	beq		.Ldone
+	_nh_stride	K2, K3, K0, K1
+
+.Ldone:
+	// Sum the accumulators for each pass, then store the sums to 'hash'
+	vadd.u64	T0_L, PASS0_SUM_A, PASS0_SUM_B
+	vadd.u64	T0_H, PASS1_SUM_A, PASS1_SUM_B
+	vadd.u64	T1_L, PASS2_SUM_A, PASS2_SUM_B
+	vadd.u64	T1_H, PASS3_SUM_A, PASS3_SUM_B
+	vst1.8		{T0-T1}, [HASH]
+	bx		lr
+ENDPROC(nh_neon)

diff --git a/arch/arm/crypto/nhpoly1305-neon-glue.c b/arch/arm/crypto/nhpoly1305-neon-glue.c
new file mode 100644
index 0000000..49aae87
--- /dev/null
+++ b/arch/arm/crypto/nhpoly1305-neon-glue.c

@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum
+ * (NEON accelerated version)
+ *
+ * Copyright 2018 Google LLC
+ */
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <crypto/internal/hash.h>
+#include <crypto/nhpoly1305.h>
+#include <linux/module.h>
+
+asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len,
+			u8 hash[NH_HASH_BYTES]);
+
+/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */
+static void _nh_neon(const u32 *key, const u8 *message, size_t message_len,
+		     __le64 hash[NH_NUM_PASSES])
+{
+	nh_neon(key, message, message_len, (u8 *)hash);
+}
+
+static int nhpoly1305_neon_update(struct shash_desc *desc,
+				  const u8 *src, unsigned int srclen)
+{
+	if (srclen < 64 || !may_use_simd())
+		return crypto_nhpoly1305_update(desc, src, srclen);
+
+	do {
+		unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE);
+
+		kernel_neon_begin();
+		crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon);
+		kernel_neon_end();
+		src += n;
+		srclen -= n;
+	} while (srclen);
+	return 0;
+}
+
+static struct shash_alg nhpoly1305_alg = {
+	.base.cra_name		= "nhpoly1305",
+	.base.cra_driver_name	= "nhpoly1305-neon",
+	.base.cra_priority	= 200,
+	.base.cra_ctxsize	= sizeof(struct nhpoly1305_key),
+	.base.cra_module	= THIS_MODULE,
+	.digestsize		= POLY1305_DIGEST_SIZE,
+	.init			= crypto_nhpoly1305_init,
+	.update			= nhpoly1305_neon_update,
+	.final			= crypto_nhpoly1305_final,
+	.setkey			= crypto_nhpoly1305_setkey,
+	.descsize		= sizeof(struct nhpoly1305_state),
+};
+
+static int __init nhpoly1305_mod_init(void)
+{
+	if (!(elf_hwcap & HWCAP_NEON))
+		return -ENODEV;
+
+	return crypto_register_shash(&nhpoly1305_alg);
+}
+
+static void __exit nhpoly1305_mod_exit(void)
+{
+	crypto_unregister_shash(&nhpoly1305_alg);
+}
+
+module_init(nhpoly1305_mod_init);
+module_exit(nhpoly1305_mod_exit);
+
+MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (NEON-accelerated)");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("nhpoly1305");
+MODULE_ALIAS_CRYPTO("nhpoly1305-neon");

diff --git a/arch/arm/crypto/speck-neon-core.S b/arch/arm/crypto/speck-neon-core.S
deleted file mode 100644
index 3c1e203..0000000
--- a/arch/arm/crypto/speck-neon-core.S
+++ /dev/null

@@ -1,432 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
- *
- * Copyright (c) 2018 Google, Inc
- *
- * Author: Eric Biggers <ebiggers@google.com>
- */
-
-#include <linux/linkage.h>
-
-	.text
-	.fpu		neon
-
-	// arguments
-	ROUND_KEYS	.req	r0	// const {u64,u32} *round_keys
-	NROUNDS		.req	r1	// int nrounds
-	DST		.req	r2	// void *dst
-	SRC		.req	r3	// const void *src
-	NBYTES		.req	r4	// unsigned int nbytes
-	TWEAK		.req	r5	// void *tweak
-
-	// registers which hold the data being encrypted/decrypted
-	X0		.req	q0
-	X0_L		.req	d0
-	X0_H		.req	d1
-	Y0		.req	q1
-	Y0_H		.req	d3
-	X1		.req	q2
-	X1_L		.req	d4
-	X1_H		.req	d5
-	Y1		.req	q3
-	Y1_H		.req	d7
-	X2		.req	q4
-	X2_L		.req	d8
-	X2_H		.req	d9
-	Y2		.req	q5
-	Y2_H		.req	d11
-	X3		.req	q6
-	X3_L		.req	d12
-	X3_H		.req	d13
-	Y3		.req	q7
-	Y3_H		.req	d15
-
-	// the round key, duplicated in all lanes
-	ROUND_KEY	.req	q8
-	ROUND_KEY_L	.req	d16
-	ROUND_KEY_H	.req	d17
-
-	// index vector for vtbl-based 8-bit rotates
-	ROTATE_TABLE	.req	d18
-
-	// multiplication table for updating XTS tweaks
-	GF128MUL_TABLE	.req	d19
-	GF64MUL_TABLE	.req	d19
-
-	// current XTS tweak value(s)
-	TWEAKV		.req	q10
-	TWEAKV_L	.req	d20
-	TWEAKV_H	.req	d21
-
-	TMP0		.req	q12
-	TMP0_L		.req	d24
-	TMP0_H		.req	d25
-	TMP1		.req	q13
-	TMP2		.req	q14
-	TMP3		.req	q15
-
-	.align		4
-.Lror64_8_table:
-	.byte		1, 2, 3, 4, 5, 6, 7, 0
-.Lror32_8_table:
-	.byte		1, 2, 3, 0, 5, 6, 7, 4
-.Lrol64_8_table:
-	.byte		7, 0, 1, 2, 3, 4, 5, 6
-.Lrol32_8_table:
-	.byte		3, 0, 1, 2, 7, 4, 5, 6
-.Lgf128mul_table:
-	.byte		0, 0x87
-	.fill		14
-.Lgf64mul_table:
-	.byte		0, 0x1b, (0x1b << 1), (0x1b << 1) ^ 0x1b
-	.fill		12
-
-/*
- * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time
- *
- * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for
- * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes
- * of ROUND_KEY.  'n' is the lane size: 64 for Speck128, or 32 for Speck64.
- *
- * The 8-bit rotates are implemented using vtbl instead of vshr + vsli because
- * the vtbl approach is faster on some processors and the same speed on others.
- */
-.macro _speck_round_128bytes	n
-
-	// x = ror(x, 8)
-	vtbl.8		X0_L, {X0_L}, ROTATE_TABLE
-	vtbl.8		X0_H, {X0_H}, ROTATE_TABLE
-	vtbl.8		X1_L, {X1_L}, ROTATE_TABLE
-	vtbl.8		X1_H, {X1_H}, ROTATE_TABLE
-	vtbl.8		X2_L, {X2_L}, ROTATE_TABLE
-	vtbl.8		X2_H, {X2_H}, ROTATE_TABLE
-	vtbl.8		X3_L, {X3_L}, ROTATE_TABLE
-	vtbl.8		X3_H, {X3_H}, ROTATE_TABLE
-
-	// x += y
-	vadd.u\n	X0, Y0
-	vadd.u\n	X1, Y1
-	vadd.u\n	X2, Y2
-	vadd.u\n	X3, Y3
-
-	// x ^= k
-	veor		X0, ROUND_KEY
-	veor		X1, ROUND_KEY
-	veor		X2, ROUND_KEY
-	veor		X3, ROUND_KEY
-
-	// y = rol(y, 3)
-	vshl.u\n	TMP0, Y0, #3
-	vshl.u\n	TMP1, Y1, #3
-	vshl.u\n	TMP2, Y2, #3
-	vshl.u\n	TMP3, Y3, #3
-	vsri.u\n	TMP0, Y0, #(\n - 3)
-	vsri.u\n	TMP1, Y1, #(\n - 3)
-	vsri.u\n	TMP2, Y2, #(\n - 3)
-	vsri.u\n	TMP3, Y3, #(\n - 3)
-
-	// y ^= x
-	veor		Y0, TMP0, X0
-	veor		Y1, TMP1, X1
-	veor		Y2, TMP2, X2
-	veor		Y3, TMP3, X3
-.endm
-
-/*
- * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time
- *
- * This is the inverse of _speck_round_128bytes().
- */
-.macro _speck_unround_128bytes	n
-
-	// y ^= x
-	veor		TMP0, Y0, X0
-	veor		TMP1, Y1, X1
-	veor		TMP2, Y2, X2
-	veor		TMP3, Y3, X3
-
-	// y = ror(y, 3)
-	vshr.u\n	Y0, TMP0, #3
-	vshr.u\n	Y1, TMP1, #3
-	vshr.u\n	Y2, TMP2, #3
-	vshr.u\n	Y3, TMP3, #3
-	vsli.u\n	Y0, TMP0, #(\n - 3)
-	vsli.u\n	Y1, TMP1, #(\n - 3)
-	vsli.u\n	Y2, TMP2, #(\n - 3)
-	vsli.u\n	Y3, TMP3, #(\n - 3)
-
-	// x ^= k
-	veor		X0, ROUND_KEY
-	veor		X1, ROUND_KEY
-	veor		X2, ROUND_KEY
-	veor		X3, ROUND_KEY
-
-	// x -= y
-	vsub.u\n	X0, Y0
-	vsub.u\n	X1, Y1
-	vsub.u\n	X2, Y2
-	vsub.u\n	X3, Y3
-
-	// x = rol(x, 8);
-	vtbl.8		X0_L, {X0_L}, ROTATE_TABLE
-	vtbl.8		X0_H, {X0_H}, ROTATE_TABLE
-	vtbl.8		X1_L, {X1_L}, ROTATE_TABLE
-	vtbl.8		X1_H, {X1_H}, ROTATE_TABLE
-	vtbl.8		X2_L, {X2_L}, ROTATE_TABLE
-	vtbl.8		X2_H, {X2_H}, ROTATE_TABLE
-	vtbl.8		X3_L, {X3_L}, ROTATE_TABLE
-	vtbl.8		X3_H, {X3_H}, ROTATE_TABLE
-.endm
-
-.macro _xts128_precrypt_one	dst_reg, tweak_buf, tmp
-
-	// Load the next source block
-	vld1.8		{\dst_reg}, [SRC]!
-
-	// Save the current tweak in the tweak buffer
-	vst1.8		{TWEAKV}, [\tweak_buf:128]!
-
-	// XOR the next source block with the current tweak
-	veor		\dst_reg, TWEAKV
-
-	/*
-	 * Calculate the next tweak by multiplying the current one by x,
-	 * modulo p(x) = x^128 + x^7 + x^2 + x + 1.
-	 */
-	vshr.u64	\tmp, TWEAKV, #63
-	vshl.u64	TWEAKV, #1
-	veor		TWEAKV_H, \tmp\()_L
-	vtbl.8		\tmp\()_H, {GF128MUL_TABLE}, \tmp\()_H
-	veor		TWEAKV_L, \tmp\()_H
-.endm
-
-.macro _xts64_precrypt_two	dst_reg, tweak_buf, tmp
-
-	// Load the next two source blocks
-	vld1.8		{\dst_reg}, [SRC]!
-
-	// Save the current two tweaks in the tweak buffer
-	vst1.8		{TWEAKV}, [\tweak_buf:128]!
-
-	// XOR the next two source blocks with the current two tweaks
-	veor		\dst_reg, TWEAKV
-
-	/*
-	 * Calculate the next two tweaks by multiplying the current ones by x^2,
-	 * modulo p(x) = x^64 + x^4 + x^3 + x + 1.
-	 */
-	vshr.u64	\tmp, TWEAKV, #62
-	vshl.u64	TWEAKV, #2
-	vtbl.8		\tmp\()_L, {GF64MUL_TABLE}, \tmp\()_L
-	vtbl.8		\tmp\()_H, {GF64MUL_TABLE}, \tmp\()_H
-	veor		TWEAKV, \tmp
-.endm
-
-/*
- * _speck_xts_crypt() - Speck-XTS encryption/decryption
- *
- * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer
- * using Speck-XTS, specifically the variant with a block size of '2n' and round
- * count given by NROUNDS.  The expanded round keys are given in ROUND_KEYS, and
- * the current XTS tweak value is given in TWEAK.  It's assumed that NBYTES is a
- * nonzero multiple of 128.
- */
-.macro _speck_xts_crypt	n, decrypting
-	push		{r4-r7}
-	mov		r7, sp
-
-	/*
-	 * The first four parameters were passed in registers r0-r3.  Load the
-	 * additional parameters, which were passed on the stack.
-	 */
-	ldr		NBYTES, [sp, #16]
-	ldr		TWEAK, [sp, #20]
-
-	/*
-	 * If decrypting, modify the ROUND_KEYS parameter to point to the last
-	 * round key rather than the first, since for decryption the round keys
-	 * are used in reverse order.
-	 */
-.if \decrypting
-.if \n == 64
-	add		ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #3
-	sub		ROUND_KEYS, #8
-.else
-	add		ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #2
-	sub		ROUND_KEYS, #4
-.endif
-.endif
-
-	// Load the index vector for vtbl-based 8-bit rotates
-.if \decrypting
-	ldr		r12, =.Lrol\n\()_8_table
-.else
-	ldr		r12, =.Lror\n\()_8_table
-.endif
-	vld1.8		{ROTATE_TABLE}, [r12:64]
-
-	// One-time XTS preparation
-
-	/*
-	 * Allocate stack space to store 128 bytes worth of tweaks.  For
-	 * performance, this space is aligned to a 16-byte boundary so that we
-	 * can use the load/store instructions that declare 16-byte alignment.
-	 */
-	sub		sp, #128
-	bic		sp, #0xf
-
-.if \n == 64
-	// Load first tweak
-	vld1.8		{TWEAKV}, [TWEAK]
-
-	// Load GF(2^128) multiplication table
-	ldr		r12, =.Lgf128mul_table
-	vld1.8		{GF128MUL_TABLE}, [r12:64]
-.else
-	// Load first tweak
-	vld1.8		{TWEAKV_L}, [TWEAK]
-
-	// Load GF(2^64) multiplication table
-	ldr		r12, =.Lgf64mul_table
-	vld1.8		{GF64MUL_TABLE}, [r12:64]
-
-	// Calculate second tweak, packing it together with the first
-	vshr.u64	TMP0_L, TWEAKV_L, #63
-	vtbl.u8		TMP0_L, {GF64MUL_TABLE}, TMP0_L
-	vshl.u64	TWEAKV_H, TWEAKV_L, #1
-	veor		TWEAKV_H, TMP0_L
-.endif
-
-.Lnext_128bytes_\@:
-
-	/*
-	 * Load the source blocks into {X,Y}[0-3], XOR them with their XTS tweak
-	 * values, and save the tweaks on the stack for later.  Then
-	 * de-interleave the 'x' and 'y' elements of each block, i.e. make it so
-	 * that the X[0-3] registers contain only the second halves of blocks,
-	 * and the Y[0-3] registers contain only the first halves of blocks.
-	 * (Speck uses the order (y, x) rather than the more intuitive (x, y).)
-	 */
-	mov		r12, sp
-.if \n == 64
-	_xts128_precrypt_one	X0, r12, TMP0
-	_xts128_precrypt_one	Y0, r12, TMP0
-	_xts128_precrypt_one	X1, r12, TMP0
-	_xts128_precrypt_one	Y1, r12, TMP0
-	_xts128_precrypt_one	X2, r12, TMP0
-	_xts128_precrypt_one	Y2, r12, TMP0
-	_xts128_precrypt_one	X3, r12, TMP0
-	_xts128_precrypt_one	Y3, r12, TMP0
-	vswp		X0_L, Y0_H
-	vswp		X1_L, Y1_H
-	vswp		X2_L, Y2_H
-	vswp		X3_L, Y3_H
-.else
-	_xts64_precrypt_two	X0, r12, TMP0
-	_xts64_precrypt_two	Y0, r12, TMP0
-	_xts64_precrypt_two	X1, r12, TMP0
-	_xts64_precrypt_two	Y1, r12, TMP0
-	_xts64_precrypt_two	X2, r12, TMP0
-	_xts64_precrypt_two	Y2, r12, TMP0
-	_xts64_precrypt_two	X3, r12, TMP0
-	_xts64_precrypt_two	Y3, r12, TMP0
-	vuzp.32		Y0, X0
-	vuzp.32		Y1, X1
-	vuzp.32		Y2, X2
-	vuzp.32		Y3, X3
-.endif
-
-	// Do the cipher rounds
-
-	mov		r12, ROUND_KEYS
-	mov		r6, NROUNDS
-
-.Lnext_round_\@:
-.if \decrypting
-.if \n == 64
-	vld1.64		ROUND_KEY_L, [r12]
-	sub		r12, #8
-	vmov		ROUND_KEY_H, ROUND_KEY_L
-.else
-	vld1.32		{ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]
-	sub		r12, #4
-.endif
-	_speck_unround_128bytes	\n
-.else
-.if \n == 64
-	vld1.64		ROUND_KEY_L, [r12]!
-	vmov		ROUND_KEY_H, ROUND_KEY_L
-.else
-	vld1.32		{ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]!
-.endif
-	_speck_round_128bytes	\n
-.endif
-	subs		r6, r6, #1
-	bne		.Lnext_round_\@
-
-	// Re-interleave the 'x' and 'y' elements of each block
-.if \n == 64
-	vswp		X0_L, Y0_H
-	vswp		X1_L, Y1_H
-	vswp		X2_L, Y2_H
-	vswp		X3_L, Y3_H
-.else
-	vzip.32		Y0, X0
-	vzip.32		Y1, X1
-	vzip.32		Y2, X2
-	vzip.32		Y3, X3
-.endif
-
-	// XOR the encrypted/decrypted blocks with the tweaks we saved earlier
-	mov		r12, sp
-	vld1.8		{TMP0, TMP1}, [r12:128]!
-	vld1.8		{TMP2, TMP3}, [r12:128]!
-	veor		X0, TMP0
-	veor		Y0, TMP1
-	veor		X1, TMP2
-	veor		Y1, TMP3
-	vld1.8		{TMP0, TMP1}, [r12:128]!
-	vld1.8		{TMP2, TMP3}, [r12:128]!
-	veor		X2, TMP0
-	veor		Y2, TMP1
-	veor		X3, TMP2
-	veor		Y3, TMP3
-
-	// Store the ciphertext in the destination buffer
-	vst1.8		{X0, Y0}, [DST]!
-	vst1.8		{X1, Y1}, [DST]!
-	vst1.8		{X2, Y2}, [DST]!
-	vst1.8		{X3, Y3}, [DST]!
-
-	// Continue if there are more 128-byte chunks remaining, else return
-	subs		NBYTES, #128
-	bne		.Lnext_128bytes_\@
-
-	// Store the next tweak
-.if \n == 64
-	vst1.8		{TWEAKV}, [TWEAK]
-.else
-	vst1.8		{TWEAKV_L}, [TWEAK]
-.endif
-
-	mov		sp, r7
-	pop		{r4-r7}
-	bx		lr
-.endm
-
-ENTRY(speck128_xts_encrypt_neon)
-	_speck_xts_crypt	n=64, decrypting=0
-ENDPROC(speck128_xts_encrypt_neon)
-
-ENTRY(speck128_xts_decrypt_neon)
-	_speck_xts_crypt	n=64, decrypting=1
-ENDPROC(speck128_xts_decrypt_neon)
-
-ENTRY(speck64_xts_encrypt_neon)
-	_speck_xts_crypt	n=32, decrypting=0
-ENDPROC(speck64_xts_encrypt_neon)
-
-ENTRY(speck64_xts_decrypt_neon)
-	_speck_xts_crypt	n=32, decrypting=1
-ENDPROC(speck64_xts_decrypt_neon)

diff --git a/arch/arm/crypto/speck-neon-glue.c b/arch/arm/crypto/speck-neon-glue.c
deleted file mode 100644
index ea36c3a..0000000
--- a/arch/arm/crypto/speck-neon-glue.c
+++ /dev/null

@@ -1,314 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
- *
- * Copyright (c) 2018 Google, Inc
- *
- * Note: the NIST recommendation for XTS only specifies a 128-bit block size,
- * but a 64-bit version (needed for Speck64) is fairly straightforward; the math
- * is just done in GF(2^64) instead of GF(2^128), with the reducing polynomial
- * x^64 + x^4 + x^3 + x + 1 from the original XEX paper (Rogaway, 2004:
- * "Efficient Instantiations of Tweakable Blockciphers and Refinements to Modes
- * OCB and PMAC"), represented as 0x1B.
- */
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <crypto/algapi.h>
-#include <crypto/gf128mul.h>
-#include <crypto/speck.h>
-#include <crypto/xts.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-/* The assembly functions only handle multiples of 128 bytes */
-#define SPECK_NEON_CHUNK_SIZE	128
-
-/* Speck128 */
-
-struct speck128_xts_tfm_ctx {
-	struct speck128_tfm_ctx main_key;
-	struct speck128_tfm_ctx tweak_key;
-};
-
-asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds,
-					  void *dst, const void *src,
-					  unsigned int nbytes, void *tweak);
-
-asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds,
-					  void *dst, const void *src,
-					  unsigned int nbytes, void *tweak);
-
-typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *,
-				     u8 *, const u8 *);
-typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *,
-					  const void *, unsigned int, void *);
-
-static __always_inline int
-__speck128_xts_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes,
-		     speck128_crypt_one_t crypt_one,
-		     speck128_xts_crypt_many_t crypt_many)
-{
-	struct crypto_blkcipher *tfm = desc->tfm;
-	const struct speck128_xts_tfm_ctx *ctx = crypto_blkcipher_ctx(tfm);
-	struct blkcipher_walk walk;
-	le128 tweak;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, SPECK_NEON_CHUNK_SIZE);
-
-	crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
-
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-		u8 *dst = walk.dst.virt.addr;
-		const u8 *src = walk.src.virt.addr;
-
-		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
-			unsigned int count;
-
-			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
-			kernel_neon_begin();
-			(*crypt_many)(ctx->main_key.round_keys,
-				      ctx->main_key.nrounds,
-				      dst, src, count, &tweak);
-			kernel_neon_end();
-			dst += count;
-			src += count;
-			nbytes -= count;
-		}
-
-		/* Handle any remainder with generic code */
-		while (nbytes >= sizeof(tweak)) {
-			le128_xor((le128 *)dst, (const le128 *)src, &tweak);
-			(*crypt_one)(&ctx->main_key, dst, dst);
-			le128_xor((le128 *)dst, (const le128 *)dst, &tweak);
-			gf128mul_x_ble((be128 *)&tweak, (const be128 *)&tweak);
-
-			dst += sizeof(tweak);
-			src += sizeof(tweak);
-			nbytes -= sizeof(tweak);
-		}
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
-
-	return err;
-}
-
-static int speck128_xts_encrypt(struct blkcipher_desc *desc,
-				struct scatterlist *dst,
-				struct scatterlist *src,
-				unsigned int nbytes)
-{
-	return __speck128_xts_crypt(desc, dst, src, nbytes,
-				    crypto_speck128_encrypt,
-				    speck128_xts_encrypt_neon);
-}
-
-static int speck128_xts_decrypt(struct blkcipher_desc *desc,
-				struct scatterlist *dst,
-				struct scatterlist *src,
-				unsigned int nbytes)
-{
-	return __speck128_xts_crypt(desc, dst, src, nbytes,
-				    crypto_speck128_decrypt,
-				    speck128_xts_decrypt_neon);
-}
-
-static int speck128_xts_setkey(struct crypto_tfm *tfm, const u8 *key,
-			       unsigned int keylen)
-{
-	struct speck128_xts_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	if (keylen % 2)
-		return -EINVAL;
-
-	keylen /= 2;
-
-	err = crypto_speck128_setkey(&ctx->main_key, key, keylen);
-	if (err)
-		return err;
-
-	return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen);
-}
-
-/* Speck64 */
-
-struct speck64_xts_tfm_ctx {
-	struct speck64_tfm_ctx main_key;
-	struct speck64_tfm_ctx tweak_key;
-};
-
-asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds,
-					 void *dst, const void *src,
-					 unsigned int nbytes, void *tweak);
-
-asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds,
-					 void *dst, const void *src,
-					 unsigned int nbytes, void *tweak);
-
-typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *,
-				    u8 *, const u8 *);
-typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *,
-					 const void *, unsigned int, void *);
-
-static __always_inline int
-__speck64_xts_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		    struct scatterlist *src, unsigned int nbytes,
-		    speck64_crypt_one_t crypt_one,
-		    speck64_xts_crypt_many_t crypt_many)
-{
-	struct crypto_blkcipher *tfm = desc->tfm;
-	const struct speck64_xts_tfm_ctx *ctx = crypto_blkcipher_ctx(tfm);
-	struct blkcipher_walk walk;
-	__le64 tweak;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, SPECK_NEON_CHUNK_SIZE);
-
-	crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
-
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-		u8 *dst = walk.dst.virt.addr;
-		const u8 *src = walk.src.virt.addr;
-
-		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
-			unsigned int count;
-
-			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
-			kernel_neon_begin();
-			(*crypt_many)(ctx->main_key.round_keys,
-				      ctx->main_key.nrounds,
-				      dst, src, count, &tweak);
-			kernel_neon_end();
-			dst += count;
-			src += count;
-			nbytes -= count;
-		}
-
-		/* Handle any remainder with generic code */
-		while (nbytes >= sizeof(tweak)) {
-			*(__le64 *)dst = *(__le64 *)src ^ tweak;
-			(*crypt_one)(&ctx->main_key, dst, dst);
-			*(__le64 *)dst ^= tweak;
-			tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^
-					    ((tweak & cpu_to_le64(1ULL << 63)) ?
-					     0x1B : 0));
-			dst += sizeof(tweak);
-			src += sizeof(tweak);
-			nbytes -= sizeof(tweak);
-		}
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
-
-	return err;
-}
-
-static int speck64_xts_encrypt(struct blkcipher_desc *desc,
-			       struct scatterlist *dst, struct scatterlist *src,
-			       unsigned int nbytes)
-{
-	return __speck64_xts_crypt(desc, dst, src, nbytes,
-				   crypto_speck64_encrypt,
-				   speck64_xts_encrypt_neon);
-}
-
-static int speck64_xts_decrypt(struct blkcipher_desc *desc,
-			       struct scatterlist *dst, struct scatterlist *src,
-			       unsigned int nbytes)
-{
-	return __speck64_xts_crypt(desc, dst, src, nbytes,
-				   crypto_speck64_decrypt,
-				   speck64_xts_decrypt_neon);
-}
-
-static int speck64_xts_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen)
-{
-	struct speck64_xts_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	if (keylen % 2)
-		return -EINVAL;
-
-	keylen /= 2;
-
-	err = crypto_speck64_setkey(&ctx->main_key, key, keylen);
-	if (err)
-		return err;
-
-	return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen);
-}
-
-static struct crypto_alg speck_algs[] = {
-	{
-		.cra_name		= "xts(speck128)",
-		.cra_driver_name	= "xts-speck128-neon",
-		.cra_priority		= 300,
-		.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-		.cra_blocksize		= SPECK128_BLOCK_SIZE,
-		.cra_type		= &crypto_blkcipher_type,
-		.cra_ctxsize		= sizeof(struct speck128_xts_tfm_ctx),
-		.cra_alignmask		= 7,
-		.cra_module		= THIS_MODULE,
-		.cra_u = {
-			.blkcipher = {
-				.min_keysize		= 2 * SPECK128_128_KEY_SIZE,
-				.max_keysize		= 2 * SPECK128_256_KEY_SIZE,
-				.ivsize			= SPECK128_BLOCK_SIZE,
-				.setkey			= speck128_xts_setkey,
-				.encrypt		= speck128_xts_encrypt,
-				.decrypt		= speck128_xts_decrypt,
-			}
-		}
-	}, {
-		.cra_name		= "xts(speck64)",
-		.cra_driver_name	= "xts-speck64-neon",
-		.cra_priority		= 300,
-		.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-		.cra_blocksize		= SPECK64_BLOCK_SIZE,
-		.cra_type		= &crypto_blkcipher_type,
-		.cra_ctxsize		= sizeof(struct speck64_xts_tfm_ctx),
-		.cra_alignmask		= 7,
-		.cra_module		= THIS_MODULE,
-		.cra_u = {
-			.blkcipher = {
-				.min_keysize		= 2 * SPECK64_96_KEY_SIZE,
-				.max_keysize		= 2 * SPECK64_128_KEY_SIZE,
-				.ivsize			= SPECK64_BLOCK_SIZE,
-				.setkey			= speck64_xts_setkey,
-				.encrypt		= speck64_xts_encrypt,
-				.decrypt		= speck64_xts_decrypt,
-			}
-		}
-	}
-};
-
-static int __init speck_neon_module_init(void)
-{
-	if (!(elf_hwcap & HWCAP_NEON))
-		return -ENODEV;
-	return crypto_register_algs(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-static void __exit speck_neon_module_exit(void)
-{
-	crypto_unregister_algs(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-module_init(speck_neon_module_init);
-module_exit(speck_neon_module_exit);
-
-MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
-MODULE_ALIAS_CRYPTO("xts(speck128)");
-MODULE_ALIAS_CRYPTO("xts-speck128-neon");
-MODULE_ALIAS_CRYPTO("xts(speck64)");
-MODULE_ALIAS_CRYPTO("xts-speck64-neon");

diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 56be67ec..60f148f 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S

@@ -12,6 +12,7 @@
 #include <asm/unistd.h>
 #include <asm/ftrace.h>
 #include <asm/unwind.h>
+#include <asm/memory.h>
 
 #ifdef CONFIG_NEED_RET_TO_USER
 #include <mach/entry-macro.S>
@@ -35,6 +36,9 @@
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	disable_irq_notrace			@ disable interrupts
+	ldr	r2, [tsk, #TI_ADDR_LIMIT]
+	cmp	r2, #TASK_SIZE
+	blne	addr_limit_check_failed
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
 	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
 	bne	fast_work_pending
@@ -61,6 +65,9 @@
  UNWIND(.cantunwind	)
 	str	r0, [sp, #S_R0 + S_OFF]!	@ save returned r0
 	disable_irq_notrace			@ disable interrupts
+	ldr	r2, [tsk, #TI_ADDR_LIMIT]
+	cmp	r2, #TASK_SIZE
+	blne	addr_limit_check_failed
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
 	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
 	beq	no_work_pending
@@ -93,6 +100,9 @@
 ret_slow_syscall:
 	disable_irq_notrace			@ disable interrupts
 ENTRY(ret_to_user_from_irq)
+	ldr	r2, [tsk, #TI_ADDR_LIMIT]
+	cmp	r2, #TASK_SIZE
+	blne	addr_limit_check_failed
 	ldr	r1, [tsk, #TI_FLAGS]
 	tst	r1, #_TIF_WORK_MASK
 	bne	slow_work_pending

diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 6bee5c9..a53e2b8 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c

@@ -14,6 +14,7 @@
 #include <linux/uaccess.h>
 #include <linux/tracehook.h>
 #include <linux/uprobes.h>
+#include <linux/syscalls.h>
 
 #include <asm/elf.h>
 #include <asm/cacheflush.h>
@@ -634,3 +635,9 @@ struct page *get_signal_page(void)
 
 	return page;
 }
+
+/* Defer to generic check */
+asmlinkage void addr_limit_check_failed(void)
+{
+	addr_limit_user_check();
+}

diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig
new file mode 100644
index 0000000..1ff6114
--- /dev/null
+++ b/arch/arm64/configs/cuttlefish_defconfig

@@ -0,0 +1,435 @@
+# CONFIG_FHANDLE is not set
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CPUSETS=y
+# CONFIG_PROC_PID_CPUSET is not set
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_SCHEDTUNE=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_SWAP=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_CGROUP_BPF=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_SCHED_TUNE=y
+CONFIG_DEFAULT_USE_ENERGY_AWARE=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+# CONFIG_RD_XZ is not set
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+CONFIG_SGETMASK_SYSCALL=y
+# CONFIG_SYSFS_SYSCALL is not set
+CONFIG_KALLSYMS_ALL=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_CC_STACKPROTECTOR_STRONG=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_PCI=y
+CONFIG_PCI_HOST_GENERIC=y
+CONFIG_PREEMPT=y
+CONFIG_HZ_100=y
+# CONFIG_SPARSEMEM_VMEMMAP is not set
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_ZSMALLOC=y
+CONFIG_SECCOMP=y
+CONFIG_PARAVIRT=y
+CONFIG_ARMV8_DEPRECATED=y
+CONFIG_SWP_EMULATION=y
+CONFIG_CP15_BARRIER_EMULATION=y
+CONFIG_SETEND_EMULATION=y
+CONFIG_ARM64_SW_TTBR0_PAN=y
+CONFIG_ARM64_LSE_ATOMICS=y
+CONFIG_RANDOMIZE_BASE=y
+CONFIG_CMDLINE="console=ttyAMA0"
+CONFIG_CMDLINE_EXTEND=y
+# CONFIG_EFI is not set
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_COMPAT=y
+CONFIG_PM_WAKELOCKS=y
+CONFIG_PM_WAKELOCKS_LIMIT=0
+# CONFIG_PM_WAKELOCKS_GC is not set
+CONFIG_PM_DEBUG=y
+CONFIG_CPU_IDLE=y
+CONFIG_ARM_CPUIDLE=y
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
+CONFIG_CPUFREQ_DT=y
+CONFIG_ARM_BIG_LITTLE_CPUFREQ=y
+CONFIG_ARM_DT_BL_CPUFREQ=y
+CONFIG_ARM_SCPI_CPUFREQ=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_NET_IPGRE_DEMUX=y
+CONFIG_NET_IPVTI=y
+CONFIG_INET_ESP=y
+# CONFIG_INET_XFRM_MODE_BEET is not set
+CONFIG_INET_UDP_DIAG=y
+CONFIG_INET_DIAG_DESTROY=y
+CONFIG_TCP_CONG_ADVANCED=y
+# CONFIG_TCP_CONG_BIC is not set
+# CONFIG_TCP_CONG_WESTWOOD is not set
+# CONFIG_TCP_CONG_HTCP is not set
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_ESP=y
+CONFIG_INET6_IPCOMP=y
+CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_VTI=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_DCCP=y
+CONFIG_NF_CT_PROTO_SCTP=y
+CONFIG_NF_CT_PROTO_UDPLITE=y
+CONFIG_NF_CONNTRACK_AMANDA=y
+CONFIG_NF_CONNTRACK_FTP=y
+CONFIG_NF_CONNTRACK_H323=y
+CONFIG_NF_CONNTRACK_IRC=y
+CONFIG_NF_CONNTRACK_NETBIOS_NS=y
+CONFIG_NF_CONNTRACK_PPTP=y
+CONFIG_NF_CONNTRACK_SANE=y
+CONFIG_NF_CONNTRACK_TFTP=y
+CONFIG_NF_CT_NETLINK=y
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=y
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NETFILTER_XT_TARGET_NFLOG=y
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y
+CONFIG_NETFILTER_XT_TARGET_TPROXY=y
+CONFIG_NETFILTER_XT_TARGET_TRACE=y
+CONFIG_NETFILTER_XT_TARGET_SECMARK=y
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
+CONFIG_NETFILTER_XT_MATCH_COMMENT=y
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=y
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_HELPER=y
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=y
+CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NETFILTER_XT_MATCH_LIMIT=y
+CONFIG_NETFILTER_XT_MATCH_MAC=y
+CONFIG_NETFILTER_XT_MATCH_MARK=y
+CONFIG_NETFILTER_XT_MATCH_POLICY=y
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA2=y
+CONFIG_NETFILTER_XT_MATCH_SOCKET=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETFILTER_XT_MATCH_STRING=y
+CONFIG_NETFILTER_XT_MATCH_TIME=y
+CONFIG_NETFILTER_XT_MATCH_U32=y
+CONFIG_NF_CONNTRACK_IPV4=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_MATCH_ECN=y
+CONFIG_IP_NF_MATCH_TTL=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_IP_NF_NAT=y
+CONFIG_IP_NF_TARGET_MASQUERADE=y
+CONFIG_IP_NF_TARGET_NETMAP=y
+CONFIG_IP_NF_TARGET_REDIRECT=y
+CONFIG_IP_NF_MANGLE=y
+CONFIG_IP_NF_RAW=y
+CONFIG_IP_NF_SECURITY=y
+CONFIG_IP_NF_ARPTABLES=y
+CONFIG_IP_NF_ARPFILTER=y
+CONFIG_IP_NF_ARP_MANGLE=y
+CONFIG_NF_CONNTRACK_IPV6=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_MATCH_RPFILTER=y
+CONFIG_IP6_NF_FILTER=y
+CONFIG_IP6_NF_TARGET_REJECT=y
+CONFIG_IP6_NF_MANGLE=y
+CONFIG_IP6_NF_RAW=y
+CONFIG_L2TP=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_HTB=y
+CONFIG_NET_CLS_U32=y
+CONFIG_NET_CLS_BPF=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_U32=y
+CONFIG_NET_CLS_ACT=y
+CONFIG_CFG80211=y
+# CONFIG_CFG80211_DEFAULT_PS is not set
+CONFIG_MAC80211=y
+# CONFIG_MAC80211_RC_MINSTREL is not set
+CONFIG_RFKILL=y
+# CONFIG_UEVENT_HELPER is not set
+CONFIG_DEVTMPFS=y
+# CONFIG_ALLOW_DEV_COREDUMP is not set
+CONFIG_DEBUG_DEVRES=y
+CONFIG_OF_UNITTEST=y
+CONFIG_ZRAM=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_VIRTIO_BLK=y
+CONFIG_UID_SYS_STATS=y
+CONFIG_SCSI=y
+# CONFIG_SCSI_PROC_FS is not set
+CONFIG_BLK_DEV_SD=y
+CONFIG_SCSI_VIRTIO=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=y
+CONFIG_DM_UEVENT=y
+CONFIG_DM_VERITY=y
+CONFIG_DM_VERITY_FEC=y
+CONFIG_DM_VERITY_AVB=y
+CONFIG_NETDEVICES=y
+CONFIG_NETCONSOLE=y
+CONFIG_NETCONSOLE_DYNAMIC=y
+CONFIG_TUN=y
+CONFIG_VIRTIO_NET=y
+# CONFIG_ETHERNET is not set
+CONFIG_PHYLIB=y
+CONFIG_PPP=y
+CONFIG_PPP_BSDCOMP=y
+CONFIG_PPP_DEFLATE=y
+CONFIG_PPP_MPPE=y
+CONFIG_PPTP=y
+CONFIG_PPPOL2TP=y
+CONFIG_USB_USBNET=y
+# CONFIG_USB_NET_AX8817X is not set
+# CONFIG_USB_NET_AX88179_178A is not set
+# CONFIG_USB_NET_CDCETHER is not set
+# CONFIG_USB_NET_CDC_NCM is not set
+# CONFIG_USB_NET_NET1080 is not set
+# CONFIG_USB_NET_CDC_SUBSET is not set
+# CONFIG_USB_NET_ZAURUS is not set
+# CONFIG_WLAN_VENDOR_ADMTEK is not set
+# CONFIG_WLAN_VENDOR_ATH is not set
+# CONFIG_WLAN_VENDOR_ATMEL is not set
+# CONFIG_WLAN_VENDOR_BROADCOM is not set
+# CONFIG_WLAN_VENDOR_CISCO is not set
+# CONFIG_WLAN_VENDOR_INTEL is not set
+# CONFIG_WLAN_VENDOR_INTERSIL is not set
+# CONFIG_WLAN_VENDOR_MARVELL is not set
+# CONFIG_WLAN_VENDOR_MEDIATEK is not set
+# CONFIG_WLAN_VENDOR_RALINK is not set
+# CONFIG_WLAN_VENDOR_REALTEK is not set
+# CONFIG_WLAN_VENDOR_RSI is not set
+# CONFIG_WLAN_VENDOR_ST is not set
+# CONFIG_WLAN_VENDOR_TI is not set
+# CONFIG_WLAN_VENDOR_ZYDAS is not set
+CONFIG_VIRT_WIFI=y
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_KEYRESET=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_INPUT_JOYSTICK=y
+CONFIG_JOYSTICK_XPAD=y
+CONFIG_JOYSTICK_XPAD_FF=y
+CONFIG_JOYSTICK_XPAD_LEDS=y
+CONFIG_INPUT_TABLET=y
+CONFIG_TABLET_USB_ACECAD=y
+CONFIG_TABLET_USB_AIPTEK=y
+CONFIG_TABLET_USB_GTCO=y
+CONFIG_TABLET_USB_HANWANG=y
+CONFIG_TABLET_USB_KBTAB=y
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_UINPUT=y
+CONFIG_INPUT_GPIO=y
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVMEM is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=48
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_HW_RANDOM=y
+CONFIG_HW_RANDOM_VIRTIO=y
+# CONFIG_HW_RANDOM_CAVIUM is not set
+# CONFIG_DEVPORT is not set
+# CONFIG_I2C_COMPAT is not set
+# CONFIG_I2C_HELPER_AUTO is not set
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+CONFIG_CPU_THERMAL=y
+CONFIG_MEDIA_SUPPORT=y
+# CONFIG_DVB_TUNER_DIB0070 is not set
+# CONFIG_DVB_TUNER_DIB0090 is not set
+# CONFIG_VGA_ARB is not set
+CONFIG_DRM=y
+# CONFIG_DRM_FBDEV_EMULATION is not set
+CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_HIDRAW=y
+CONFIG_UHID=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_ACRUX=y
+CONFIG_HID_ACRUX_FF=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_PRODIKEYS=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_DRAGONRISE_FF=y
+CONFIG_HID_EMS_FF=y
+CONFIG_HID_ELECOM=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_HOLTEK=y
+CONFIG_HID_KEYTOUCH=y
+CONFIG_HID_KYE=y
+CONFIG_HID_UCLOGIC=y
+CONFIG_HID_WALTOP=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_LCPOWER=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_LOGITECH_DJ=y
+CONFIG_LOGITECH_FF=y
+CONFIG_LOGIRUMBLEPAD2_FF=y
+CONFIG_LOGIG940_FF=y
+CONFIG_HID_MAGICMOUSE=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_MULTITOUCH=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_ORTEK=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_PANTHERLORD_FF=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_PICOLCD=y
+CONFIG_HID_PRIMAX=y
+CONFIG_HID_ROCCAT=y
+CONFIG_HID_SAITEK=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SONY=y
+CONFIG_HID_SPEEDLINK=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_GREENASIA=y
+CONFIG_GREENASIA_FF=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_SMARTJOYPLUS_FF=y
+CONFIG_HID_TIVO=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_THRUSTMASTER=y
+CONFIG_HID_WACOM=y
+CONFIG_HID_WIIMOTE=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_HID_ZYDACRON=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_GADGET=y
+CONFIG_USB_CONFIGFS=y
+CONFIG_USB_CONFIGFS_F_FS=y
+CONFIG_USB_CONFIGFS_F_ACC=y
+CONFIG_USB_CONFIGFS_F_AUDIO_SRC=y
+CONFIG_USB_CONFIGFS_UEVENT=y
+CONFIG_USB_CONFIGFS_F_MIDI=y
+CONFIG_MMC=y
+# CONFIG_PWRSEQ_EMMC is not set
+# CONFIG_PWRSEQ_SIMPLE is not set
+# CONFIG_MMC_BLOCK is not set
+CONFIG_RTC_CLASS=y
+# CONFIG_RTC_HCTOSYS is not set
+# CONFIG_RTC_SYSTOHC is not set
+CONFIG_RTC_DRV_PL031=y
+CONFIG_VIRTIO_PCI=y
+# CONFIG_VIRTIO_PCI_LEGACY is not set
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_INPUT=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
+CONFIG_STAGING=y
+CONFIG_ASHMEM=y
+CONFIG_ANDROID_VSOC=y
+CONFIG_ION=y
+CONFIG_COMMON_CLK_SCPI=y
+# CONFIG_COMMON_CLK_XGENE is not set
+CONFIG_MAILBOX=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_ANDROID=y
+CONFIG_ANDROID_BINDER_IPC=y
+CONFIG_ARM_SCPI_PROTOCOL=y
+# CONFIG_ARM_SCPI_POWER_DOMAIN is not set
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_EXT4_ENCRYPTION=y
+CONFIG_F2FS_FS=y
+CONFIG_F2FS_FS_SECURITY=y
+CONFIG_F2FS_FS_ENCRYPTION=y
+# CONFIG_DNOTIFY is not set
+CONFIG_QUOTA=y
+CONFIG_QFMT_V2=y
+CONFIG_FUSE_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_SDCARD_FS=y
+CONFIG_PSTORE=y
+CONFIG_PSTORE_CONSOLE=y
+CONFIG_PSTORE_RAM=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_PANIC_TIMEOUT=5
+CONFIG_SCHEDSTATS=y
+CONFIG_TIMER_STATS=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_ENABLE_DEFAULT_TRACERS=y
+CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_LSM_MMAP_MIN_ADDR=65536
+CONFIG_HARDENED_USERCOPY=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_CRYPTO_ADIANTUM=y
+CONFIG_CRYPTO_SHA512=y
+CONFIG_CRYPTO_LZ4=y
+CONFIG_CRYPTO_ZSTD=y
+CONFIG_CRYPTO_ANSI_CPRNG=y
+CONFIG_XZ_DEC=y

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 2b10984..2cf32e9 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig

@@ -53,11 +53,4 @@
 	tristate "CRC32 and CRC32C using optional ARMv8 instructions"
 	depends on ARM64
 	select CRYPTO_HASH
-
-config CRYPTO_SPECK_NEON
-	tristate "NEON accelerated Speck cipher algorithms"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_BLKCIPHER
-	select CRYPTO_GF128MUL
-	select CRYPTO_SPECK
 endif

diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 9908765..550e02a 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile

@@ -30,9 +30,6 @@
 obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o
 aes-neon-blk-y := aes-glue-neon.o aes-neon.o
 
-obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
-speck-neon-y := speck-neon-core.o speck-neon-glue.o
-
 AFLAGS_aes-ce.o		:= -DINTERLEAVE=4
 AFLAGS_aes-neon.o	:= -DINTERLEAVE=4
 

diff --git a/arch/arm64/crypto/speck-neon-core.S b/arch/arm64/crypto/speck-neon-core.S
deleted file mode 100644
index b144634..0000000
--- a/arch/arm64/crypto/speck-neon-core.S
+++ /dev/null

@@ -1,352 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ARM64 NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
- *
- * Copyright (c) 2018 Google, Inc
- *
- * Author: Eric Biggers <ebiggers@google.com>
- */
-
-#include <linux/linkage.h>
-
-	.text
-
-	// arguments
-	ROUND_KEYS	.req	x0	// const {u64,u32} *round_keys
-	NROUNDS		.req	w1	// int nrounds
-	NROUNDS_X	.req	x1
-	DST		.req	x2	// void *dst
-	SRC		.req	x3	// const void *src
-	NBYTES		.req	w4	// unsigned int nbytes
-	TWEAK		.req	x5	// void *tweak
-
-	// registers which hold the data being encrypted/decrypted
-	// (underscores avoid a naming collision with ARM64 registers x0-x3)
-	X_0		.req	v0
-	Y_0		.req	v1
-	X_1		.req	v2
-	Y_1		.req	v3
-	X_2		.req	v4
-	Y_2		.req	v5
-	X_3		.req	v6
-	Y_3		.req	v7
-
-	// the round key, duplicated in all lanes
-	ROUND_KEY	.req	v8
-
-	// index vector for tbl-based 8-bit rotates
-	ROTATE_TABLE	.req	v9
-	ROTATE_TABLE_Q	.req	q9
-
-	// temporary registers
-	TMP0		.req	v10
-	TMP1		.req	v11
-	TMP2		.req	v12
-	TMP3		.req	v13
-
-	// multiplication table for updating XTS tweaks
-	GFMUL_TABLE	.req	v14
-	GFMUL_TABLE_Q	.req	q14
-
-	// next XTS tweak value(s)
-	TWEAKV_NEXT	.req	v15
-
-	// XTS tweaks for the blocks currently being encrypted/decrypted
-	TWEAKV0		.req	v16
-	TWEAKV1		.req	v17
-	TWEAKV2		.req	v18
-	TWEAKV3		.req	v19
-	TWEAKV4		.req	v20
-	TWEAKV5		.req	v21
-	TWEAKV6		.req	v22
-	TWEAKV7		.req	v23
-
-	.align		4
-.Lror64_8_table:
-	.octa		0x080f0e0d0c0b0a090007060504030201
-.Lror32_8_table:
-	.octa		0x0c0f0e0d080b0a090407060500030201
-.Lrol64_8_table:
-	.octa		0x0e0d0c0b0a09080f0605040302010007
-.Lrol32_8_table:
-	.octa		0x0e0d0c0f0a09080b0605040702010003
-.Lgf128mul_table:
-	.octa		0x00000000000000870000000000000001
-.Lgf64mul_table:
-	.octa		0x0000000000000000000000002d361b00
-
-/*
- * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time
- *
- * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for
- * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes
- * of ROUND_KEY.  'n' is the lane size: 64 for Speck128, or 32 for Speck64.
- * 'lanes' is the lane specifier: "2d" for Speck128 or "4s" for Speck64.
- */
-.macro _speck_round_128bytes	n, lanes
-
-	// x = ror(x, 8)
-	tbl		X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
-	tbl		X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
-	tbl		X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
-	tbl		X_3.16b, {X_3.16b}, ROTATE_TABLE.16b
-
-	// x += y
-	add		X_0.\lanes, X_0.\lanes, Y_0.\lanes
-	add		X_1.\lanes, X_1.\lanes, Y_1.\lanes
-	add		X_2.\lanes, X_2.\lanes, Y_2.\lanes
-	add		X_3.\lanes, X_3.\lanes, Y_3.\lanes
-
-	// x ^= k
-	eor		X_0.16b, X_0.16b, ROUND_KEY.16b
-	eor		X_1.16b, X_1.16b, ROUND_KEY.16b
-	eor		X_2.16b, X_2.16b, ROUND_KEY.16b
-	eor		X_3.16b, X_3.16b, ROUND_KEY.16b
-
-	// y = rol(y, 3)
-	shl		TMP0.\lanes, Y_0.\lanes, #3
-	shl		TMP1.\lanes, Y_1.\lanes, #3
-	shl		TMP2.\lanes, Y_2.\lanes, #3
-	shl		TMP3.\lanes, Y_3.\lanes, #3
-	sri		TMP0.\lanes, Y_0.\lanes, #(\n - 3)
-	sri		TMP1.\lanes, Y_1.\lanes, #(\n - 3)
-	sri		TMP2.\lanes, Y_2.\lanes, #(\n - 3)
-	sri		TMP3.\lanes, Y_3.\lanes, #(\n - 3)
-
-	// y ^= x
-	eor		Y_0.16b, TMP0.16b, X_0.16b
-	eor		Y_1.16b, TMP1.16b, X_1.16b
-	eor		Y_2.16b, TMP2.16b, X_2.16b
-	eor		Y_3.16b, TMP3.16b, X_3.16b
-.endm
-
-/*
- * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time
- *
- * This is the inverse of _speck_round_128bytes().
- */
-.macro _speck_unround_128bytes	n, lanes
-
-	// y ^= x
-	eor		TMP0.16b, Y_0.16b, X_0.16b
-	eor		TMP1.16b, Y_1.16b, X_1.16b
-	eor		TMP2.16b, Y_2.16b, X_2.16b
-	eor		TMP3.16b, Y_3.16b, X_3.16b
-
-	// y = ror(y, 3)
-	ushr		Y_0.\lanes, TMP0.\lanes, #3
-	ushr		Y_1.\lanes, TMP1.\lanes, #3
-	ushr		Y_2.\lanes, TMP2.\lanes, #3
-	ushr		Y_3.\lanes, TMP3.\lanes, #3
-	sli		Y_0.\lanes, TMP0.\lanes, #(\n - 3)
-	sli		Y_1.\lanes, TMP1.\lanes, #(\n - 3)
-	sli		Y_2.\lanes, TMP2.\lanes, #(\n - 3)
-	sli		Y_3.\lanes, TMP3.\lanes, #(\n - 3)
-
-	// x ^= k
-	eor		X_0.16b, X_0.16b, ROUND_KEY.16b
-	eor		X_1.16b, X_1.16b, ROUND_KEY.16b
-	eor		X_2.16b, X_2.16b, ROUND_KEY.16b
-	eor		X_3.16b, X_3.16b, ROUND_KEY.16b
-
-	// x -= y
-	sub		X_0.\lanes, X_0.\lanes, Y_0.\lanes
-	sub		X_1.\lanes, X_1.\lanes, Y_1.\lanes
-	sub		X_2.\lanes, X_2.\lanes, Y_2.\lanes
-	sub		X_3.\lanes, X_3.\lanes, Y_3.\lanes
-
-	// x = rol(x, 8)
-	tbl		X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
-	tbl		X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
-	tbl		X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
-	tbl		X_3.16b, {X_3.16b}, ROTATE_TABLE.16b
-.endm
-
-.macro _next_xts_tweak	next, cur, tmp, n
-.if \n == 64
-	/*
-	 * Calculate the next tweak by multiplying the current one by x,
-	 * modulo p(x) = x^128 + x^7 + x^2 + x + 1.
-	 */
-	sshr		\tmp\().2d, \cur\().2d, #63
-	and		\tmp\().16b, \tmp\().16b, GFMUL_TABLE.16b
-	shl		\next\().2d, \cur\().2d, #1
-	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
-	eor		\next\().16b, \next\().16b, \tmp\().16b
-.else
-	/*
-	 * Calculate the next two tweaks by multiplying the current ones by x^2,
-	 * modulo p(x) = x^64 + x^4 + x^3 + x + 1.
-	 */
-	ushr		\tmp\().2d, \cur\().2d, #62
-	shl		\next\().2d, \cur\().2d, #2
-	tbl		\tmp\().16b, {GFMUL_TABLE.16b}, \tmp\().16b
-	eor		\next\().16b, \next\().16b, \tmp\().16b
-.endif
-.endm
-
-/*
- * _speck_xts_crypt() - Speck-XTS encryption/decryption
- *
- * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer
- * using Speck-XTS, specifically the variant with a block size of '2n' and round
- * count given by NROUNDS.  The expanded round keys are given in ROUND_KEYS, and
- * the current XTS tweak value is given in TWEAK.  It's assumed that NBYTES is a
- * nonzero multiple of 128.
- */
-.macro _speck_xts_crypt	n, lanes, decrypting
-
-	/*
-	 * If decrypting, modify the ROUND_KEYS parameter to point to the last
-	 * round key rather than the first, since for decryption the round keys
-	 * are used in reverse order.
-	 */
-.if \decrypting
-	mov		NROUNDS, NROUNDS	/* zero the high 32 bits */
-.if \n == 64
-	add		ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #3
-	sub		ROUND_KEYS, ROUND_KEYS, #8
-.else
-	add		ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #2
-	sub		ROUND_KEYS, ROUND_KEYS, #4
-.endif
-.endif
-
-	// Load the index vector for tbl-based 8-bit rotates
-.if \decrypting
-	ldr		ROTATE_TABLE_Q, .Lrol\n\()_8_table
-.else
-	ldr		ROTATE_TABLE_Q, .Lror\n\()_8_table
-.endif
-
-	// One-time XTS preparation
-.if \n == 64
-	// Load first tweak
-	ld1		{TWEAKV0.16b}, [TWEAK]
-
-	// Load GF(2^128) multiplication table
-	ldr		GFMUL_TABLE_Q, .Lgf128mul_table
-.else
-	// Load first tweak
-	ld1		{TWEAKV0.8b}, [TWEAK]
-
-	// Load GF(2^64) multiplication table
-	ldr		GFMUL_TABLE_Q, .Lgf64mul_table
-
-	// Calculate second tweak, packing it together with the first
-	ushr		TMP0.2d, TWEAKV0.2d, #63
-	shl		TMP1.2d, TWEAKV0.2d, #1
-	tbl		TMP0.8b, {GFMUL_TABLE.16b}, TMP0.8b
-	eor		TMP0.8b, TMP0.8b, TMP1.8b
-	mov		TWEAKV0.d[1], TMP0.d[0]
-.endif
-
-.Lnext_128bytes_\@:
-
-	// Calculate XTS tweaks for next 128 bytes
-	_next_xts_tweak	TWEAKV1, TWEAKV0, TMP0, \n
-	_next_xts_tweak	TWEAKV2, TWEAKV1, TMP0, \n
-	_next_xts_tweak	TWEAKV3, TWEAKV2, TMP0, \n
-	_next_xts_tweak	TWEAKV4, TWEAKV3, TMP0, \n
-	_next_xts_tweak	TWEAKV5, TWEAKV4, TMP0, \n
-	_next_xts_tweak	TWEAKV6, TWEAKV5, TMP0, \n
-	_next_xts_tweak	TWEAKV7, TWEAKV6, TMP0, \n
-	_next_xts_tweak	TWEAKV_NEXT, TWEAKV7, TMP0, \n
-
-	// Load the next source blocks into {X,Y}[0-3]
-	ld1		{X_0.16b-Y_1.16b}, [SRC], #64
-	ld1		{X_2.16b-Y_3.16b}, [SRC], #64
-
-	// XOR the source blocks with their XTS tweaks
-	eor		TMP0.16b, X_0.16b, TWEAKV0.16b
-	eor		Y_0.16b,  Y_0.16b, TWEAKV1.16b
-	eor		TMP1.16b, X_1.16b, TWEAKV2.16b
-	eor		Y_1.16b,  Y_1.16b, TWEAKV3.16b
-	eor		TMP2.16b, X_2.16b, TWEAKV4.16b
-	eor		Y_2.16b,  Y_2.16b, TWEAKV5.16b
-	eor		TMP3.16b, X_3.16b, TWEAKV6.16b
-	eor		Y_3.16b,  Y_3.16b, TWEAKV7.16b
-
-	/*
-	 * De-interleave the 'x' and 'y' elements of each block, i.e. make it so
-	 * that the X[0-3] registers contain only the second halves of blocks,
-	 * and the Y[0-3] registers contain only the first halves of blocks.
-	 * (Speck uses the order (y, x) rather than the more intuitive (x, y).)
-	 */
-	uzp2		X_0.\lanes, TMP0.\lanes, Y_0.\lanes
-	uzp1		Y_0.\lanes, TMP0.\lanes, Y_0.\lanes
-	uzp2		X_1.\lanes, TMP1.\lanes, Y_1.\lanes
-	uzp1		Y_1.\lanes, TMP1.\lanes, Y_1.\lanes
-	uzp2		X_2.\lanes, TMP2.\lanes, Y_2.\lanes
-	uzp1		Y_2.\lanes, TMP2.\lanes, Y_2.\lanes
-	uzp2		X_3.\lanes, TMP3.\lanes, Y_3.\lanes
-	uzp1		Y_3.\lanes, TMP3.\lanes, Y_3.\lanes
-
-	// Do the cipher rounds
-	mov		x6, ROUND_KEYS
-	mov		w7, NROUNDS
-.Lnext_round_\@:
-.if \decrypting
-	ld1r		{ROUND_KEY.\lanes}, [x6]
-	sub		x6, x6, #( \n / 8 )
-	_speck_unround_128bytes	\n, \lanes
-.else
-	ld1r		{ROUND_KEY.\lanes}, [x6], #( \n / 8 )
-	_speck_round_128bytes	\n, \lanes
-.endif
-	subs		w7, w7, #1
-	bne		.Lnext_round_\@
-
-	// Re-interleave the 'x' and 'y' elements of each block
-	zip1		TMP0.\lanes, Y_0.\lanes, X_0.\lanes
-	zip2		Y_0.\lanes,  Y_0.\lanes, X_0.\lanes
-	zip1		TMP1.\lanes, Y_1.\lanes, X_1.\lanes
-	zip2		Y_1.\lanes,  Y_1.\lanes, X_1.\lanes
-	zip1		TMP2.\lanes, Y_2.\lanes, X_2.\lanes
-	zip2		Y_2.\lanes,  Y_2.\lanes, X_2.\lanes
-	zip1		TMP3.\lanes, Y_3.\lanes, X_3.\lanes
-	zip2		Y_3.\lanes,  Y_3.\lanes, X_3.\lanes
-
-	// XOR the encrypted/decrypted blocks with the tweaks calculated earlier
-	eor		X_0.16b, TMP0.16b, TWEAKV0.16b
-	eor		Y_0.16b, Y_0.16b,  TWEAKV1.16b
-	eor		X_1.16b, TMP1.16b, TWEAKV2.16b
-	eor		Y_1.16b, Y_1.16b,  TWEAKV3.16b
-	eor		X_2.16b, TMP2.16b, TWEAKV4.16b
-	eor		Y_2.16b, Y_2.16b,  TWEAKV5.16b
-	eor		X_3.16b, TMP3.16b, TWEAKV6.16b
-	eor		Y_3.16b, Y_3.16b,  TWEAKV7.16b
-	mov		TWEAKV0.16b, TWEAKV_NEXT.16b
-
-	// Store the ciphertext in the destination buffer
-	st1		{X_0.16b-Y_1.16b}, [DST], #64
-	st1		{X_2.16b-Y_3.16b}, [DST], #64
-
-	// Continue if there are more 128-byte chunks remaining
-	subs		NBYTES, NBYTES, #128
-	bne		.Lnext_128bytes_\@
-
-	// Store the next tweak and return
-.if \n == 64
-	st1		{TWEAKV_NEXT.16b}, [TWEAK]
-.else
-	st1		{TWEAKV_NEXT.8b}, [TWEAK]
-.endif
-	ret
-.endm
-
-ENTRY(speck128_xts_encrypt_neon)
-	_speck_xts_crypt	n=64, lanes=2d, decrypting=0
-ENDPROC(speck128_xts_encrypt_neon)
-
-ENTRY(speck128_xts_decrypt_neon)
-	_speck_xts_crypt	n=64, lanes=2d, decrypting=1
-ENDPROC(speck128_xts_decrypt_neon)
-
-ENTRY(speck64_xts_encrypt_neon)
-	_speck_xts_crypt	n=32, lanes=4s, decrypting=0
-ENDPROC(speck64_xts_encrypt_neon)
-
-ENTRY(speck64_xts_decrypt_neon)
-	_speck_xts_crypt	n=32, lanes=4s, decrypting=1
-ENDPROC(speck64_xts_decrypt_neon)

diff --git a/arch/arm64/crypto/speck-neon-glue.c b/arch/arm64/crypto/speck-neon-glue.c
deleted file mode 100644
index c7d1856..0000000
--- a/arch/arm64/crypto/speck-neon-glue.c
+++ /dev/null

@@ -1,308 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
- * (64-bit version; based on the 32-bit version)
- *
- * Copyright (c) 2018 Google, Inc
- */
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <crypto/algapi.h>
-#include <crypto/gf128mul.h>
-#include <crypto/speck.h>
-#include <crypto/xts.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-/* The assembly functions only handle multiples of 128 bytes */
-#define SPECK_NEON_CHUNK_SIZE	128
-
-/* Speck128 */
-
-struct speck128_xts_tfm_ctx {
-	struct speck128_tfm_ctx main_key;
-	struct speck128_tfm_ctx tweak_key;
-};
-
-asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds,
-					  void *dst, const void *src,
-					  unsigned int nbytes, void *tweak);
-
-asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds,
-					  void *dst, const void *src,
-					  unsigned int nbytes, void *tweak);
-
-typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *,
-				     u8 *, const u8 *);
-typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *,
-					  const void *, unsigned int, void *);
-
-static __always_inline int
-__speck128_xts_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes,
-		     speck128_crypt_one_t crypt_one,
-		     speck128_xts_crypt_many_t crypt_many)
-{
-	struct crypto_blkcipher *tfm = desc->tfm;
-	const struct speck128_xts_tfm_ctx *ctx = crypto_blkcipher_ctx(tfm);
-	struct blkcipher_walk walk;
-	le128 tweak;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, SPECK_NEON_CHUNK_SIZE);
-
-	crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
-
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-		u8 *dst = walk.dst.virt.addr;
-		const u8 *src = walk.src.virt.addr;
-
-		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
-			unsigned int count;
-
-			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
-			kernel_neon_begin();
-			(*crypt_many)(ctx->main_key.round_keys,
-				      ctx->main_key.nrounds,
-				      dst, src, count, &tweak);
-			kernel_neon_end();
-			dst += count;
-			src += count;
-			nbytes -= count;
-		}
-
-		/* Handle any remainder with generic code */
-		while (nbytes >= sizeof(tweak)) {
-			le128_xor((le128 *)dst, (const le128 *)src, &tweak);
-			(*crypt_one)(&ctx->main_key, dst, dst);
-			le128_xor((le128 *)dst, (const le128 *)dst, &tweak);
-			gf128mul_x_ble((be128 *)&tweak, (const be128 *)&tweak);
-
-			dst += sizeof(tweak);
-			src += sizeof(tweak);
-			nbytes -= sizeof(tweak);
-		}
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
-
-	return err;
-}
-
-static int speck128_xts_encrypt(struct blkcipher_desc *desc,
-				struct scatterlist *dst,
-				struct scatterlist *src,
-				unsigned int nbytes)
-{
-	return __speck128_xts_crypt(desc, dst, src, nbytes,
-				    crypto_speck128_encrypt,
-				    speck128_xts_encrypt_neon);
-}
-
-static int speck128_xts_decrypt(struct blkcipher_desc *desc,
-				struct scatterlist *dst,
-				struct scatterlist *src,
-				unsigned int nbytes)
-{
-	return __speck128_xts_crypt(desc, dst, src, nbytes,
-				    crypto_speck128_decrypt,
-				    speck128_xts_decrypt_neon);
-}
-
-static int speck128_xts_setkey(struct crypto_tfm *tfm, const u8 *key,
-			       unsigned int keylen)
-{
-	struct speck128_xts_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	if (keylen % 2)
-		return -EINVAL;
-
-	keylen /= 2;
-
-	err = crypto_speck128_setkey(&ctx->main_key, key, keylen);
-	if (err)
-		return err;
-
-	return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen);
-}
-
-/* Speck64 */
-
-struct speck64_xts_tfm_ctx {
-	struct speck64_tfm_ctx main_key;
-	struct speck64_tfm_ctx tweak_key;
-};
-
-asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds,
-					 void *dst, const void *src,
-					 unsigned int nbytes, void *tweak);
-
-asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds,
-					 void *dst, const void *src,
-					 unsigned int nbytes, void *tweak);
-
-typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *,
-				    u8 *, const u8 *);
-typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *,
-					 const void *, unsigned int, void *);
-
-static __always_inline int
-__speck64_xts_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		    struct scatterlist *src, unsigned int nbytes,
-		    speck64_crypt_one_t crypt_one,
-		    speck64_xts_crypt_many_t crypt_many)
-{
-	struct crypto_blkcipher *tfm = desc->tfm;
-	const struct speck64_xts_tfm_ctx *ctx = crypto_blkcipher_ctx(tfm);
-	struct blkcipher_walk walk;
-	__le64 tweak;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, SPECK_NEON_CHUNK_SIZE);
-
-	crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
-
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-		u8 *dst = walk.dst.virt.addr;
-		const u8 *src = walk.src.virt.addr;
-
-		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
-			unsigned int count;
-
-			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
-			kernel_neon_begin();
-			(*crypt_many)(ctx->main_key.round_keys,
-				      ctx->main_key.nrounds,
-				      dst, src, count, &tweak);
-			kernel_neon_end();
-			dst += count;
-			src += count;
-			nbytes -= count;
-		}
-
-		/* Handle any remainder with generic code */
-		while (nbytes >= sizeof(tweak)) {
-			*(__le64 *)dst = *(__le64 *)src ^ tweak;
-			(*crypt_one)(&ctx->main_key, dst, dst);
-			*(__le64 *)dst ^= tweak;
-			tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^
-					    ((tweak & cpu_to_le64(1ULL << 63)) ?
-					     0x1B : 0));
-			dst += sizeof(tweak);
-			src += sizeof(tweak);
-			nbytes -= sizeof(tweak);
-		}
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
-
-	return err;
-}
-
-static int speck64_xts_encrypt(struct blkcipher_desc *desc,
-			       struct scatterlist *dst, struct scatterlist *src,
-			       unsigned int nbytes)
-{
-	return __speck64_xts_crypt(desc, dst, src, nbytes,
-				   crypto_speck64_encrypt,
-				   speck64_xts_encrypt_neon);
-}
-
-static int speck64_xts_decrypt(struct blkcipher_desc *desc,
-			       struct scatterlist *dst, struct scatterlist *src,
-			       unsigned int nbytes)
-{
-	return __speck64_xts_crypt(desc, dst, src, nbytes,
-				   crypto_speck64_decrypt,
-				   speck64_xts_decrypt_neon);
-}
-
-static int speck64_xts_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen)
-{
-	struct speck64_xts_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	if (keylen % 2)
-		return -EINVAL;
-
-	keylen /= 2;
-
-	err = crypto_speck64_setkey(&ctx->main_key, key, keylen);
-	if (err)
-		return err;
-
-	return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen);
-}
-
-static struct crypto_alg speck_algs[] = {
-	{
-		.cra_name		= "xts(speck128)",
-		.cra_driver_name	= "xts-speck128-neon",
-		.cra_priority		= 300,
-		.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-		.cra_blocksize		= SPECK128_BLOCK_SIZE,
-		.cra_type		= &crypto_blkcipher_type,
-		.cra_ctxsize		= sizeof(struct speck128_xts_tfm_ctx),
-		.cra_alignmask		= 7,
-		.cra_module		= THIS_MODULE,
-		.cra_u = {
-			.blkcipher = {
-				.min_keysize		= 2 * SPECK128_128_KEY_SIZE,
-				.max_keysize		= 2 * SPECK128_256_KEY_SIZE,
-				.ivsize			= SPECK128_BLOCK_SIZE,
-				.setkey			= speck128_xts_setkey,
-				.encrypt		= speck128_xts_encrypt,
-				.decrypt		= speck128_xts_decrypt,
-			}
-		}
-	}, {
-		.cra_name		= "xts(speck64)",
-		.cra_driver_name	= "xts-speck64-neon",
-		.cra_priority		= 300,
-		.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-		.cra_blocksize		= SPECK64_BLOCK_SIZE,
-		.cra_type		= &crypto_blkcipher_type,
-		.cra_ctxsize		= sizeof(struct speck64_xts_tfm_ctx),
-		.cra_alignmask		= 7,
-		.cra_module		= THIS_MODULE,
-		.cra_u = {
-			.blkcipher = {
-				.min_keysize		= 2 * SPECK64_96_KEY_SIZE,
-				.max_keysize		= 2 * SPECK64_128_KEY_SIZE,
-				.ivsize			= SPECK64_BLOCK_SIZE,
-				.setkey			= speck64_xts_setkey,
-				.encrypt		= speck64_xts_encrypt,
-				.decrypt		= speck64_xts_decrypt,
-			}
-		}
-	}
-};
-
-static int __init speck_neon_module_init(void)
-{
-	if (!(elf_hwcap & HWCAP_ASIMD))
-		return -ENODEV;
-	return crypto_register_algs(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-static void __exit speck_neon_module_exit(void)
-{
-	crypto_unregister_algs(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-module_init(speck_neon_module_init);
-module_exit(speck_neon_module_exit);
-
-MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
-MODULE_ALIAS_CRYPTO("xts(speck128)");
-MODULE_ALIAS_CRYPTO("xts-speck128-neon");
-MODULE_ALIAS_CRYPTO("xts(speck64)");
-MODULE_ALIAS_CRYPTO("xts-speck64-neon");

diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 898fbaa..8c7c4b2 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h

@@ -34,9 +34,7 @@
 #define ARM64_HAS_32BIT_EL0			13
 #define ARM64_HYP_OFFSET_LOW			14
 #define ARM64_MISMATCHED_CACHE_LINE_SIZE	15
-
 #define ARM64_UNMAP_KERNEL_AT_EL0		16
-
 #define ARM64_HARDEN_BRANCH_PREDICTOR		17
 #define ARM64_SSBD				18
 #define ARM64_MISMATCHED_CACHE_TYPE		19

diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 20b03a02..1abad23 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h

@@ -16,8 +16,8 @@
 #ifndef __ASM_PERCPU_H
 #define __ASM_PERCPU_H
 
-#include <asm/alternative.h>
 #include <asm/stack_pointer.h>
+#include <asm/alternative.h>
 
 static inline void set_my_cpu_offset(unsigned long off)
 {

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 8546afc..3ae4418 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h

@@ -109,6 +109,10 @@
 #define SCTLR_EL1_CP15BEN	(1 << 5)
 
 /* id_aa64isar0 */
+#define ID_AA64ISAR0_DP_SHIFT		44
+#define ID_AA64ISAR0_SM4_SHIFT		40
+#define ID_AA64ISAR0_SM3_SHIFT		36
+#define ID_AA64ISAR0_SHA3_SHIFT		32
 #define ID_AA64ISAR0_RDM_SHIFT		28
 #define ID_AA64ISAR0_ATOMICS_SHIFT	20
 #define ID_AA64ISAR0_CRC32_SHIFT	16

diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 40e3f26..f37a49c 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h

@@ -82,6 +82,7 @@ struct thread_info {
 #define TIF_NEED_RESCHED	1
 #define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
 #define TIF_FOREIGN_FPSTATE	3	/* CPU's FP state is not current's */
+#define TIF_FSCHECK		4	/* Check FS is USER_DS on return */
 #define TIF_NOHZ		7
 #define TIF_SYSCALL_TRACE	8
 #define TIF_SYSCALL_AUDIT	9
@@ -104,10 +105,12 @@ struct thread_info {
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
+#define _TIF_FSCHECK		(1 << TIF_FSCHECK)
 #define _TIF_32BIT		(1 << TIF_32BIT)
 
 #define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
-				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE)
+				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
+				 _TIF_FSCHECK)
 
 #define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
 				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \

diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index de21caa..45b3ac9 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h

@@ -73,6 +73,9 @@ static inline void set_fs(mm_segment_t fs)
 {
 	current_thread_info()->addr_limit = fs;
 
+	/* On user-mode return, check fs is correct */
+	set_thread_flag(TIF_FSCHECK);
+
 	/*
 	 * Prevent a mispredicted conditional call to set_fs from forwarding
 	 * the wrong address limit to access_ok under speculation.

diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index a739287..1cc77c2 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h

@@ -30,5 +30,10 @@
 #define HWCAP_ATOMICS		(1 << 8)
 #define HWCAP_FPHP		(1 << 9)
 #define HWCAP_ASIMDHP		(1 << 10)
+#define HWCAP_SHA3		(1 << 17)
+#define HWCAP_SM3		(1 << 18)
+#define HWCAP_SM4		(1 << 19)
+#define HWCAP_ASIMDDP		(1 << 20)
+#define HWCAP_SHA512		(1 << 21)
 
 #endif /* _UAPI__ASM_HWCAP_H */

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 789a7c7..5e240b9 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c

@@ -83,7 +83,10 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)
 
 
 static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_DP_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_SM4_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_SM3_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_SHA3_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0),
@@ -963,8 +966,13 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES),
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1),
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_SHA512),
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32),
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA3),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM3),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP),
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP),
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),

diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index a754fa3..42730685 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c

@@ -68,6 +68,17 @@ static const char *const hwcap_str[] = {
 	"atomics",
 	"fphp",
 	"asimdhp",
+	"cpuid",
+	"asimdrdm",
+	"jscvt",
+	"fcma",
+	"lrcpc",
+	"dcpop",
+	"sha3",
+	"sm3",
+	"sm4",
+	"asimddp",
+	"sha512",
 	NULL
 };
 

diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 404dd67..c59e6757 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c

@@ -25,6 +25,7 @@
 #include <linux/uaccess.h>
 #include <linux/tracehook.h>
 #include <linux/ratelimit.h>
+#include <linux/syscalls.h>
 
 #include <asm/debug-monitors.h>
 #include <asm/elf.h>
@@ -408,7 +409,11 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
 	 * Update the trace code with the current status.
 	 */
 	trace_hardirqs_off();
+
 	do {
+		/* Check valid user FS if needed */
+		addr_limit_user_check();
+
 		if (thread_flags & _TIF_NEED_RESCHED) {
 			schedule();
 		} else {

diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 4fa6d84..9f2c113 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c

@@ -215,17 +215,16 @@ static noinline void __save_stack_trace(struct task_struct *tsk,
 
 	put_task_stack(tsk);
 }
-EXPORT_SYMBOL(save_stack_trace_tsk);
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
 	__save_stack_trace(tsk, trace, 1);
 }
+EXPORT_SYMBOL(save_stack_trace_tsk);
 
 void save_stack_trace(struct stack_trace *trace)
 {
 	__save_stack_trace(current, trace, 0);
 }
-
 EXPORT_SYMBOL_GPL(save_stack_trace);
 #endif

diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig
index 43c9575..bc1fd7f 100644
--- a/arch/x86/configs/x86_64_cuttlefish_defconfig
+++ b/arch/x86/configs/x86_64_cuttlefish_defconfig

@@ -12,6 +12,7 @@
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_CGROUPS=y
+CONFIG_CGROUP_DEBUG=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_CPUACCT=y
 CONFIG_MEMCG=y
@@ -51,6 +52,7 @@
 CONFIG_KSM=y
 CONFIG_DEFAULT_MMAP_MIN_ADDR=65536
 CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_ZSMALLOC=y
 # CONFIG_MTRR is not set
 CONFIG_HZ_100=y
 CONFIG_KEXEC=y
@@ -59,7 +61,8 @@
 CONFIG_RANDOMIZE_BASE=y
 CONFIG_PHYSICAL_ALIGN=0x1000000
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 reboot=p"
+CONFIG_CMDLINE="console=ttyS0 reboot=p nopti"
+CONFIG_PM_AUTOSLEEP=y
 CONFIG_PM_WAKELOCKS=y
 CONFIG_PM_WAKELOCKS_LIMIT=0
 # CONFIG_PM_WAKELOCKS_GC is not set
@@ -92,8 +95,8 @@
 CONFIG_IP_PIMSM_V1=y
 CONFIG_IP_PIMSM_V2=y
 CONFIG_SYN_COOKIES=y
+CONFIG_NET_IPVTI=y
 CONFIG_INET_ESP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
 CONFIG_INET_DIAG_DESTROY=y
 CONFIG_TCP_CONG_ADVANCED=y
@@ -108,6 +111,7 @@
 CONFIG_INET6_ESP=y
 CONFIG_INET6_IPCOMP=y
 CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_VTI=y
 CONFIG_IPV6_MULTIPLE_TABLES=y
 CONFIG_NETLABEL=y
 CONFIG_NETFILTER=y
@@ -137,6 +141,7 @@
 CONFIG_NETFILTER_XT_TARGET_TRACE=y
 CONFIG_NETFILTER_XT_TARGET_SECMARK=y
 CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
 CONFIG_NETFILTER_XT_MATCH_COMMENT=y
 CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y
 CONFIG_NETFILTER_XT_MATCH_CONNMARK=y
@@ -187,6 +192,7 @@
 CONFIG_NET_SCHED=y
 CONFIG_NET_SCH_HTB=y
 CONFIG_NET_CLS_U32=y
+CONFIG_NET_CLS_BPF=y
 CONFIG_NET_EMATCH=y
 CONFIG_NET_EMATCH_U32=y
 CONFIG_NET_CLS_ACT=y
@@ -199,12 +205,12 @@
 CONFIG_OF=y
 CONFIG_OF_UNITTEST=y
 # CONFIG_PNP_DEBUG_MESSAGES is not set
+CONFIG_ZRAM=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=8192
 CONFIG_VIRTIO_BLK=y
 CONFIG_UID_SYS_STATS=y
-CONFIG_MEMORY_STATE_TIME=y
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_BLK_DEV_SR=y
@@ -221,6 +227,7 @@
 CONFIG_DM_UEVENT=y
 CONFIG_DM_VERITY=y
 CONFIG_DM_VERITY_FEC=y
+CONFIG_DM_ANDROID_VERITY=y
 CONFIG_NETDEVICES=y
 CONFIG_NETCONSOLE=y
 CONFIG_NETCONSOLE_DYNAMIC=y
@@ -257,6 +264,7 @@
 # CONFIG_WLAN_VENDOR_TI is not set
 # CONFIG_WLAN_VENDOR_ZYDAS is not set
 CONFIG_MAC80211_HWSIM=y
+CONFIG_VIRT_WIFI=y
 CONFIG_INPUT_EVDEV=y
 CONFIG_INPUT_KEYRESET=y
 # CONFIG_INPUT_KEYBOARD is not set
@@ -311,11 +319,11 @@
 CONFIG_DRM=y
 # CONFIG_DRM_FBDEV_EMULATION is not set
 CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_FB=y
 CONFIG_SOUND=y
 CONFIG_SND=y
 CONFIG_HIDRAW=y
 CONFIG_UHID=y
-# CONFIG_HID_GENERIC is not set
 CONFIG_HID_A4TECH=y
 CONFIG_HID_ACRUX=y
 CONFIG_HID_ACRUX_FF=y
@@ -379,6 +387,8 @@
 CONFIG_USB_DUMMY_HCD=y
 CONFIG_USB_CONFIGFS=y
 CONFIG_USB_CONFIGFS_F_FS=y
+CONFIG_USB_CONFIGFS_F_MTP=y
+CONFIG_USB_CONFIGFS_F_PTP=y
 CONFIG_USB_CONFIGFS_F_ACC=y
 CONFIG_USB_CONFIGFS_F_AUDIO_SRC=y
 CONFIG_USB_CONFIGFS_UEVENT=y
@@ -388,6 +398,7 @@
 CONFIG_SW_SYNC=y
 CONFIG_VIRTIO_PCI=y
 CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_INPUT=y
 CONFIG_VIRTIO_MMIO=y
 CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
 CONFIG_STAGING=y
@@ -403,6 +414,9 @@
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
 CONFIG_EXT4_ENCRYPTION=y
+CONFIG_F2FS_FS=y
+CONFIG_F2FS_FS_SECURITY=y
+CONFIG_F2FS_FS_ENCRYPTION=y
 CONFIG_QUOTA=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
@@ -436,11 +450,11 @@
 CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_LOCKUP_DETECTOR=y
 CONFIG_PANIC_TIMEOUT=5
-# CONFIG_SCHED_DEBUG is not set
 CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
 CONFIG_ENABLE_DEFAULT_TRACERS=y
+CONFIG_UPROBE_EVENT=y
 CONFIG_IO_DELAY_NONE=y
 CONFIG_DEBUG_BOOT_PARAMS=y
 CONFIG_OPTIMIZE_INLINING=y
@@ -451,4 +465,14 @@
 CONFIG_HARDENED_USERCOPY=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
+CONFIG_CRYPTO_RSA=y
 # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_ADIANTUM=y
+CONFIG_CRYPTO_SHA512=y
+CONFIG_CRYPTO_LZ4=y
+CONFIG_CRYPTO_ZSTD=y
+CONFIG_ASYMMETRIC_KEY_TYPE=y
+CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y
+CONFIG_X509_CERTIFICATE_PARSER=y
+CONFIG_SYSTEM_TRUSTED_KEYRING=y
+CONFIG_SYSTEM_TRUSTED_KEYS="verity_dev_keys.x509"

diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
index 0a5fedf..4446688 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c

@@ -10,7 +10,7 @@
  */
 
 #include <crypto/algapi.h>
-#include <crypto/chacha20.h>
+#include <crypto/chacha.h>
 #include <linux/crypto.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -29,31 +29,31 @@ static bool chacha20_use_avx2;
 static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
 			    unsigned int bytes)
 {
-	u8 buf[CHACHA20_BLOCK_SIZE];
+	u8 buf[CHACHA_BLOCK_SIZE];
 
 #ifdef CONFIG_AS_AVX2
 	if (chacha20_use_avx2) {
-		while (bytes >= CHACHA20_BLOCK_SIZE * 8) {
+		while (bytes >= CHACHA_BLOCK_SIZE * 8) {
 			chacha20_8block_xor_avx2(state, dst, src);
-			bytes -= CHACHA20_BLOCK_SIZE * 8;
-			src += CHACHA20_BLOCK_SIZE * 8;
-			dst += CHACHA20_BLOCK_SIZE * 8;
+			bytes -= CHACHA_BLOCK_SIZE * 8;
+			src += CHACHA_BLOCK_SIZE * 8;
+			dst += CHACHA_BLOCK_SIZE * 8;
 			state[12] += 8;
 		}
 	}
 #endif
-	while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
+	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
 		chacha20_4block_xor_ssse3(state, dst, src);
-		bytes -= CHACHA20_BLOCK_SIZE * 4;
-		src += CHACHA20_BLOCK_SIZE * 4;
-		dst += CHACHA20_BLOCK_SIZE * 4;
+		bytes -= CHACHA_BLOCK_SIZE * 4;
+		src += CHACHA_BLOCK_SIZE * 4;
+		dst += CHACHA_BLOCK_SIZE * 4;
 		state[12] += 4;
 	}
-	while (bytes >= CHACHA20_BLOCK_SIZE) {
+	while (bytes >= CHACHA_BLOCK_SIZE) {
 		chacha20_block_xor_ssse3(state, dst, src);
-		bytes -= CHACHA20_BLOCK_SIZE;
-		src += CHACHA20_BLOCK_SIZE;
-		dst += CHACHA20_BLOCK_SIZE;
+		bytes -= CHACHA_BLOCK_SIZE;
+		src += CHACHA_BLOCK_SIZE;
+		dst += CHACHA_BLOCK_SIZE;
 		state[12]++;
 	}
 	if (bytes) {
@@ -70,24 +70,24 @@ static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst,
 	struct blkcipher_walk walk;
 	int err;
 
-	if (nbytes <= CHACHA20_BLOCK_SIZE || !may_use_simd())
-		return crypto_chacha20_crypt(desc, dst, src, nbytes);
+	if (nbytes <= CHACHA_BLOCK_SIZE || !may_use_simd())
+		return crypto_chacha_crypt(desc, dst, src, nbytes);
 
 	state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE);
+	err = blkcipher_walk_virt_block(desc, &walk, CHACHA_BLOCK_SIZE);
 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
-	crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);
+	crypto_chacha_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);
 
 	kernel_fpu_begin();
 
-	while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
+	while (walk.nbytes >= CHACHA_BLOCK_SIZE) {
 		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
-				rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
+				rounddown(walk.nbytes, CHACHA_BLOCK_SIZE));
 		err = blkcipher_walk_done(desc, &walk,
-					  walk.nbytes % CHACHA20_BLOCK_SIZE);
+					  walk.nbytes % CHACHA_BLOCK_SIZE);
 	}
 
 	if (walk.nbytes) {
@@ -108,14 +108,14 @@ static struct crypto_alg alg = {
 	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
 	.cra_blocksize		= 1,
 	.cra_type		= &crypto_blkcipher_type,
-	.cra_ctxsize		= sizeof(struct chacha20_ctx),
+	.cra_ctxsize		= sizeof(struct chacha_ctx),
 	.cra_alignmask		= sizeof(u32) - 1,
 	.cra_module		= THIS_MODULE,
 	.cra_u			= {
 		.blkcipher = {
-			.min_keysize	= CHACHA20_KEY_SIZE,
-			.max_keysize	= CHACHA20_KEY_SIZE,
-			.ivsize		= CHACHA20_IV_SIZE,
+			.min_keysize	= CHACHA_KEY_SIZE,
+			.max_keysize	= CHACHA_KEY_SIZE,
+			.ivsize		= CHACHA_IV_SIZE,
 			.geniv		= "seqiv",
 			.setkey		= crypto_chacha20_setkey,
 			.encrypt	= chacha20_simd,

diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 28c3720..a69670b 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c

@@ -83,35 +83,37 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
 	if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
 		if (unlikely(!sctx->wset)) {
 			if (!sctx->uset) {
-				memcpy(sctx->u, dctx->r, sizeof(sctx->u));
-				poly1305_simd_mult(sctx->u, dctx->r);
+				memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
+				poly1305_simd_mult(sctx->u, dctx->r.r);
 				sctx->uset = true;
 			}
 			memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
-			poly1305_simd_mult(sctx->u + 5, dctx->r);
+			poly1305_simd_mult(sctx->u + 5, dctx->r.r);
 			memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
-			poly1305_simd_mult(sctx->u + 10, dctx->r);
+			poly1305_simd_mult(sctx->u + 10, dctx->r.r);
 			sctx->wset = true;
 		}
 		blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
-		poly1305_4block_avx2(dctx->h, src, dctx->r, blocks, sctx->u);
+		poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks,
+				     sctx->u);
 		src += POLY1305_BLOCK_SIZE * 4 * blocks;
 		srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
 	}
 #endif
 	if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
 		if (unlikely(!sctx->uset)) {
-			memcpy(sctx->u, dctx->r, sizeof(sctx->u));
-			poly1305_simd_mult(sctx->u, dctx->r);
+			memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
+			poly1305_simd_mult(sctx->u, dctx->r.r);
 			sctx->uset = true;
 		}
 		blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
-		poly1305_2block_sse2(dctx->h, src, dctx->r, blocks, sctx->u);
+		poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks,
+				     sctx->u);
 		src += POLY1305_BLOCK_SIZE * 2 * blocks;
 		srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
 	}
 	if (srclen >= POLY1305_BLOCK_SIZE) {
-		poly1305_block_sse2(dctx->h, src, dctx->r, 1);
+		poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1);
 		srclen -= POLY1305_BLOCK_SIZE;
 	}
 	return srclen;

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index b0cd306..b83eafa 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c

@@ -22,6 +22,7 @@
 #include <linux/user-return-notifier.h>
 #include <linux/nospec.h>
 #include <linux/uprobes.h>
+#include <linux/syscalls.h>
 
 #include <asm/desc.h>
 #include <asm/traps.h>
@@ -180,6 +181,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 	struct thread_info *ti = current_thread_info();
 	u32 cached_flags;
 
+	addr_limit_user_check();
+
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
 		local_irq_disable();
 

diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index d540966..51a858e 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile

@@ -171,7 +171,8 @@
 		 sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
 
 VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=both) \
-	$(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic $(LTO_CFLAGS)
+	$(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic $(LTO_CFLAGS) \
+	$(filter --target=% --gcc-toolchain=%,$(KBUILD_CFLAGS))
 GCOV_PROFILE := n
 
 #

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 2d8788a..83252c2 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h

@@ -101,6 +101,7 @@ struct thread_info {
 #define TIF_SYSCALL_TRACEPOINT	28	/* syscall tracepoint instrumentation */
 #define TIF_ADDR32		29	/* 32-bit address space on 64 bits */
 #define TIF_X32			30	/* 32-bit native x86-64 binary */
+#define TIF_FSCHECK		31	/* Check FS is USER_DS on return */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
@@ -124,6 +125,7 @@ struct thread_info {
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
 #define _TIF_ADDR32		(1 << TIF_ADDR32)
 #define _TIF_X32		(1 << TIF_X32)
+#define _TIF_FSCHECK		(1 << TIF_FSCHECK)
 
 /*
  * work to do in syscall_trace_enter().  Also includes TIF_NOHZ for
@@ -137,7 +139,7 @@ struct thread_info {
 /* work to do on any return to user space */
 #define _TIF_ALLWORK_MASK						\
 	((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT |	\
-	_TIF_NOHZ)
+	_TIF_NOHZ | _TIF_FSCHECK)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 0c87840..76a12ab 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h

@@ -8,6 +8,7 @@
 #include <linux/kasan-checks.h>
 #include <linux/thread_info.h>
 #include <linux/string.h>
+#include <linux/sched.h>
 #include <asm/asm.h>
 #include <asm/page.h>
 #include <asm/smap.h>
@@ -31,7 +32,12 @@
 
 #define get_ds()	(KERNEL_DS)
 #define get_fs()	(current->thread.addr_limit)
-#define set_fs(x)	(current->thread.addr_limit = (x))
+static inline void set_fs(mm_segment_t fs)
+{
+	current->thread.addr_limit = fs;
+	/* On user-mode return, check fs is correct */
+	set_thread_flag(TIF_FSCHECK);
+}
 
 #define segment_eq(a, b)	((a).seg == (b).seg)
 

diff --git a/build.config.cuttlefish.aarch64 b/build.config.cuttlefish.aarch64
new file mode 100644
index 0000000..e1f750d
--- /dev/null
+++ b/build.config.cuttlefish.aarch64

@@ -0,0 +1,16 @@
+ARCH=arm64
+BRANCH=android-4.9
+CLANG_TRIPLE=aarch64-linux-gnu-
+CROSS_COMPILE=aarch64-linux-androidkernel-
+DEFCONFIG=cuttlefish_defconfig
+EXTRA_CMDS=''
+KERNEL_DIR=common
+POST_DEFCONFIG_CMDS="check_defconfig"
+CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r346389b/bin
+LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/bin
+FILES="
+arch/arm64/boot/Image.gz
+vmlinux
+System.map
+"
+STOP_SHIP_TRACEPRINTK=1

diff --git a/build.config.cuttlefish.x86_64 b/build.config.cuttlefish.x86_64
index 5a96563..f8e85a1 100644
--- a/build.config.cuttlefish.x86_64
+++ b/build.config.cuttlefish.x86_64

@@ -6,10 +6,11 @@
 EXTRA_CMDS=''
 KERNEL_DIR=common
 POST_DEFCONFIG_CMDS="check_defconfig"
-CLANG_PREBUILT_BIN=prebuilts/clang/host/linux-x86/clang-4630689/bin
+CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r346389b/bin
 LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin
 FILES="
 arch/x86/boot/bzImage
 vmlinux
 System.map
 "
+STOP_SHIP_TRACEPRINTK=1

diff --git a/build.config.goldfish.arm b/build.config.goldfish.arm
index 866da93..ff5646a 100644
--- a/build.config.goldfish.arm
+++ b/build.config.goldfish.arm

@@ -10,3 +10,4 @@
 vmlinux
 System.map
 "
+STOP_SHIP_TRACEPRINTK=1

diff --git a/build.config.goldfish.arm64 b/build.config.goldfish.arm64
index 9c963cf..4c896a67 100644
--- a/build.config.goldfish.arm64
+++ b/build.config.goldfish.arm64

@@ -10,3 +10,4 @@
 vmlinux
 System.map
 "
+STOP_SHIP_TRACEPRINTK=1

diff --git a/build.config.goldfish.mips b/build.config.goldfish.mips
index 8af53d2..9a14a44 100644
--- a/build.config.goldfish.mips
+++ b/build.config.goldfish.mips

@@ -9,3 +9,4 @@
 vmlinux
 System.map
 "
+STOP_SHIP_TRACEPRINTK=1

diff --git a/build.config.goldfish.mips64 b/build.config.goldfish.mips64
index 2a33d36..6ad9759 100644
--- a/build.config.goldfish.mips64
+++ b/build.config.goldfish.mips64

@@ -9,3 +9,4 @@
 vmlinux
 System.map
 "
+STOP_SHIP_TRACEPRINTK=1

diff --git a/build.config.goldfish.x86 b/build.config.goldfish.x86
index f86253f..2266c621 100644
--- a/build.config.goldfish.x86
+++ b/build.config.goldfish.x86

@@ -10,3 +10,4 @@
 vmlinux
 System.map
 "
+STOP_SHIP_TRACEPRINTK=1

diff --git a/build.config.goldfish.x86_64 b/build.config.goldfish.x86_64
index e173886..08c42c2 100644
--- a/build.config.goldfish.x86_64
+++ b/build.config.goldfish.x86_64

@@ -10,3 +10,4 @@
 vmlinux
 System.map
 "
+STOP_SHIP_TRACEPRINTK=1

diff --git a/certs/system_keyring.c b/certs/system_keyring.c
index 2476650..8cde8ea8 100644
--- a/certs/system_keyring.c
+++ b/certs/system_keyring.c

@@ -241,5 +241,46 @@ int verify_pkcs7_signature(const void *data, size_t len,
 	return ret;
 }
 EXPORT_SYMBOL_GPL(verify_pkcs7_signature);
-
 #endif /* CONFIG_SYSTEM_DATA_VERIFICATION */
+
+/**
+ * verify_signature_one - Verify a signature with keys from given keyring
+ * @sig: The signature to be verified
+ * @trusted_keys: Trusted keys to use (NULL for builtin trusted keys only,
+ *					(void *)1UL for all trusted keys).
+ * @keyid: key description (not partial)
+ */
+int verify_signature_one(const struct public_key_signature *sig,
+			   struct key *trusted_keys, const char *keyid)
+{
+	key_ref_t ref;
+	struct key *key;
+	int ret;
+
+	if (!sig)
+		return -EBADMSG;
+	if (!trusted_keys) {
+		trusted_keys = builtin_trusted_keys;
+	} else if (trusted_keys == (void *)1UL) {
+#ifdef CONFIG_SECONDARY_TRUSTED_KEYRING
+		trusted_keys = secondary_trusted_keys;
+#else
+		trusted_keys = builtin_trusted_keys;
+#endif
+	}
+
+	ref = keyring_search(make_key_ref(trusted_keys, 1),
+				&key_type_asymmetric, keyid);
+	if (IS_ERR(ref)) {
+		pr_err("Asymmetric key (%s) not found in keyring(%s)\n",
+				keyid, trusted_keys->description);
+		return -ENOKEY;
+	}
+
+	key = key_ref_to_ptr(ref);
+	ret = verify_signature(key, sig);
+	key_put(key);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(verify_signature_one);
+

diff --git a/crypto/Kconfig b/crypto/Kconfig
index 1ffcd85..d51b731 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig

@@ -382,6 +382,34 @@
 	  Support for key wrapping (NIST SP800-38F / RFC3394) without
 	  padding.
 
+config CRYPTO_NHPOLY1305
+	tristate
+	select CRYPTO_HASH
+	select CRYPTO_POLY1305
+
+config CRYPTO_ADIANTUM
+	tristate "Adiantum support"
+	select CRYPTO_CHACHA20
+	select CRYPTO_POLY1305
+	select CRYPTO_NHPOLY1305
+	help
+	  Adiantum is a tweakable, length-preserving encryption mode
+	  designed for fast and secure disk encryption, especially on
+	  CPUs without dedicated crypto instructions.  It encrypts
+	  each sector using the XChaCha12 stream cipher, two passes of
+	  an ε-almost-∆-universal hash function, and an invocation of
+	  the AES-256 block cipher on a single 16-byte block.  On CPUs
+	  without AES instructions, Adiantum is much faster than
+	  AES-XTS.
+
+	  Adiantum's security is provably reducible to that of its
+	  underlying stream and block ciphers, subject to a security
+	  bound.  Unlike XTS, Adiantum is a true wide-block encryption
+	  mode, so it actually provides an even stronger notion of
+	  security than XTS, subject to the security bound.
+
+	  If unsure, say N.
+
 comment "Hash modes"
 
 config CRYPTO_CMAC
@@ -1311,18 +1339,26 @@
 	  Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
 
 config CRYPTO_CHACHA20
-	tristate "ChaCha20 cipher algorithm"
+	tristate "ChaCha stream cipher algorithms"
 	select CRYPTO_BLKCIPHER
 	help
-	  ChaCha20 cipher algorithm, RFC7539.
+	  The ChaCha20, XChaCha20, and XChaCha12 stream cipher algorithms.
 
 	  ChaCha20 is a 256-bit high-speed stream cipher designed by Daniel J.
 	  Bernstein and further specified in RFC7539 for use in IETF protocols.
-	  This is the portable C implementation of ChaCha20.
-
-	  See also:
+	  This is the portable C implementation of ChaCha20.  See also:
 	  <http://cr.yp.to/chacha/chacha-20080128.pdf>
 
+	  XChaCha20 is the application of the XSalsa20 construction to ChaCha20
+	  rather than to Salsa20.  XChaCha20 extends ChaCha20's nonce length
+	  from 64 bits (or 96 bits using the RFC7539 convention) to 192 bits,
+	  while provably retaining ChaCha20's security.  See also:
+	  <https://cr.yp.to/snuffle/xsalsa-20081128.pdf>
+
+	  XChaCha12 is XChaCha20 reduced to 12 rounds, with correspondingly
+	  reduced security margin but increased performance.  It can be needed
+	  in some performance-sensitive scenarios.
+
 config CRYPTO_CHACHA20_X86_64
 	tristate "ChaCha20 cipher algorithm (x86_64/SSSE3/AVX2)"
 	depends on X86 && 64BIT
@@ -1454,20 +1490,6 @@
 	  See also:
 	  <http://www.cl.cam.ac.uk/~rja14/serpent.html>
 
-config CRYPTO_SPECK
-	tristate "Speck cipher algorithm"
-	select CRYPTO_ALGAPI
-	help
-	  Speck is a lightweight block cipher that is tuned for optimal
-	  performance in software (rather than hardware).
-
-	  Speck may not be as secure as AES, and should only be used on systems
-	  where AES is not fast enough.
-
-	  See also: <https://eprint.iacr.org/2013/404.pdf>
-
-	  If unsure, say N.
-
 config CRYPTO_TEA
 	tristate "TEA, XTEA and XETA cipher algorithms"
 	select CRYPTO_ALGAPI
@@ -1634,6 +1656,15 @@
 	help
 	  This is the LZ4 high compression mode algorithm.
 
+config CRYPTO_ZSTD
+	tristate "Zstd compression algorithm"
+	select CRYPTO_ALGAPI
+	select CRYPTO_ACOMP2
+	select ZSTD_COMPRESS
+	select ZSTD_DECOMPRESS
+	help
+	  This is the zstd algorithm.
+
 comment "Random Number Generation"
 
 config CRYPTO_ANSI_CPRNG

diff --git a/crypto/Makefile b/crypto/Makefile
index 233ef36..9cb27f1 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile

@@ -85,6 +85,8 @@
 obj-$(CONFIG_CRYPTO_XTS) += xts.o
 obj-$(CONFIG_CRYPTO_CTR) += ctr.o
 obj-$(CONFIG_CRYPTO_KEYWRAP) += keywrap.o
+obj-$(CONFIG_CRYPTO_ADIANTUM) += adiantum.o
+obj-$(CONFIG_CRYPTO_NHPOLY1305) += nhpoly1305.o
 obj-$(CONFIG_CRYPTO_GCM) += gcm.o
 obj-$(CONFIG_CRYPTO_CCM) += ccm.o
 obj-$(CONFIG_CRYPTO_CHACHA20POLY1305) += chacha20poly1305.o
@@ -109,9 +111,8 @@
 obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o
 obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o
 obj-$(CONFIG_CRYPTO_SEED) += seed.o
-obj-$(CONFIG_CRYPTO_SPECK) += speck.o
 obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
-obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o
+obj-$(CONFIG_CRYPTO_CHACHA20) += chacha_generic.o
 obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o
 obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
 obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
@@ -136,6 +137,7 @@
 obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
 obj-$(CONFIG_CRYPTO_USER_API_RNG) += algif_rng.o
 obj-$(CONFIG_CRYPTO_USER_API_AEAD) += algif_aead.o
+obj-$(CONFIG_CRYPTO_ZSTD) += zstd.o
 
 #
 # generic algorithms and the async_tx api

diff --git a/crypto/adiantum.c b/crypto/adiantum.c
new file mode 100644
index 0000000..2dfcf12
--- /dev/null
+++ b/crypto/adiantum.c

@@ -0,0 +1,658 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Adiantum length-preserving encryption mode
+ *
+ * Copyright 2018 Google LLC
+ */
+
+/*
+ * Adiantum is a tweakable, length-preserving encryption mode designed for fast
+ * and secure disk encryption, especially on CPUs without dedicated crypto
+ * instructions.  Adiantum encrypts each sector using the XChaCha12 stream
+ * cipher, two passes of an ε-almost-∆-universal (εA∆U) hash function based on
+ * NH and Poly1305, and an invocation of the AES-256 block cipher on a single
+ * 16-byte block.  See the paper for details:
+ *
+ *	Adiantum: length-preserving encryption for entry-level processors
+ *      (https://eprint.iacr.org/2018/720.pdf)
+ *
+ * For flexibility, this implementation also allows other ciphers:
+ *
+ *	- Stream cipher: XChaCha12 or XChaCha20
+ *	- Block cipher: any with a 128-bit block size and 256-bit key
+ *
+ * This implementation doesn't currently allow other εA∆U hash functions, i.e.
+ * HPolyC is not supported.  This is because Adiantum is ~20% faster than HPolyC
+ * but still provably as secure, and also the εA∆U hash function of HBSH is
+ * formally defined to take two inputs (tweak, message) which makes it difficult
+ * to wrap with the crypto_shash API.  Rather, some details need to be handled
+ * here.  Nevertheless, if needed in the future, support for other εA∆U hash
+ * functions could be added here.
+ */
+
+#include <crypto/b128ops.h>
+#include <crypto/chacha.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/nhpoly1305.h>
+#include <crypto/scatterwalk.h>
+#include <linux/module.h>
+
+#include "internal.h"
+
+/*
+ * Size of right-hand block of input data, in bytes; also the size of the block
+ * cipher's block size and the hash function's output.
+ */
+#define BLOCKCIPHER_BLOCK_SIZE		16
+
+/* Size of the block cipher key (K_E) in bytes */
+#define BLOCKCIPHER_KEY_SIZE		32
+
+/* Size of the hash key (K_H) in bytes */
+#define HASH_KEY_SIZE		(POLY1305_BLOCK_SIZE + NHPOLY1305_KEY_SIZE)
+
+/*
+ * The specification allows variable-length tweaks, but Linux's crypto API
+ * currently only allows algorithms to support a single length.  The "natural"
+ * tweak length for Adiantum is 16, since that fits into one Poly1305 block for
+ * the best performance.  But longer tweaks are useful for fscrypt, to avoid
+ * needing to derive per-file keys.  So instead we use two blocks, or 32 bytes.
+ */
+#define TWEAK_SIZE		32
+
+struct adiantum_instance_ctx {
+	struct crypto_skcipher_spawn streamcipher_spawn;
+	struct crypto_spawn blockcipher_spawn;
+	struct crypto_shash_spawn hash_spawn;
+};
+
+struct adiantum_tfm_ctx {
+	struct crypto_skcipher *streamcipher;
+	struct crypto_cipher *blockcipher;
+	struct crypto_shash *hash;
+	struct poly1305_key header_hash_key;
+};
+
+struct adiantum_request_ctx {
+
+	/*
+	 * Buffer for right-hand block of data, i.e.
+	 *
+	 *    P_L => P_M => C_M => C_R when encrypting, or
+	 *    C_R => C_M => P_M => P_L when decrypting.
+	 *
+	 * Also used to build the IV for the stream cipher.
+	 */
+	union {
+		u8 bytes[XCHACHA_IV_SIZE];
+		__le32 words[XCHACHA_IV_SIZE / sizeof(__le32)];
+		le128 bignum;	/* interpret as element of Z/(2^{128}Z) */
+	} rbuf;
+
+	bool enc; /* true if encrypting, false if decrypting */
+
+	/*
+	 * The result of the Poly1305 εA∆U hash function applied to
+	 * (message length, tweak).
+	 */
+	le128 header_hash;
+
+	/* Sub-requests, must be last */
+	union {
+		struct shash_desc hash_desc;
+		struct skcipher_request streamcipher_req;
+	} u;
+};
+
+/*
+ * Given the XChaCha stream key K_S, derive the block cipher key K_E and the
+ * hash key K_H as follows:
+ *
+ *     K_E || K_H || ... = XChaCha(key=K_S, nonce=1||0^191)
+ *
+ * Note that this denotes using bits from the XChaCha keystream, which here we
+ * get indirectly by encrypting a buffer containing all 0's.
+ */
+static int adiantum_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			   unsigned int keylen)
+{
+	struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+	struct {
+		u8 iv[XCHACHA_IV_SIZE];
+		u8 derived_keys[BLOCKCIPHER_KEY_SIZE + HASH_KEY_SIZE];
+		struct scatterlist sg;
+		struct crypto_wait wait;
+		struct skcipher_request req; /* must be last */
+	} *data;
+	u8 *keyp;
+	int err;
+
+	/* Set the stream cipher key (K_S) */
+	crypto_skcipher_clear_flags(tctx->streamcipher, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(tctx->streamcipher,
+				  crypto_skcipher_get_flags(tfm) &
+				  CRYPTO_TFM_REQ_MASK);
+	err = crypto_skcipher_setkey(tctx->streamcipher, key, keylen);
+	crypto_skcipher_set_flags(tfm,
+				crypto_skcipher_get_flags(tctx->streamcipher) &
+				CRYPTO_TFM_RES_MASK);
+	if (err)
+		return err;
+
+	/* Derive the subkeys */
+	data = kzalloc(sizeof(*data) +
+		       crypto_skcipher_reqsize(tctx->streamcipher), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+	data->iv[0] = 1;
+	sg_init_one(&data->sg, data->derived_keys, sizeof(data->derived_keys));
+	crypto_init_wait(&data->wait);
+	skcipher_request_set_tfm(&data->req, tctx->streamcipher);
+	skcipher_request_set_callback(&data->req, CRYPTO_TFM_REQ_MAY_SLEEP |
+						  CRYPTO_TFM_REQ_MAY_BACKLOG,
+				      crypto_req_done, &data->wait);
+	skcipher_request_set_crypt(&data->req, &data->sg, &data->sg,
+				   sizeof(data->derived_keys), data->iv);
+	err = crypto_wait_req(crypto_skcipher_encrypt(&data->req), &data->wait);
+	if (err)
+		goto out;
+	keyp = data->derived_keys;
+
+	/* Set the block cipher key (K_E) */
+	crypto_cipher_clear_flags(tctx->blockcipher, CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(tctx->blockcipher,
+				crypto_skcipher_get_flags(tfm) &
+				CRYPTO_TFM_REQ_MASK);
+	err = crypto_cipher_setkey(tctx->blockcipher, keyp,
+				   BLOCKCIPHER_KEY_SIZE);
+	crypto_skcipher_set_flags(tfm,
+				  crypto_cipher_get_flags(tctx->blockcipher) &
+				  CRYPTO_TFM_RES_MASK);
+	if (err)
+		goto out;
+	keyp += BLOCKCIPHER_KEY_SIZE;
+
+	/* Set the hash key (K_H) */
+	poly1305_core_setkey(&tctx->header_hash_key, keyp);
+	keyp += POLY1305_BLOCK_SIZE;
+
+	crypto_shash_clear_flags(tctx->hash, CRYPTO_TFM_REQ_MASK);
+	crypto_shash_set_flags(tctx->hash, crypto_skcipher_get_flags(tfm) &
+					   CRYPTO_TFM_REQ_MASK);
+	err = crypto_shash_setkey(tctx->hash, keyp, NHPOLY1305_KEY_SIZE);
+	crypto_skcipher_set_flags(tfm, crypto_shash_get_flags(tctx->hash) &
+				       CRYPTO_TFM_RES_MASK);
+	keyp += NHPOLY1305_KEY_SIZE;
+	WARN_ON(keyp != &data->derived_keys[ARRAY_SIZE(data->derived_keys)]);
+out:
+	kzfree(data);
+	return err;
+}
+
+/* Addition in Z/(2^{128}Z) */
+static inline void le128_add(le128 *r, const le128 *v1, const le128 *v2)
+{
+	u64 x = le64_to_cpu(v1->b);
+	u64 y = le64_to_cpu(v2->b);
+
+	r->b = cpu_to_le64(x + y);
+	r->a = cpu_to_le64(le64_to_cpu(v1->a) + le64_to_cpu(v2->a) +
+			   (x + y < x));
+}
+
+/* Subtraction in Z/(2^{128}Z) */
+static inline void le128_sub(le128 *r, const le128 *v1, const le128 *v2)
+{
+	u64 x = le64_to_cpu(v1->b);
+	u64 y = le64_to_cpu(v2->b);
+
+	r->b = cpu_to_le64(x - y);
+	r->a = cpu_to_le64(le64_to_cpu(v1->a) - le64_to_cpu(v2->a) -
+			   (x - y > x));
+}
+
+/*
+ * Apply the Poly1305 εA∆U hash function to (message length, tweak) and save the
+ * result to rctx->header_hash.
+ *
+ * This value is reused in both the first and second hash steps.  Specifically,
+ * it's added to the result of an independently keyed εA∆U hash function (for
+ * equal length inputs only) taken over the message.  This gives the overall
+ * Adiantum hash of the (tweak, message) pair.
+ */
+static void adiantum_hash_header(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+	struct adiantum_request_ctx *rctx = skcipher_request_ctx(req);
+	const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE;
+	struct {
+		__le64 message_bits;
+		__le64 padding;
+	} header = {
+		.message_bits = cpu_to_le64((u64)bulk_len * 8)
+	};
+	struct poly1305_state state;
+
+	poly1305_core_init(&state);
+
+	BUILD_BUG_ON(sizeof(header) % POLY1305_BLOCK_SIZE != 0);
+	poly1305_core_blocks(&state, &tctx->header_hash_key,
+			     &header, sizeof(header) / POLY1305_BLOCK_SIZE);
+
+	BUILD_BUG_ON(TWEAK_SIZE % POLY1305_BLOCK_SIZE != 0);
+	poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv,
+			     TWEAK_SIZE / POLY1305_BLOCK_SIZE);
+
+	poly1305_core_emit(&state, &rctx->header_hash);
+}
+
+/* Hash the left-hand block (the "bulk") of the message using NHPoly1305 */
+static int adiantum_hash_message(struct skcipher_request *req,
+				 struct scatterlist *sgl, le128 *digest)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+	struct adiantum_request_ctx *rctx = skcipher_request_ctx(req);
+	const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE;
+	struct shash_desc *hash_desc = &rctx->u.hash_desc;
+	struct sg_mapping_iter miter;
+	unsigned int i, n;
+	int err;
+
+	hash_desc->tfm = tctx->hash;
+	hash_desc->flags = 0;
+
+	err = crypto_shash_init(hash_desc);
+	if (err)
+		return err;
+
+	sg_miter_start(&miter, sgl, sg_nents(sgl),
+		       SG_MITER_FROM_SG | SG_MITER_ATOMIC);
+	for (i = 0; i < bulk_len; i += n) {
+		sg_miter_next(&miter);
+		n = min_t(unsigned int, miter.length, bulk_len - i);
+		err = crypto_shash_update(hash_desc, miter.addr, n);
+		if (err)
+			break;
+	}
+	sg_miter_stop(&miter);
+	if (err)
+		return err;
+
+	return crypto_shash_final(hash_desc, (u8 *)digest);
+}
+
+/* Continue Adiantum encryption/decryption after the stream cipher step */
+static int adiantum_finish(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+	struct adiantum_request_ctx *rctx = skcipher_request_ctx(req);
+	const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE;
+	le128 digest;
+	int err;
+
+	/* If decrypting, decrypt C_M with the block cipher to get P_M */
+	if (!rctx->enc)
+		crypto_cipher_decrypt_one(tctx->blockcipher, rctx->rbuf.bytes,
+					  rctx->rbuf.bytes);
+
+	/*
+	 * Second hash step
+	 *	enc: C_R = C_M - H_{K_H}(T, C_L)
+	 *	dec: P_R = P_M - H_{K_H}(T, P_L)
+	 */
+	err = adiantum_hash_message(req, req->dst, &digest);
+	if (err)
+		return err;
+	le128_add(&digest, &digest, &rctx->header_hash);
+	le128_sub(&rctx->rbuf.bignum, &rctx->rbuf.bignum, &digest);
+	scatterwalk_map_and_copy(&rctx->rbuf.bignum, req->dst,
+				 bulk_len, BLOCKCIPHER_BLOCK_SIZE, 1);
+	return 0;
+}
+
+static void adiantum_streamcipher_done(struct crypto_async_request *areq,
+				       int err)
+{
+	struct skcipher_request *req = areq->data;
+
+	if (!err)
+		err = adiantum_finish(req);
+
+	skcipher_request_complete(req, err);
+}
+
+static int adiantum_crypt(struct skcipher_request *req, bool enc)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+	struct adiantum_request_ctx *rctx = skcipher_request_ctx(req);
+	const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE;
+	unsigned int stream_len;
+	le128 digest;
+	int err;
+
+	if (req->cryptlen < BLOCKCIPHER_BLOCK_SIZE)
+		return -EINVAL;
+
+	rctx->enc = enc;
+
+	/*
+	 * First hash step
+	 *	enc: P_M = P_R + H_{K_H}(T, P_L)
+	 *	dec: C_M = C_R + H_{K_H}(T, C_L)
+	 */
+	adiantum_hash_header(req);
+	err = adiantum_hash_message(req, req->src, &digest);
+	if (err)
+		return err;
+	le128_add(&digest, &digest, &rctx->header_hash);
+	scatterwalk_map_and_copy(&rctx->rbuf.bignum, req->src,
+				 bulk_len, BLOCKCIPHER_BLOCK_SIZE, 0);
+	le128_add(&rctx->rbuf.bignum, &rctx->rbuf.bignum, &digest);
+
+	/* If encrypting, encrypt P_M with the block cipher to get C_M */
+	if (enc)
+		crypto_cipher_encrypt_one(tctx->blockcipher, rctx->rbuf.bytes,
+					  rctx->rbuf.bytes);
+
+	/* Initialize the rest of the XChaCha IV (first part is C_M) */
+	BUILD_BUG_ON(BLOCKCIPHER_BLOCK_SIZE != 16);
+	BUILD_BUG_ON(XCHACHA_IV_SIZE != 32);	/* nonce || stream position */
+	rctx->rbuf.words[4] = cpu_to_le32(1);
+	rctx->rbuf.words[5] = 0;
+	rctx->rbuf.words[6] = 0;
+	rctx->rbuf.words[7] = 0;
+
+	/*
+	 * XChaCha needs to be done on all the data except the last 16 bytes;
+	 * for disk encryption that usually means 4080 or 496 bytes.  But ChaCha
+	 * implementations tend to be most efficient when passed a whole number
+	 * of 64-byte ChaCha blocks, or sometimes even a multiple of 256 bytes.
+	 * And here it doesn't matter whether the last 16 bytes are written to,
+	 * as the second hash step will overwrite them.  Thus, round the XChaCha
+	 * length up to the next 64-byte boundary if possible.
+	 */
+	stream_len = bulk_len;
+	if (round_up(stream_len, CHACHA_BLOCK_SIZE) <= req->cryptlen)
+		stream_len = round_up(stream_len, CHACHA_BLOCK_SIZE);
+
+	skcipher_request_set_tfm(&rctx->u.streamcipher_req, tctx->streamcipher);
+	skcipher_request_set_crypt(&rctx->u.streamcipher_req, req->src,
+				   req->dst, stream_len, &rctx->rbuf);
+	skcipher_request_set_callback(&rctx->u.streamcipher_req,
+				      req->base.flags,
+				      adiantum_streamcipher_done, req);
+	return crypto_skcipher_encrypt(&rctx->u.streamcipher_req) ?:
+		adiantum_finish(req);
+}
+
+static int adiantum_encrypt(struct skcipher_request *req)
+{
+	return adiantum_crypt(req, true);
+}
+
+static int adiantum_decrypt(struct skcipher_request *req)
+{
+	return adiantum_crypt(req, false);
+}
+
+static int adiantum_init_tfm(struct crypto_skcipher *tfm)
+{
+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+	struct adiantum_instance_ctx *ictx = skcipher_instance_ctx(inst);
+	struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+	struct crypto_skcipher *streamcipher;
+	struct crypto_cipher *blockcipher;
+	struct crypto_shash *hash;
+	unsigned int subreq_size;
+	int err;
+
+	streamcipher = crypto_spawn_skcipher(&ictx->streamcipher_spawn);
+	if (IS_ERR(streamcipher))
+		return PTR_ERR(streamcipher);
+
+	blockcipher = crypto_spawn_cipher(&ictx->blockcipher_spawn);
+	if (IS_ERR(blockcipher)) {
+		err = PTR_ERR(blockcipher);
+		goto err_free_streamcipher;
+	}
+
+	hash = crypto_spawn_shash(&ictx->hash_spawn);
+	if (IS_ERR(hash)) {
+		err = PTR_ERR(hash);
+		goto err_free_blockcipher;
+	}
+
+	tctx->streamcipher = streamcipher;
+	tctx->blockcipher = blockcipher;
+	tctx->hash = hash;
+
+	BUILD_BUG_ON(offsetofend(struct adiantum_request_ctx, u) !=
+		     sizeof(struct adiantum_request_ctx));
+	subreq_size = max(FIELD_SIZEOF(struct adiantum_request_ctx,
+				       u.hash_desc) +
+			  crypto_shash_descsize(hash),
+			  FIELD_SIZEOF(struct adiantum_request_ctx,
+				       u.streamcipher_req) +
+			  crypto_skcipher_reqsize(streamcipher));
+
+	crypto_skcipher_set_reqsize(tfm,
+				    offsetof(struct adiantum_request_ctx, u) +
+				    subreq_size);
+	return 0;
+
+err_free_blockcipher:
+	crypto_free_cipher(blockcipher);
+err_free_streamcipher:
+	crypto_free_skcipher(streamcipher);
+	return err;
+}
+
+static void adiantum_exit_tfm(struct crypto_skcipher *tfm)
+{
+	struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_skcipher(tctx->streamcipher);
+	crypto_free_cipher(tctx->blockcipher);
+	crypto_free_shash(tctx->hash);
+}
+
+static void adiantum_free_instance(struct skcipher_instance *inst)
+{
+	struct adiantum_instance_ctx *ictx = skcipher_instance_ctx(inst);
+
+	crypto_drop_skcipher(&ictx->streamcipher_spawn);
+	crypto_drop_spawn(&ictx->blockcipher_spawn);
+	crypto_drop_shash(&ictx->hash_spawn);
+	kfree(inst);
+}
+
+/*
+ * Check for a supported set of inner algorithms.
+ * See the comment at the beginning of this file.
+ */
+static bool adiantum_supported_algorithms(struct skcipher_alg *streamcipher_alg,
+					  struct crypto_alg *blockcipher_alg,
+					  struct shash_alg *hash_alg)
+{
+	if (strcmp(streamcipher_alg->base.cra_name, "xchacha12") != 0 &&
+	    strcmp(streamcipher_alg->base.cra_name, "xchacha20") != 0)
+		return false;
+
+	if (blockcipher_alg->cra_cipher.cia_min_keysize > BLOCKCIPHER_KEY_SIZE ||
+	    blockcipher_alg->cra_cipher.cia_max_keysize < BLOCKCIPHER_KEY_SIZE)
+		return false;
+	if (blockcipher_alg->cra_blocksize != BLOCKCIPHER_BLOCK_SIZE)
+		return false;
+
+	if (strcmp(hash_alg->base.cra_name, "nhpoly1305") != 0)
+		return false;
+
+	return true;
+}
+
+static int adiantum_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+	struct crypto_attr_type *algt;
+	const char *streamcipher_name;
+	const char *blockcipher_name;
+	const char *nhpoly1305_name;
+	struct skcipher_instance *inst;
+	struct adiantum_instance_ctx *ictx;
+	struct skcipher_alg *streamcipher_alg;
+	struct crypto_alg *blockcipher_alg;
+	struct crypto_alg *_hash_alg;
+	struct shash_alg *hash_alg;
+	int err;
+
+	algt = crypto_get_attr_type(tb);
+	if (IS_ERR(algt))
+		return PTR_ERR(algt);
+
+	if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask)
+		return -EINVAL;
+
+	streamcipher_name = crypto_attr_alg_name(tb[1]);
+	if (IS_ERR(streamcipher_name))
+		return PTR_ERR(streamcipher_name);
+
+	blockcipher_name = crypto_attr_alg_name(tb[2]);
+	if (IS_ERR(blockcipher_name))
+		return PTR_ERR(blockcipher_name);
+
+	nhpoly1305_name = crypto_attr_alg_name(tb[3]);
+	if (nhpoly1305_name == ERR_PTR(-ENOENT))
+		nhpoly1305_name = "nhpoly1305";
+	if (IS_ERR(nhpoly1305_name))
+		return PTR_ERR(nhpoly1305_name);
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+	ictx = skcipher_instance_ctx(inst);
+
+	/* Stream cipher, e.g. "xchacha12" */
+	err = crypto_grab_skcipher(&ictx->streamcipher_spawn, streamcipher_name,
+				   0, crypto_requires_sync(algt->type,
+							   algt->mask));
+	if (err)
+		goto out_free_inst;
+	streamcipher_alg = crypto_spawn_skcipher_alg(&ictx->streamcipher_spawn);
+
+	/* Block cipher, e.g. "aes" */
+	err = crypto_grab_spawn(&ictx->blockcipher_spawn, blockcipher_name,
+				CRYPTO_ALG_TYPE_CIPHER, CRYPTO_ALG_TYPE_MASK);
+	if (err)
+		goto out_drop_streamcipher;
+	blockcipher_alg = ictx->blockcipher_spawn.alg;
+
+	/* NHPoly1305 εA∆U hash function */
+	_hash_alg = crypto_alg_mod_lookup(nhpoly1305_name,
+					  CRYPTO_ALG_TYPE_SHASH,
+					  CRYPTO_ALG_TYPE_MASK);
+	if (IS_ERR(_hash_alg)) {
+		err = PTR_ERR(_hash_alg);
+		goto out_drop_blockcipher;
+	}
+	hash_alg = __crypto_shash_alg(_hash_alg);
+	err = crypto_init_shash_spawn(&ictx->hash_spawn, hash_alg,
+				      skcipher_crypto_instance(inst));
+	if (err) {
+		crypto_mod_put(_hash_alg);
+		goto out_drop_blockcipher;
+	}
+
+	/* Check the set of algorithms */
+	if (!adiantum_supported_algorithms(streamcipher_alg, blockcipher_alg,
+					   hash_alg)) {
+		pr_warn("Unsupported Adiantum instantiation: (%s,%s,%s)\n",
+			streamcipher_alg->base.cra_name,
+			blockcipher_alg->cra_name, hash_alg->base.cra_name);
+		err = -EINVAL;
+		goto out_drop_hash;
+	}
+
+	/* Instance fields */
+
+	err = -ENAMETOOLONG;
+	if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
+		     "adiantum(%s,%s)", streamcipher_alg->base.cra_name,
+		     blockcipher_alg->cra_name) >= CRYPTO_MAX_ALG_NAME)
+		goto out_drop_hash;
+	if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+		     "adiantum(%s,%s,%s)",
+		     streamcipher_alg->base.cra_driver_name,
+		     blockcipher_alg->cra_driver_name,
+		     hash_alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
+		goto out_drop_hash;
+
+	inst->alg.base.cra_blocksize = BLOCKCIPHER_BLOCK_SIZE;
+	inst->alg.base.cra_ctxsize = sizeof(struct adiantum_tfm_ctx);
+	inst->alg.base.cra_alignmask = streamcipher_alg->base.cra_alignmask |
+				       hash_alg->base.cra_alignmask;
+	/*
+	 * The block cipher is only invoked once per message, so for long
+	 * messages (e.g. sectors for disk encryption) its performance doesn't
+	 * matter as much as that of the stream cipher and hash function.  Thus,
+	 * weigh the block cipher's ->cra_priority less.
+	 */
+	inst->alg.base.cra_priority = (4 * streamcipher_alg->base.cra_priority +
+				       2 * hash_alg->base.cra_priority +
+				       blockcipher_alg->cra_priority) / 7;
+
+	inst->alg.setkey = adiantum_setkey;
+	inst->alg.encrypt = adiantum_encrypt;
+	inst->alg.decrypt = adiantum_decrypt;
+	inst->alg.init = adiantum_init_tfm;
+	inst->alg.exit = adiantum_exit_tfm;
+	inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(streamcipher_alg);
+	inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(streamcipher_alg);
+	inst->alg.ivsize = TWEAK_SIZE;
+
+	inst->free = adiantum_free_instance;
+
+	err = skcipher_register_instance(tmpl, inst);
+	if (err)
+		goto out_drop_hash;
+
+	return 0;
+
+out_drop_hash:
+	crypto_drop_shash(&ictx->hash_spawn);
+out_drop_blockcipher:
+	crypto_drop_spawn(&ictx->blockcipher_spawn);
+out_drop_streamcipher:
+	crypto_drop_skcipher(&ictx->streamcipher_spawn);
+out_free_inst:
+	kfree(inst);
+	return err;
+}
+
+/* adiantum(streamcipher_name, blockcipher_name [, nhpoly1305_name]) */
+static struct crypto_template adiantum_tmpl = {
+	.name = "adiantum",
+	.create = adiantum_create,
+	.module = THIS_MODULE,
+};
+
+static int __init adiantum_module_init(void)
+{
+	return crypto_register_template(&adiantum_tmpl);
+}
+
+static void __exit adiantum_module_exit(void)
+{
+	crypto_unregister_template(&adiantum_tmpl);
+}
+
+module_init(adiantum_module_init);
+module_exit(adiantum_module_exit);
+
+MODULE_DESCRIPTION("Adiantum length-preserving encryption mode");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("adiantum");

diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c
index 3dd1011..956f8b8 100644
--- a/crypto/aes_generic.c
+++ b/crypto/aes_generic.c

@@ -62,7 +62,8 @@ static inline u8 byte(const u32 x, const unsigned n)
 
 static const u32 rco_tab[10] = { 1, 2, 4, 8, 16, 32, 64, 128, 27, 54 };
 
-__visible const u32 crypto_ft_tab[4][256] = {
+/* cacheline-aligned to facilitate prefetching into cache */
+__visible const u32 crypto_ft_tab[4][256] __cacheline_aligned = {
 	{
 		0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6,
 		0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591,
@@ -326,7 +327,7 @@ __visible const u32 crypto_ft_tab[4][256] = {
 	}
 };
 
-__visible const u32 crypto_fl_tab[4][256] = {
+__visible const u32 crypto_fl_tab[4][256] __cacheline_aligned = {
 	{
 		0x00000063, 0x0000007c, 0x00000077, 0x0000007b,
 		0x000000f2, 0x0000006b, 0x0000006f, 0x000000c5,
@@ -590,7 +591,7 @@ __visible const u32 crypto_fl_tab[4][256] = {
 	}
 };
 
-__visible const u32 crypto_it_tab[4][256] = {
+__visible const u32 crypto_it_tab[4][256] __cacheline_aligned = {
 	{
 		0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a,
 		0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b,
@@ -854,7 +855,7 @@ __visible const u32 crypto_it_tab[4][256] = {
 	}
 };
 
-__visible const u32 crypto_il_tab[4][256] = {
+__visible const u32 crypto_il_tab[4][256] __cacheline_aligned = {
 	{
 		0x00000052, 0x00000009, 0x0000006a, 0x000000d5,
 		0x00000030, 0x00000036, 0x000000a5, 0x00000038,

diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c
deleted file mode 100644
index 1cab831..0000000
--- a/crypto/chacha20_generic.c
+++ /dev/null

@@ -1,151 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <linux/crypto.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <crypto/chacha20.h>
-
-static inline u32 le32_to_cpuvp(const void *p)
-{
-	return le32_to_cpup(p);
-}
-
-static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
-			     unsigned int bytes)
-{
-	u8 stream[CHACHA20_BLOCK_SIZE];
-
-	if (dst != src)
-		memcpy(dst, src, bytes);
-
-	while (bytes >= CHACHA20_BLOCK_SIZE) {
-		chacha20_block(state, stream);
-		crypto_xor(dst, stream, CHACHA20_BLOCK_SIZE);
-		bytes -= CHACHA20_BLOCK_SIZE;
-		dst += CHACHA20_BLOCK_SIZE;
-	}
-	if (bytes) {
-		chacha20_block(state, stream);
-		crypto_xor(dst, stream, bytes);
-	}
-}
-
-void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv)
-{
-	static const char constant[16] = "expand 32-byte k";
-
-	state[0]  = le32_to_cpuvp(constant +  0);
-	state[1]  = le32_to_cpuvp(constant +  4);
-	state[2]  = le32_to_cpuvp(constant +  8);
-	state[3]  = le32_to_cpuvp(constant + 12);
-	state[4]  = ctx->key[0];
-	state[5]  = ctx->key[1];
-	state[6]  = ctx->key[2];
-	state[7]  = ctx->key[3];
-	state[8]  = ctx->key[4];
-	state[9]  = ctx->key[5];
-	state[10] = ctx->key[6];
-	state[11] = ctx->key[7];
-	state[12] = le32_to_cpuvp(iv +  0);
-	state[13] = le32_to_cpuvp(iv +  4);
-	state[14] = le32_to_cpuvp(iv +  8);
-	state[15] = le32_to_cpuvp(iv + 12);
-}
-EXPORT_SYMBOL_GPL(crypto_chacha20_init);
-
-int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key,
-			   unsigned int keysize)
-{
-	struct chacha20_ctx *ctx = crypto_tfm_ctx(tfm);
-	int i;
-
-	if (keysize != CHACHA20_KEY_SIZE)
-		return -EINVAL;
-
-	for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
-		ctx->key[i] = le32_to_cpuvp(key + i * sizeof(u32));
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_chacha20_setkey);
-
-int crypto_chacha20_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-			  struct scatterlist *src, unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-	u32 state[16];
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE);
-
-	crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);
-
-	while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
-		chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
-				 rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
-		err = blkcipher_walk_done(desc, &walk,
-					  walk.nbytes % CHACHA20_BLOCK_SIZE);
-	}
-
-	if (walk.nbytes) {
-		chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
-				 walk.nbytes);
-		err = blkcipher_walk_done(desc, &walk, 0);
-	}
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(crypto_chacha20_crypt);
-
-static struct crypto_alg alg = {
-	.cra_name		= "chacha20",
-	.cra_driver_name	= "chacha20-generic",
-	.cra_priority		= 100,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= 1,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_ctxsize		= sizeof(struct chacha20_ctx),
-	.cra_alignmask		= sizeof(u32) - 1,
-	.cra_module		= THIS_MODULE,
-	.cra_u			= {
-		.blkcipher = {
-			.min_keysize	= CHACHA20_KEY_SIZE,
-			.max_keysize	= CHACHA20_KEY_SIZE,
-			.ivsize		= CHACHA20_IV_SIZE,
-			.geniv		= "seqiv",
-			.setkey		= crypto_chacha20_setkey,
-			.encrypt	= crypto_chacha20_crypt,
-			.decrypt	= crypto_chacha20_crypt,
-		},
-	},
-};
-
-static int __init chacha20_generic_mod_init(void)
-{
-	return crypto_register_alg(&alg);
-}
-
-static void __exit chacha20_generic_mod_fini(void)
-{
-	crypto_unregister_alg(&alg);
-}
-
-module_init(chacha20_generic_mod_init);
-module_exit(chacha20_generic_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
-MODULE_DESCRIPTION("chacha20 cipher algorithm");
-MODULE_ALIAS_CRYPTO("chacha20");
-MODULE_ALIAS_CRYPTO("chacha20-generic");

diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c
index cb1c3a3..c0f699c 100644
--- a/crypto/chacha20poly1305.c
+++ b/crypto/chacha20poly1305.c

@@ -13,7 +13,7 @@
 #include <crypto/internal/hash.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/scatterwalk.h>
-#include <crypto/chacha20.h>
+#include <crypto/chacha.h>
 #include <crypto/poly1305.h>
 #include <linux/err.h>
 #include <linux/init.h>
@@ -51,7 +51,7 @@ struct poly_req {
 };
 
 struct chacha_req {
-	u8 iv[CHACHA20_IV_SIZE];
+	u8 iv[CHACHA_IV_SIZE];
 	struct scatterlist src[1];
 	struct skcipher_request req; /* must be last member */
 };
@@ -91,7 +91,7 @@ static void chacha_iv(u8 *iv, struct aead_request *req, u32 icb)
 	memcpy(iv, &leicb, sizeof(leicb));
 	memcpy(iv + sizeof(leicb), ctx->salt, ctx->saltlen);
 	memcpy(iv + sizeof(leicb) + ctx->saltlen, req->iv,
-	       CHACHA20_IV_SIZE - sizeof(leicb) - ctx->saltlen);
+	       CHACHA_IV_SIZE - sizeof(leicb) - ctx->saltlen);
 }
 
 static int poly_verify_tag(struct aead_request *req)
@@ -494,7 +494,7 @@ static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key,
 	struct chachapoly_ctx *ctx = crypto_aead_ctx(aead);
 	int err;
 
-	if (keylen != ctx->saltlen + CHACHA20_KEY_SIZE)
+	if (keylen != ctx->saltlen + CHACHA_KEY_SIZE)
 		return -EINVAL;
 
 	keylen -= ctx->saltlen;
@@ -639,7 +639,7 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
 
 	err = -EINVAL;
 	/* Need 16-byte IV size, including Initial Block Counter value */
-	if (crypto_skcipher_alg_ivsize(chacha) != CHACHA20_IV_SIZE)
+	if (crypto_skcipher_alg_ivsize(chacha) != CHACHA_IV_SIZE)
 		goto out_drop_chacha;
 	/* Not a stream cipher? */
 	if (chacha->base.cra_blocksize != 1)

diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c
new file mode 100644
index 0000000..dda06a8f
--- /dev/null
+++ b/crypto/chacha_generic.c

@@ -0,0 +1,243 @@
+/*
+ * ChaCha and XChaCha stream ciphers, including ChaCha20 (RFC7539)
+ *
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2018 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <crypto/chacha.h>
+
+static void chacha_docrypt(u32 *state, u8 *dst, const u8 *src,
+			   unsigned int bytes, int nrounds)
+{
+	/* aligned to potentially speed up crypto_xor() */
+	u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long));
+
+	if (dst != src)
+		memcpy(dst, src, bytes);
+
+	while (bytes >= CHACHA_BLOCK_SIZE) {
+		chacha_block(state, stream, nrounds);
+		crypto_xor(dst, stream, CHACHA_BLOCK_SIZE);
+		bytes -= CHACHA_BLOCK_SIZE;
+		dst += CHACHA_BLOCK_SIZE;
+	}
+	if (bytes) {
+		chacha_block(state, stream, nrounds);
+		crypto_xor(dst, stream, bytes);
+	}
+}
+
+static int chacha_stream_xor(struct blkcipher_desc *desc, struct scatterlist *dst,
+			     struct scatterlist *src, unsigned int nbytes,
+			     struct chacha_ctx *ctx, u8 *iv)
+{
+	struct blkcipher_walk walk;
+	u32 state[16];
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt_block(desc, &walk, CHACHA_BLOCK_SIZE);
+
+	crypto_chacha_init(state, ctx, iv);
+
+	while (walk.nbytes >= CHACHA_BLOCK_SIZE) {
+		chacha_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
+			       rounddown(walk.nbytes, CHACHA_BLOCK_SIZE),
+			       ctx->nrounds);
+		err = blkcipher_walk_done(desc, &walk,
+					  walk.nbytes % CHACHA_BLOCK_SIZE);
+	}
+
+	if (walk.nbytes) {
+		chacha_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
+			       walk.nbytes, ctx->nrounds);
+		err = blkcipher_walk_done(desc, &walk, 0);
+	}
+
+	return err;
+}
+
+void crypto_chacha_init(u32 *state, struct chacha_ctx *ctx, u8 *iv)
+{
+	state[0]  = 0x61707865; /* "expa" */
+	state[1]  = 0x3320646e; /* "nd 3" */
+	state[2]  = 0x79622d32; /* "2-by" */
+	state[3]  = 0x6b206574; /* "te k" */
+	state[4]  = ctx->key[0];
+	state[5]  = ctx->key[1];
+	state[6]  = ctx->key[2];
+	state[7]  = ctx->key[3];
+	state[8]  = ctx->key[4];
+	state[9]  = ctx->key[5];
+	state[10] = ctx->key[6];
+	state[11] = ctx->key[7];
+	state[12] = get_unaligned_le32(iv +  0);
+	state[13] = get_unaligned_le32(iv +  4);
+	state[14] = get_unaligned_le32(iv +  8);
+	state[15] = get_unaligned_le32(iv + 12);
+}
+EXPORT_SYMBOL_GPL(crypto_chacha_init);
+
+static int chacha_setkey(struct crypto_tfm *tfm, const u8 *key,
+			 unsigned int keysize, int nrounds)
+{
+	struct chacha_ctx *ctx = crypto_tfm_ctx(tfm);
+	int i;
+
+	if (keysize != CHACHA_KEY_SIZE)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
+		ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
+
+	ctx->nrounds = nrounds;
+	return 0;
+}
+
+int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key,
+			   unsigned int keysize)
+{
+	return chacha_setkey(tfm, key, keysize, 20);
+}
+EXPORT_SYMBOL_GPL(crypto_chacha20_setkey);
+
+int crypto_chacha12_setkey(struct crypto_tfm *tfm, const u8 *key,
+			   unsigned int keysize)
+{
+	return chacha_setkey(tfm, key, keysize, 12);
+}
+EXPORT_SYMBOL_GPL(crypto_chacha12_setkey);
+
+int crypto_chacha_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+			struct scatterlist *src, unsigned int nbytes)
+{
+	struct chacha_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	u8 *iv = desc->info;
+
+	return chacha_stream_xor(desc, dst, src, nbytes, ctx, iv);
+}
+EXPORT_SYMBOL_GPL(crypto_chacha_crypt);
+
+int crypto_xchacha_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+			 struct scatterlist *src, unsigned int nbytes)
+{
+	struct chacha_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	u8 *iv = desc->info;
+	struct chacha_ctx subctx;
+	u32 state[16];
+	u8 real_iv[16];
+
+	/* Compute the subkey given the original key and first 128 nonce bits */
+	crypto_chacha_init(state, ctx, iv);
+	hchacha_block(state, subctx.key, ctx->nrounds);
+	subctx.nrounds = ctx->nrounds;
+
+	/* Build the real IV */
+	memcpy(&real_iv[0], iv + 24, 8); /* stream position */
+	memcpy(&real_iv[8], iv + 16, 8); /* remaining 64 nonce bits */
+
+	/* Generate the stream and XOR it with the data */
+	return chacha_stream_xor(desc, dst, src, nbytes, &subctx, real_iv);
+}
+EXPORT_SYMBOL_GPL(crypto_xchacha_crypt);
+
+static struct crypto_alg algs[] = {
+	{
+		.cra_name		= "chacha20",
+		.cra_driver_name	= "chacha20-generic",
+		.cra_priority		= 100,
+		.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+		.cra_blocksize		= 1,
+		.cra_type		= &crypto_blkcipher_type,
+		.cra_ctxsize		= sizeof(struct chacha_ctx),
+		.cra_alignmask		= sizeof(u32) - 1,
+		.cra_module		= THIS_MODULE,
+		.cra_u			= {
+			.blkcipher = {
+				.min_keysize	= CHACHA_KEY_SIZE,
+				.max_keysize	= CHACHA_KEY_SIZE,
+				.ivsize		= CHACHA_IV_SIZE,
+				.geniv		= "seqiv",
+				.setkey		= crypto_chacha20_setkey,
+				.encrypt	= crypto_chacha_crypt,
+				.decrypt	= crypto_chacha_crypt,
+			},
+		},
+	}, {
+		.cra_name		= "xchacha20",
+		.cra_driver_name	= "xchacha20-generic",
+		.cra_priority		= 100,
+		.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+		.cra_blocksize		= 1,
+		.cra_type		= &crypto_blkcipher_type,
+		.cra_ctxsize		= sizeof(struct chacha_ctx),
+		.cra_alignmask		= sizeof(u32) - 1,
+		.cra_module		= THIS_MODULE,
+		.cra_u			= {
+			.blkcipher = {
+				.min_keysize	= CHACHA_KEY_SIZE,
+				.max_keysize	= CHACHA_KEY_SIZE,
+				.ivsize		= XCHACHA_IV_SIZE,
+				.geniv		= "seqiv",
+				.setkey		= crypto_chacha20_setkey,
+				.encrypt	= crypto_xchacha_crypt,
+				.decrypt	= crypto_xchacha_crypt,
+			},
+		},
+	}, {
+		.cra_name		= "xchacha12",
+		.cra_driver_name	= "xchacha12-generic",
+		.cra_priority		= 100,
+		.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+		.cra_blocksize		= 1,
+		.cra_type		= &crypto_blkcipher_type,
+		.cra_ctxsize		= sizeof(struct chacha_ctx),
+		.cra_alignmask		= sizeof(u32) - 1,
+		.cra_module		= THIS_MODULE,
+		.cra_u			= {
+			.blkcipher = {
+				.min_keysize	= CHACHA_KEY_SIZE,
+				.max_keysize	= CHACHA_KEY_SIZE,
+				.ivsize		= XCHACHA_IV_SIZE,
+				.geniv		= "seqiv",
+				.setkey		= crypto_chacha12_setkey,
+				.encrypt	= crypto_xchacha_crypt,
+				.decrypt	= crypto_xchacha_crypt,
+			},
+		},
+	},
+};
+
+static int __init chacha_generic_mod_init(void)
+{
+	return crypto_register_algs(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit chacha_generic_mod_fini(void)
+{
+	crypto_unregister_algs(algs, ARRAY_SIZE(algs));
+}
+
+module_init(chacha_generic_mod_init);
+module_exit(chacha_generic_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (generic)");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-generic");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-generic");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-generic");

diff --git a/crypto/nhpoly1305.c b/crypto/nhpoly1305.c
new file mode 100644
index 0000000..c838585
--- /dev/null
+++ b/crypto/nhpoly1305.c

@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum
+ *
+ * Copyright 2018 Google LLC
+ */
+
+/*
+ * "NHPoly1305" is the main component of Adiantum hashing.
+ * Specifically, it is the calculation
+ *
+ *	H_M ← Poly1305_{K_M}(NH_{K_N}(pad_{128}(M)))
+ *
+ * from the procedure in section A.5 of the Adiantum paper [1].  It is an
+ * ε-almost-∆-universal (εA∆U) hash function for equal-length inputs over
+ * Z/(2^{128}Z), where the "∆" operation is addition.  It hashes 1024-byte
+ * chunks of the input with the NH hash function [2], reducing the input length
+ * by 32x.  The resulting NH digests are evaluated as a polynomial in
+ * GF(2^{130}-5), like in the Poly1305 MAC [3].  Note that the polynomial
+ * evaluation by itself would suffice to achieve the εA∆U property; NH is used
+ * for performance since it's over twice as fast as Poly1305.
+ *
+ * This is *not* a cryptographic hash function; do not use it as such!
+ *
+ * [1] Adiantum: length-preserving encryption for entry-level processors
+ *     (https://eprint.iacr.org/2018/720.pdf)
+ * [2] UMAC: Fast and Secure Message Authentication
+ *     (https://fastcrypto.org/umac/umac_proc.pdf)
+ * [3] The Poly1305-AES message-authentication code
+ *     (https://cr.yp.to/mac/poly1305-20050329.pdf)
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/nhpoly1305.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+static void nh_generic(const u32 *key, const u8 *message, size_t message_len,
+		       __le64 hash[NH_NUM_PASSES])
+{
+	u64 sums[4] = { 0, 0, 0, 0 };
+
+	BUILD_BUG_ON(NH_PAIR_STRIDE != 2);
+	BUILD_BUG_ON(NH_NUM_PASSES != 4);
+
+	while (message_len) {
+		u32 m0 = get_unaligned_le32(message + 0);
+		u32 m1 = get_unaligned_le32(message + 4);
+		u32 m2 = get_unaligned_le32(message + 8);
+		u32 m3 = get_unaligned_le32(message + 12);
+
+		sums[0] += (u64)(u32)(m0 + key[ 0]) * (u32)(m2 + key[ 2]);
+		sums[1] += (u64)(u32)(m0 + key[ 4]) * (u32)(m2 + key[ 6]);
+		sums[2] += (u64)(u32)(m0 + key[ 8]) * (u32)(m2 + key[10]);
+		sums[3] += (u64)(u32)(m0 + key[12]) * (u32)(m2 + key[14]);
+		sums[0] += (u64)(u32)(m1 + key[ 1]) * (u32)(m3 + key[ 3]);
+		sums[1] += (u64)(u32)(m1 + key[ 5]) * (u32)(m3 + key[ 7]);
+		sums[2] += (u64)(u32)(m1 + key[ 9]) * (u32)(m3 + key[11]);
+		sums[3] += (u64)(u32)(m1 + key[13]) * (u32)(m3 + key[15]);
+		key += NH_MESSAGE_UNIT / sizeof(key[0]);
+		message += NH_MESSAGE_UNIT;
+		message_len -= NH_MESSAGE_UNIT;
+	}
+
+	hash[0] = cpu_to_le64(sums[0]);
+	hash[1] = cpu_to_le64(sums[1]);
+	hash[2] = cpu_to_le64(sums[2]);
+	hash[3] = cpu_to_le64(sums[3]);
+}
+
+/* Pass the next NH hash value through Poly1305 */
+static void process_nh_hash_value(struct nhpoly1305_state *state,
+				  const struct nhpoly1305_key *key)
+{
+	BUILD_BUG_ON(NH_HASH_BYTES % POLY1305_BLOCK_SIZE != 0);
+
+	poly1305_core_blocks(&state->poly_state, &key->poly_key, state->nh_hash,
+			     NH_HASH_BYTES / POLY1305_BLOCK_SIZE);
+}
+
+/*
+ * Feed the next portion of the source data, as a whole number of 16-byte
+ * "NH message units", through NH and Poly1305.  Each NH hash is taken over
+ * 1024 bytes, except possibly the final one which is taken over a multiple of
+ * 16 bytes up to 1024.  Also, in the case where data is passed in misaligned
+ * chunks, we combine partial hashes; the end result is the same either way.
+ */
+static void nhpoly1305_units(struct nhpoly1305_state *state,
+			     const struct nhpoly1305_key *key,
+			     const u8 *src, unsigned int srclen, nh_t nh_fn)
+{
+	do {
+		unsigned int bytes;
+
+		if (state->nh_remaining == 0) {
+			/* Starting a new NH message */
+			bytes = min_t(unsigned int, srclen, NH_MESSAGE_BYTES);
+			nh_fn(key->nh_key, src, bytes, state->nh_hash);
+			state->nh_remaining = NH_MESSAGE_BYTES - bytes;
+		} else {
+			/* Continuing a previous NH message */
+			__le64 tmp_hash[NH_NUM_PASSES];
+			unsigned int pos;
+			int i;
+
+			pos = NH_MESSAGE_BYTES - state->nh_remaining;
+			bytes = min(srclen, state->nh_remaining);
+			nh_fn(&key->nh_key[pos / 4], src, bytes, tmp_hash);
+			for (i = 0; i < NH_NUM_PASSES; i++)
+				le64_add_cpu(&state->nh_hash[i],
+					     le64_to_cpu(tmp_hash[i]));
+			state->nh_remaining -= bytes;
+		}
+		if (state->nh_remaining == 0)
+			process_nh_hash_value(state, key);
+		src += bytes;
+		srclen -= bytes;
+	} while (srclen);
+}
+
+int crypto_nhpoly1305_setkey(struct crypto_shash *tfm,
+			     const u8 *key, unsigned int keylen)
+{
+	struct nhpoly1305_key *ctx = crypto_shash_ctx(tfm);
+	int i;
+
+	if (keylen != NHPOLY1305_KEY_SIZE)
+		return -EINVAL;
+
+	poly1305_core_setkey(&ctx->poly_key, key);
+	key += POLY1305_BLOCK_SIZE;
+
+	for (i = 0; i < NH_KEY_WORDS; i++)
+		ctx->nh_key[i] = get_unaligned_le32(key + i * sizeof(u32));
+
+	return 0;
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_setkey);
+
+int crypto_nhpoly1305_init(struct shash_desc *desc)
+{
+	struct nhpoly1305_state *state = shash_desc_ctx(desc);
+
+	poly1305_core_init(&state->poly_state);
+	state->buflen = 0;
+	state->nh_remaining = 0;
+	return 0;
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_init);
+
+int crypto_nhpoly1305_update_helper(struct shash_desc *desc,
+				    const u8 *src, unsigned int srclen,
+				    nh_t nh_fn)
+{
+	struct nhpoly1305_state *state = shash_desc_ctx(desc);
+	const struct nhpoly1305_key *key = crypto_shash_ctx(desc->tfm);
+	unsigned int bytes;
+
+	if (state->buflen) {
+		bytes = min(srclen, (int)NH_MESSAGE_UNIT - state->buflen);
+		memcpy(&state->buffer[state->buflen], src, bytes);
+		state->buflen += bytes;
+		if (state->buflen < NH_MESSAGE_UNIT)
+			return 0;
+		nhpoly1305_units(state, key, state->buffer, NH_MESSAGE_UNIT,
+				 nh_fn);
+		state->buflen = 0;
+		src += bytes;
+		srclen -= bytes;
+	}
+
+	if (srclen >= NH_MESSAGE_UNIT) {
+		bytes = round_down(srclen, NH_MESSAGE_UNIT);
+		nhpoly1305_units(state, key, src, bytes, nh_fn);
+		src += bytes;
+		srclen -= bytes;
+	}
+
+	if (srclen) {
+		memcpy(state->buffer, src, srclen);
+		state->buflen = srclen;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_update_helper);
+
+int crypto_nhpoly1305_update(struct shash_desc *desc,
+			     const u8 *src, unsigned int srclen)
+{
+	return crypto_nhpoly1305_update_helper(desc, src, srclen, nh_generic);
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_update);
+
+int crypto_nhpoly1305_final_helper(struct shash_desc *desc, u8 *dst, nh_t nh_fn)
+{
+	struct nhpoly1305_state *state = shash_desc_ctx(desc);
+	const struct nhpoly1305_key *key = crypto_shash_ctx(desc->tfm);
+
+	if (state->buflen) {
+		memset(&state->buffer[state->buflen], 0,
+		       NH_MESSAGE_UNIT - state->buflen);
+		nhpoly1305_units(state, key, state->buffer, NH_MESSAGE_UNIT,
+				 nh_fn);
+	}
+
+	if (state->nh_remaining)
+		process_nh_hash_value(state, key);
+
+	poly1305_core_emit(&state->poly_state, dst);
+	return 0;
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_final_helper);
+
+int crypto_nhpoly1305_final(struct shash_desc *desc, u8 *dst)
+{
+	return crypto_nhpoly1305_final_helper(desc, dst, nh_generic);
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_final);
+
+static struct shash_alg nhpoly1305_alg = {
+	.base.cra_name		= "nhpoly1305",
+	.base.cra_driver_name	= "nhpoly1305-generic",
+	.base.cra_priority	= 100,
+	.base.cra_ctxsize	= sizeof(struct nhpoly1305_key),
+	.base.cra_module	= THIS_MODULE,
+	.digestsize		= POLY1305_DIGEST_SIZE,
+	.init			= crypto_nhpoly1305_init,
+	.update			= crypto_nhpoly1305_update,
+	.final			= crypto_nhpoly1305_final,
+	.setkey			= crypto_nhpoly1305_setkey,
+	.descsize		= sizeof(struct nhpoly1305_state),
+};
+
+static int __init nhpoly1305_mod_init(void)
+{
+	return crypto_register_shash(&nhpoly1305_alg);
+}
+
+static void __exit nhpoly1305_mod_exit(void)
+{
+	crypto_unregister_shash(&nhpoly1305_alg);
+}
+
+module_init(nhpoly1305_mod_init);
+module_exit(nhpoly1305_mod_exit);
+
+MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("nhpoly1305");
+MODULE_ALIAS_CRYPTO("nhpoly1305-generic");

diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
index bca9923..b60c1ee 100644
--- a/crypto/poly1305_generic.c
+++ b/crypto/poly1305_generic.c

@@ -17,6 +17,7 @@
 #include <linux/crypto.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <asm/unaligned.h>
 
 static inline u64 mlt(u64 a, u64 b)
 {
@@ -33,16 +34,11 @@ static inline u32 and(u32 v, u32 mask)
 	return v & mask;
 }
 
-static inline u32 le32_to_cpuvp(const void *p)
-{
-	return le32_to_cpup(p);
-}
-
 int crypto_poly1305_init(struct shash_desc *desc)
 {
 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
 
-	memset(dctx->h, 0, sizeof(dctx->h));
+	poly1305_core_init(&dctx->h);
 	dctx->buflen = 0;
 	dctx->rset = false;
 	dctx->sset = false;
@@ -51,23 +47,16 @@ int crypto_poly1305_init(struct shash_desc *desc)
 }
 EXPORT_SYMBOL_GPL(crypto_poly1305_init);
 
-static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key)
+void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key)
 {
 	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
-	dctx->r[0] = (le32_to_cpuvp(key +  0) >> 0) & 0x3ffffff;
-	dctx->r[1] = (le32_to_cpuvp(key +  3) >> 2) & 0x3ffff03;
-	dctx->r[2] = (le32_to_cpuvp(key +  6) >> 4) & 0x3ffc0ff;
-	dctx->r[3] = (le32_to_cpuvp(key +  9) >> 6) & 0x3f03fff;
-	dctx->r[4] = (le32_to_cpuvp(key + 12) >> 8) & 0x00fffff;
+	key->r[0] = (get_unaligned_le32(raw_key +  0) >> 0) & 0x3ffffff;
+	key->r[1] = (get_unaligned_le32(raw_key +  3) >> 2) & 0x3ffff03;
+	key->r[2] = (get_unaligned_le32(raw_key +  6) >> 4) & 0x3ffc0ff;
+	key->r[3] = (get_unaligned_le32(raw_key +  9) >> 6) & 0x3f03fff;
+	key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
 }
-
-static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key)
-{
-	dctx->s[0] = le32_to_cpuvp(key +  0);
-	dctx->s[1] = le32_to_cpuvp(key +  4);
-	dctx->s[2] = le32_to_cpuvp(key +  8);
-	dctx->s[3] = le32_to_cpuvp(key + 12);
-}
+EXPORT_SYMBOL_GPL(poly1305_core_setkey);
 
 /*
  * Poly1305 requires a unique key for each tag, which implies that we can't set
@@ -79,13 +68,16 @@ unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
 {
 	if (!dctx->sset) {
 		if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
-			poly1305_setrkey(dctx, src);
+			poly1305_core_setkey(&dctx->r, src);
 			src += POLY1305_BLOCK_SIZE;
 			srclen -= POLY1305_BLOCK_SIZE;
 			dctx->rset = true;
 		}
 		if (srclen >= POLY1305_BLOCK_SIZE) {
-			poly1305_setskey(dctx, src);
+			dctx->s[0] = get_unaligned_le32(src +  0);
+			dctx->s[1] = get_unaligned_le32(src +  4);
+			dctx->s[2] = get_unaligned_le32(src +  8);
+			dctx->s[3] = get_unaligned_le32(src + 12);
 			src += POLY1305_BLOCK_SIZE;
 			srclen -= POLY1305_BLOCK_SIZE;
 			dctx->sset = true;
@@ -95,47 +87,43 @@ unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
 }
 EXPORT_SYMBOL_GPL(crypto_poly1305_setdesckey);
 
-static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
-				    const u8 *src, unsigned int srclen,
-				    u32 hibit)
+static void poly1305_blocks_internal(struct poly1305_state *state,
+				     const struct poly1305_key *key,
+				     const void *src, unsigned int nblocks,
+				     u32 hibit)
 {
 	u32 r0, r1, r2, r3, r4;
 	u32 s1, s2, s3, s4;
 	u32 h0, h1, h2, h3, h4;
 	u64 d0, d1, d2, d3, d4;
-	unsigned int datalen;
 
-	if (unlikely(!dctx->sset)) {
-		datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
-		src += srclen - datalen;
-		srclen = datalen;
-	}
+	if (!nblocks)
+		return;
 
-	r0 = dctx->r[0];
-	r1 = dctx->r[1];
-	r2 = dctx->r[2];
-	r3 = dctx->r[3];
-	r4 = dctx->r[4];
+	r0 = key->r[0];
+	r1 = key->r[1];
+	r2 = key->r[2];
+	r3 = key->r[3];
+	r4 = key->r[4];
 
 	s1 = r1 * 5;
 	s2 = r2 * 5;
 	s3 = r3 * 5;
 	s4 = r4 * 5;
 
-	h0 = dctx->h[0];
-	h1 = dctx->h[1];
-	h2 = dctx->h[2];
-	h3 = dctx->h[3];
-	h4 = dctx->h[4];
+	h0 = state->h[0];
+	h1 = state->h[1];
+	h2 = state->h[2];
+	h3 = state->h[3];
+	h4 = state->h[4];
 
-	while (likely(srclen >= POLY1305_BLOCK_SIZE)) {
-
+	do {
 		/* h += m[i] */
-		h0 += (le32_to_cpuvp(src +  0) >> 0) & 0x3ffffff;
-		h1 += (le32_to_cpuvp(src +  3) >> 2) & 0x3ffffff;
-		h2 += (le32_to_cpuvp(src +  6) >> 4) & 0x3ffffff;
-		h3 += (le32_to_cpuvp(src +  9) >> 6) & 0x3ffffff;
-		h4 += (le32_to_cpuvp(src + 12) >> 8) | hibit;
+		h0 += (get_unaligned_le32(src +  0) >> 0) & 0x3ffffff;
+		h1 += (get_unaligned_le32(src +  3) >> 2) & 0x3ffffff;
+		h2 += (get_unaligned_le32(src +  6) >> 4) & 0x3ffffff;
+		h3 += (get_unaligned_le32(src +  9) >> 6) & 0x3ffffff;
+		h4 += (get_unaligned_le32(src + 12) >> 8) | hibit;
 
 		/* h *= r */
 		d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
@@ -158,16 +146,36 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
 		h1 += h0 >> 26;       h0 = h0 & 0x3ffffff;
 
 		src += POLY1305_BLOCK_SIZE;
-		srclen -= POLY1305_BLOCK_SIZE;
+	} while (--nblocks);
+
+	state->h[0] = h0;
+	state->h[1] = h1;
+	state->h[2] = h2;
+	state->h[3] = h3;
+	state->h[4] = h4;
+}
+
+void poly1305_core_blocks(struct poly1305_state *state,
+			  const struct poly1305_key *key,
+			  const void *src, unsigned int nblocks)
+{
+	poly1305_blocks_internal(state, key, src, nblocks, 1 << 24);
+}
+EXPORT_SYMBOL_GPL(poly1305_core_blocks);
+
+static void poly1305_blocks(struct poly1305_desc_ctx *dctx,
+			    const u8 *src, unsigned int srclen, u32 hibit)
+{
+	unsigned int datalen;
+
+	if (unlikely(!dctx->sset)) {
+		datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
+		src += srclen - datalen;
+		srclen = datalen;
 	}
 
-	dctx->h[0] = h0;
-	dctx->h[1] = h1;
-	dctx->h[2] = h2;
-	dctx->h[3] = h3;
-	dctx->h[4] = h4;
-
-	return srclen;
+	poly1305_blocks_internal(&dctx->h, &dctx->r,
+				 src, srclen / POLY1305_BLOCK_SIZE, hibit);
 }
 
 int crypto_poly1305_update(struct shash_desc *desc,
@@ -191,9 +199,9 @@ int crypto_poly1305_update(struct shash_desc *desc,
 	}
 
 	if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
-		bytes = poly1305_blocks(dctx, src, srclen, 1 << 24);
-		src += srclen - bytes;
-		srclen = bytes;
+		poly1305_blocks(dctx, src, srclen, 1 << 24);
+		src += srclen - (srclen % POLY1305_BLOCK_SIZE);
+		srclen %= POLY1305_BLOCK_SIZE;
 	}
 
 	if (unlikely(srclen)) {
@@ -205,31 +213,18 @@ int crypto_poly1305_update(struct shash_desc *desc,
 }
 EXPORT_SYMBOL_GPL(crypto_poly1305_update);
 
-int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
+void poly1305_core_emit(const struct poly1305_state *state, void *dst)
 {
-	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-	__le32 *mac = (__le32 *)dst;
 	u32 h0, h1, h2, h3, h4;
 	u32 g0, g1, g2, g3, g4;
 	u32 mask;
-	u64 f = 0;
-
-	if (unlikely(!dctx->sset))
-		return -ENOKEY;
-
-	if (unlikely(dctx->buflen)) {
-		dctx->buf[dctx->buflen++] = 1;
-		memset(dctx->buf + dctx->buflen, 0,
-		       POLY1305_BLOCK_SIZE - dctx->buflen);
-		poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0);
-	}
 
 	/* fully carry h */
-	h0 = dctx->h[0];
-	h1 = dctx->h[1];
-	h2 = dctx->h[2];
-	h3 = dctx->h[3];
-	h4 = dctx->h[4];
+	h0 = state->h[0];
+	h1 = state->h[1];
+	h2 = state->h[2];
+	h3 = state->h[3];
+	h4 = state->h[4];
 
 	h2 += (h1 >> 26);     h1 = h1 & 0x3ffffff;
 	h3 += (h2 >> 26);     h2 = h2 & 0x3ffffff;
@@ -259,16 +254,40 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
 	h4 = (h4 & mask) | g4;
 
 	/* h = h % (2^128) */
-	h0 = (h0 >>  0) | (h1 << 26);
-	h1 = (h1 >>  6) | (h2 << 20);
-	h2 = (h2 >> 12) | (h3 << 14);
-	h3 = (h3 >> 18) | (h4 <<  8);
+	put_unaligned_le32((h0 >>  0) | (h1 << 26), dst +  0);
+	put_unaligned_le32((h1 >>  6) | (h2 << 20), dst +  4);
+	put_unaligned_le32((h2 >> 12) | (h3 << 14), dst +  8);
+	put_unaligned_le32((h3 >> 18) | (h4 <<  8), dst + 12);
+}
+EXPORT_SYMBOL_GPL(poly1305_core_emit);
+
+int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+	__le32 digest[4];
+	u64 f = 0;
+
+	if (unlikely(!dctx->sset))
+		return -ENOKEY;
+
+	if (unlikely(dctx->buflen)) {
+		dctx->buf[dctx->buflen++] = 1;
+		memset(dctx->buf + dctx->buflen, 0,
+		       POLY1305_BLOCK_SIZE - dctx->buflen);
+		poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+	}
+
+	poly1305_core_emit(&dctx->h, digest);
 
 	/* mac = (h + s) % (2^128) */
-	f = (f >> 32) + h0 + dctx->s[0]; mac[0] = cpu_to_le32(f);
-	f = (f >> 32) + h1 + dctx->s[1]; mac[1] = cpu_to_le32(f);
-	f = (f >> 32) + h2 + dctx->s[2]; mac[2] = cpu_to_le32(f);
-	f = (f >> 32) + h3 + dctx->s[3]; mac[3] = cpu_to_le32(f);
+	f = (f >> 32) + le32_to_cpu(digest[0]) + dctx->s[0];
+	put_unaligned_le32(f, dst + 0);
+	f = (f >> 32) + le32_to_cpu(digest[1]) + dctx->s[1];
+	put_unaligned_le32(f, dst + 4);
+	f = (f >> 32) + le32_to_cpu(digest[2]) + dctx->s[2];
+	put_unaligned_le32(f, dst + 8);
+	f = (f >> 32) + le32_to_cpu(digest[3]) + dctx->s[3];
+	put_unaligned_le32(f, dst + 12);
 
 	return 0;
 }

diff --git a/crypto/speck.c b/crypto/speck.c
deleted file mode 100644
index 58aa9f7..0000000
--- a/crypto/speck.c
+++ /dev/null

@@ -1,307 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Speck: a lightweight block cipher
- *
- * Copyright (c) 2018 Google, Inc
- *
- * Speck has 10 variants, including 5 block sizes.  For now we only implement
- * the variants Speck128/128, Speck128/192, Speck128/256, Speck64/96, and
- * Speck64/128.   Speck${B}/${K} denotes the variant with a block size of B bits
- * and a key size of K bits.  The Speck128 variants are believed to be the most
- * secure variants, and they use the same block size and key sizes as AES.  The
- * Speck64 variants are less secure, but on 32-bit processors are usually
- * faster.  The remaining variants (Speck32, Speck48, and Speck96) are even less
- * secure and/or not as well suited for implementation on either 32-bit or
- * 64-bit processors, so are omitted.
- *
- * Reference: "The Simon and Speck Families of Lightweight Block Ciphers"
- * https://eprint.iacr.org/2013/404.pdf
- *
- * In a correspondence, the Speck designers have also clarified that the words
- * should be interpreted in little-endian format, and the words should be
- * ordered such that the first word of each block is 'y' rather than 'x', and
- * the first key word (rather than the last) becomes the first round key.
- */
-
-#include <asm/unaligned.h>
-#include <crypto/speck.h>
-#include <linux/bitops.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
-#include <linux/module.h>
-
-/* Speck128 */
-
-static __always_inline void speck128_round(u64 *x, u64 *y, u64 k)
-{
-	*x = ror64(*x, 8);
-	*x += *y;
-	*x ^= k;
-	*y = rol64(*y, 3);
-	*y ^= *x;
-}
-
-static __always_inline void speck128_unround(u64 *x, u64 *y, u64 k)
-{
-	*y ^= *x;
-	*y = ror64(*y, 3);
-	*x ^= k;
-	*x -= *y;
-	*x = rol64(*x, 8);
-}
-
-void crypto_speck128_encrypt(const struct speck128_tfm_ctx *ctx,
-			     u8 *out, const u8 *in)
-{
-	u64 y = get_unaligned_le64(in);
-	u64 x = get_unaligned_le64(in + 8);
-	int i;
-
-	for (i = 0; i < ctx->nrounds; i++)
-		speck128_round(&x, &y, ctx->round_keys[i]);
-
-	put_unaligned_le64(y, out);
-	put_unaligned_le64(x, out + 8);
-}
-EXPORT_SYMBOL_GPL(crypto_speck128_encrypt);
-
-static void speck128_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
-	crypto_speck128_encrypt(crypto_tfm_ctx(tfm), out, in);
-}
-
-void crypto_speck128_decrypt(const struct speck128_tfm_ctx *ctx,
-			     u8 *out, const u8 *in)
-{
-	u64 y = get_unaligned_le64(in);
-	u64 x = get_unaligned_le64(in + 8);
-	int i;
-
-	for (i = ctx->nrounds - 1; i >= 0; i--)
-		speck128_unround(&x, &y, ctx->round_keys[i]);
-
-	put_unaligned_le64(y, out);
-	put_unaligned_le64(x, out + 8);
-}
-EXPORT_SYMBOL_GPL(crypto_speck128_decrypt);
-
-static void speck128_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
-	crypto_speck128_decrypt(crypto_tfm_ctx(tfm), out, in);
-}
-
-int crypto_speck128_setkey(struct speck128_tfm_ctx *ctx, const u8 *key,
-			   unsigned int keylen)
-{
-	u64 l[3];
-	u64 k;
-	int i;
-
-	switch (keylen) {
-	case SPECK128_128_KEY_SIZE:
-		k = get_unaligned_le64(key);
-		l[0] = get_unaligned_le64(key + 8);
-		ctx->nrounds = SPECK128_128_NROUNDS;
-		for (i = 0; i < ctx->nrounds; i++) {
-			ctx->round_keys[i] = k;
-			speck128_round(&l[0], &k, i);
-		}
-		break;
-	case SPECK128_192_KEY_SIZE:
-		k = get_unaligned_le64(key);
-		l[0] = get_unaligned_le64(key + 8);
-		l[1] = get_unaligned_le64(key + 16);
-		ctx->nrounds = SPECK128_192_NROUNDS;
-		for (i = 0; i < ctx->nrounds; i++) {
-			ctx->round_keys[i] = k;
-			speck128_round(&l[i % 2], &k, i);
-		}
-		break;
-	case SPECK128_256_KEY_SIZE:
-		k = get_unaligned_le64(key);
-		l[0] = get_unaligned_le64(key + 8);
-		l[1] = get_unaligned_le64(key + 16);
-		l[2] = get_unaligned_le64(key + 24);
-		ctx->nrounds = SPECK128_256_NROUNDS;
-		for (i = 0; i < ctx->nrounds; i++) {
-			ctx->round_keys[i] = k;
-			speck128_round(&l[i % 3], &k, i);
-		}
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_speck128_setkey);
-
-static int speck128_setkey(struct crypto_tfm *tfm, const u8 *key,
-			   unsigned int keylen)
-{
-	return crypto_speck128_setkey(crypto_tfm_ctx(tfm), key, keylen);
-}
-
-/* Speck64 */
-
-static __always_inline void speck64_round(u32 *x, u32 *y, u32 k)
-{
-	*x = ror32(*x, 8);
-	*x += *y;
-	*x ^= k;
-	*y = rol32(*y, 3);
-	*y ^= *x;
-}
-
-static __always_inline void speck64_unround(u32 *x, u32 *y, u32 k)
-{
-	*y ^= *x;
-	*y = ror32(*y, 3);
-	*x ^= k;
-	*x -= *y;
-	*x = rol32(*x, 8);
-}
-
-void crypto_speck64_encrypt(const struct speck64_tfm_ctx *ctx,
-			    u8 *out, const u8 *in)
-{
-	u32 y = get_unaligned_le32(in);
-	u32 x = get_unaligned_le32(in + 4);
-	int i;
-
-	for (i = 0; i < ctx->nrounds; i++)
-		speck64_round(&x, &y, ctx->round_keys[i]);
-
-	put_unaligned_le32(y, out);
-	put_unaligned_le32(x, out + 4);
-}
-EXPORT_SYMBOL_GPL(crypto_speck64_encrypt);
-
-static void speck64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
-	crypto_speck64_encrypt(crypto_tfm_ctx(tfm), out, in);
-}
-
-void crypto_speck64_decrypt(const struct speck64_tfm_ctx *ctx,
-			    u8 *out, const u8 *in)
-{
-	u32 y = get_unaligned_le32(in);
-	u32 x = get_unaligned_le32(in + 4);
-	int i;
-
-	for (i = ctx->nrounds - 1; i >= 0; i--)
-		speck64_unround(&x, &y, ctx->round_keys[i]);
-
-	put_unaligned_le32(y, out);
-	put_unaligned_le32(x, out + 4);
-}
-EXPORT_SYMBOL_GPL(crypto_speck64_decrypt);
-
-static void speck64_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
-	crypto_speck64_decrypt(crypto_tfm_ctx(tfm), out, in);
-}
-
-int crypto_speck64_setkey(struct speck64_tfm_ctx *ctx, const u8 *key,
-			  unsigned int keylen)
-{
-	u32 l[3];
-	u32 k;
-	int i;
-
-	switch (keylen) {
-	case SPECK64_96_KEY_SIZE:
-		k = get_unaligned_le32(key);
-		l[0] = get_unaligned_le32(key + 4);
-		l[1] = get_unaligned_le32(key + 8);
-		ctx->nrounds = SPECK64_96_NROUNDS;
-		for (i = 0; i < ctx->nrounds; i++) {
-			ctx->round_keys[i] = k;
-			speck64_round(&l[i % 2], &k, i);
-		}
-		break;
-	case SPECK64_128_KEY_SIZE:
-		k = get_unaligned_le32(key);
-		l[0] = get_unaligned_le32(key + 4);
-		l[1] = get_unaligned_le32(key + 8);
-		l[2] = get_unaligned_le32(key + 12);
-		ctx->nrounds = SPECK64_128_NROUNDS;
-		for (i = 0; i < ctx->nrounds; i++) {
-			ctx->round_keys[i] = k;
-			speck64_round(&l[i % 3], &k, i);
-		}
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_speck64_setkey);
-
-static int speck64_setkey(struct crypto_tfm *tfm, const u8 *key,
-			  unsigned int keylen)
-{
-	return crypto_speck64_setkey(crypto_tfm_ctx(tfm), key, keylen);
-}
-
-/* Algorithm definitions */
-
-static struct crypto_alg speck_algs[] = {
-	{
-		.cra_name		= "speck128",
-		.cra_driver_name	= "speck128-generic",
-		.cra_priority		= 100,
-		.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
-		.cra_blocksize		= SPECK128_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct speck128_tfm_ctx),
-		.cra_module		= THIS_MODULE,
-		.cra_u			= {
-			.cipher = {
-				.cia_min_keysize	= SPECK128_128_KEY_SIZE,
-				.cia_max_keysize	= SPECK128_256_KEY_SIZE,
-				.cia_setkey		= speck128_setkey,
-				.cia_encrypt		= speck128_encrypt,
-				.cia_decrypt		= speck128_decrypt
-			}
-		}
-	}, {
-		.cra_name		= "speck64",
-		.cra_driver_name	= "speck64-generic",
-		.cra_priority		= 100,
-		.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
-		.cra_blocksize		= SPECK64_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct speck64_tfm_ctx),
-		.cra_module		= THIS_MODULE,
-		.cra_u			= {
-			.cipher = {
-				.cia_min_keysize	= SPECK64_96_KEY_SIZE,
-				.cia_max_keysize	= SPECK64_128_KEY_SIZE,
-				.cia_setkey		= speck64_setkey,
-				.cia_encrypt		= speck64_encrypt,
-				.cia_decrypt		= speck64_decrypt
-			}
-		}
-	}
-};
-
-static int __init speck_module_init(void)
-{
-	return crypto_register_algs(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-static void __exit speck_module_exit(void)
-{
-	crypto_unregister_algs(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-module_init(speck_module_init);
-module_exit(speck_module_exit);
-
-MODULE_DESCRIPTION("Speck block cipher (generic)");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
-MODULE_ALIAS_CRYPTO("speck128");
-MODULE_ALIAS_CRYPTO("speck128-generic");
-MODULE_ALIAS_CRYPTO("speck64");
-MODULE_ALIAS_CRYPTO("speck64-generic");

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index babbda2..fb79561 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c

@@ -1610,6 +1610,16 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
 				  speed_template_32);
 		break;
 
+	case 219:
+		test_cipher_speed("adiantum(xchacha12,aes)", ENCRYPT, sec, NULL,
+				  0, speed_template_32);
+		test_cipher_speed("adiantum(xchacha12,aes)", DECRYPT, sec, NULL,
+				  0, speed_template_32);
+		test_cipher_speed("adiantum(xchacha20,aes)", ENCRYPT, sec, NULL,
+				  0, speed_template_32);
+		test_cipher_speed("adiantum(xchacha20,aes)", DECRYPT, sec, NULL,
+				  0, speed_template_32);
+		break;
 
 	case 300:
 		if (alg) {

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index bcbd3d4..898ef4d 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c

@@ -62,7 +62,7 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
  */
 #define IDX1		32
 #define IDX2		32400
-#define IDX3		1
+#define IDX3		1511
 #define IDX4		8193
 #define IDX5		22222
 #define IDX6		17101
@@ -2173,6 +2173,36 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.test = alg_test_null,
 		.fips_allowed = 1,
 	}, {
+		.alg = "adiantum(xchacha12,aes)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = adiantum_xchacha12_aes_enc_tv_template,
+					.count = ARRAY_SIZE(adiantum_xchacha12_aes_enc_tv_template),
+				},
+				.dec = {
+					.vecs = adiantum_xchacha12_aes_dec_tv_template,
+					.count = ARRAY_SIZE(adiantum_xchacha12_aes_dec_tv_template),
+				},
+			}
+		},
+	}, {
+		.alg = "adiantum(xchacha20,aes)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = adiantum_xchacha20_aes_enc_tv_template,
+					.count = ARRAY_SIZE(adiantum_xchacha20_aes_enc_tv_template),
+				},
+				.dec = {
+					.vecs = adiantum_xchacha20_aes_dec_tv_template,
+					.count = ARRAY_SIZE(adiantum_xchacha20_aes_dec_tv_template),
+				},
+			}
+		},
+	}, {
 		.alg = "ansi_cprng",
 		.test = alg_test_cprng,
 		.suite = {
@@ -3238,36 +3268,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 			}
 		}
 	}, {
-		.alg = "ecb(speck128)",
-		.test = alg_test_skcipher,
-		.suite = {
-			.cipher = {
-				.enc = {
-					.vecs = speck128_enc_tv_template,
-					.count = ARRAY_SIZE(speck128_enc_tv_template)
-				},
-				.dec = {
-					.vecs = speck128_dec_tv_template,
-					.count = ARRAY_SIZE(speck128_dec_tv_template)
-				}
-			}
-		}
-	}, {
-		.alg = "ecb(speck64)",
-		.test = alg_test_skcipher,
-		.suite = {
-			.cipher = {
-				.enc = {
-					.vecs = speck64_enc_tv_template,
-					.count = ARRAY_SIZE(speck64_enc_tv_template)
-				},
-				.dec = {
-					.vecs = speck64_dec_tv_template,
-					.count = ARRAY_SIZE(speck64_dec_tv_template)
-				}
-			}
-		}
-	}, {
 		.alg = "ecb(tea)",
 		.test = alg_test_skcipher,
 		.suite = {
@@ -3675,6 +3675,15 @@ static const struct alg_test_desc alg_test_descs[] = {
 			}
 		}
 	}, {
+		.alg = "nhpoly1305",
+		.test = alg_test_hash,
+		.suite = {
+			.hash = {
+				.vecs = nhpoly1305_tv_template,
+				.count = ARRAY_SIZE(nhpoly1305_tv_template),
+			}
+		}
+	}, {
 		.alg = "ofb(aes)",
 		.test = alg_test_skcipher,
 		.fips_allowed = 1,
@@ -4027,6 +4036,36 @@ static const struct alg_test_desc alg_test_descs[] = {
 			}
 		}
 	}, {
+		.alg = "xchacha12",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = xchacha12_tv_template,
+					.count = ARRAY_SIZE(xchacha12_tv_template),
+				},
+				.dec = {
+					.vecs = xchacha12_tv_template,
+					.count = ARRAY_SIZE(xchacha12_tv_template),
+				},
+			}
+		},
+	}, {
+		.alg = "xchacha20",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = xchacha20_tv_template,
+					.count = ARRAY_SIZE(xchacha20_tv_template),
+				},
+				.dec = {
+					.vecs = xchacha20_tv_template,
+					.count = ARRAY_SIZE(xchacha20_tv_template),
+				},
+			}
+		},
+	}, {
 		.alg = "xts(aes)",
 		.test = alg_test_skcipher,
 		.fips_allowed = 1,
@@ -4088,36 +4127,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 			}
 		}
 	}, {
-		.alg = "xts(speck128)",
-		.test = alg_test_skcipher,
-		.suite = {
-			.cipher = {
-				.enc = {
-					.vecs = speck128_xts_enc_tv_template,
-					.count = ARRAY_SIZE(speck128_xts_enc_tv_template)
-				},
-				.dec = {
-					.vecs = speck128_xts_dec_tv_template,
-					.count = ARRAY_SIZE(speck128_xts_dec_tv_template)
-				}
-			}
-		}
-	}, {
-		.alg = "xts(speck64)",
-		.test = alg_test_skcipher,
-		.suite = {
-			.cipher = {
-				.enc = {
-					.vecs = speck64_xts_enc_tv_template,
-					.count = ARRAY_SIZE(speck64_xts_enc_tv_template)
-				},
-				.dec = {
-					.vecs = speck64_xts_dec_tv_template,
-					.count = ARRAY_SIZE(speck64_xts_dec_tv_template)
-				}
-			}
-		}
-	}, {
 		.alg = "xts(twofish)",
 		.test = alg_test_skcipher,
 		.suite = {
@@ -4132,6 +4141,22 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "zstd",
+		.test = alg_test_comp,
+		.fips_allowed = 1,
+		.suite = {
+			.comp = {
+				.comp = {
+					.vecs = zstd_comp_tv_template,
+					.count = ZSTD_COMP_TEST_VECTORS
+				},
+				.decomp = {
+					.vecs = zstd_decomp_tv_template,
+					.count = ZSTD_DECOMP_TEST_VECTORS
+				}
+			}
+		}
 	}
 };
 

diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index daae6c1..4a349b7 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h

@@ -29,7 +29,7 @@
 #define MAX_DIGEST_SIZE		64
 #define MAX_TAP			8
 
-#define MAX_KEYLEN		160
+#define MAX_KEYLEN		1088
 #define MAX_IVLEN		32
 
 struct hash_testvec {
@@ -37,10 +37,10 @@ struct hash_testvec {
 	char *key;
 	char *plaintext;
 	char *digest;
-	unsigned char tap[MAX_TAP];
+	unsigned short tap[MAX_TAP];
 	unsigned short psize;
-	unsigned char np;
-	unsigned char ksize;
+	unsigned short np;
+	unsigned short ksize;
 };
 
 /*
@@ -4530,6 +4530,1237 @@ static struct hash_testvec poly1305_tv_template[] = {
 	},
 };
 
+/* NHPoly1305 test vectors from https://github.com/google/adiantum */
+static struct hash_testvec nhpoly1305_tv_template[] = {
+	{
+		.key	= "\xd2\x5d\x4c\xdd\x8d\x2b\x7f\x7a"
+			  "\xd9\xbe\x71\xec\xd1\x83\x52\xe3"
+			  "\xe1\xad\xd7\x5c\x0a\x75\x9d\xec"
+			  "\x1d\x13\x7e\x5d\x71\x07\xc9\xe4"
+			  "\x57\x2d\x44\x68\xcf\xd8\xd6\xc5"
+			  "\x39\x69\x7d\x32\x75\x51\x4f\x7e"
+			  "\xb2\x4c\xc6\x90\x51\x6e\xd9\xd6"
+			  "\xa5\x8b\x2d\xf1\x94\xf9\xf7\x5e"
+			  "\x2c\x84\x7b\x41\x0f\x88\x50\x89"
+			  "\x30\xd9\xa1\x38\x46\x6c\xc0\x4f"
+			  "\xe8\xdf\xdc\x66\xab\x24\x43\x41"
+			  "\x91\x55\x29\x65\x86\x28\x5e\x45"
+			  "\xd5\x2d\xb7\x80\x08\x9a\xc3\xd4"
+			  "\x9a\x77\x0a\xd4\xef\x3e\xe6\x3f"
+			  "\x6f\x2f\x9b\x3a\x7d\x12\x1e\x80"
+			  "\x6c\x44\xa2\x25\xe1\xf6\x60\xe9"
+			  "\x0d\xaf\xc5\x3c\xa5\x79\xae\x64"
+			  "\xbc\xa0\x39\xa3\x4d\x10\xe5\x4d"
+			  "\xd5\xe7\x89\x7a\x13\xee\x06\x78"
+			  "\xdc\xa4\xdc\x14\x27\xe6\x49\x38"
+			  "\xd0\xe0\x45\x25\x36\xc5\xf4\x79"
+			  "\x2e\x9a\x98\x04\xe4\x2b\x46\x52"
+			  "\x7c\x33\xca\xe2\x56\x51\x50\xe2"
+			  "\xa5\x9a\xae\x18\x6a\x13\xf8\xd2"
+			  "\x21\x31\x66\x02\xe2\xda\x8d\x7e"
+			  "\x41\x19\xb2\x61\xee\x48\x8f\xf1"
+			  "\x65\x24\x2e\x1e\x68\xce\x05\xd9"
+			  "\x2a\xcf\xa5\x3a\x57\xdd\x35\x91"
+			  "\x93\x01\xca\x95\xfc\x2b\x36\x04"
+			  "\xe6\x96\x97\x28\xf6\x31\xfe\xa3"
+			  "\x9d\xf6\x6a\x1e\x80\x8d\xdc\xec"
+			  "\xaf\x66\x11\x13\x02\x88\xd5\x27"
+			  "\x33\xb4\x1a\xcd\xa3\xf6\xde\x31"
+			  "\x8e\xc0\x0e\x6c\xd8\x5a\x97\x5e"
+			  "\xdd\xfd\x60\x69\x38\x46\x3f\x90"
+			  "\x5e\x97\xd3\x32\x76\xc7\x82\x49"
+			  "\xfe\xba\x06\x5f\x2f\xa2\xfd\xff"
+			  "\x80\x05\x40\xe4\x33\x03\xfb\x10"
+			  "\xc0\xde\x65\x8c\xc9\x8d\x3a\x9d"
+			  "\xb5\x7b\x36\x4b\xb5\x0c\xcf\x00"
+			  "\x9c\x87\xe4\x49\xad\x90\xda\x4a"
+			  "\xdd\xbd\xff\xe2\x32\x57\xd6\x78"
+			  "\x36\x39\x6c\xd3\x5b\x9b\x88\x59"
+			  "\x2d\xf0\x46\xe4\x13\x0e\x2b\x35"
+			  "\x0d\x0f\x73\x8a\x4f\x26\x84\x75"
+			  "\x88\x3c\xc5\x58\x66\x18\x1a\xb4"
+			  "\x64\x51\x34\x27\x1b\xa4\x11\xc9"
+			  "\x6d\x91\x8a\xfa\x32\x60\x9d\xd7"
+			  "\x87\xe5\xaa\x43\x72\xf8\xda\xd1"
+			  "\x48\x44\x13\x61\xdc\x8c\x76\x17"
+			  "\x0c\x85\x4e\xf3\xdd\xa2\x42\xd2"
+			  "\x74\xc1\x30\x1b\xeb\x35\x31\x29"
+			  "\x5b\xd7\x4c\x94\x46\x35\xa1\x23"
+			  "\x50\xf2\xa2\x8e\x7e\x4f\x23\x4f"
+			  "\x51\xff\xe2\xc9\xa3\x7d\x56\x8b"
+			  "\x41\xf2\xd0\xc5\x57\x7e\x59\xac"
+			  "\xbb\x65\xf3\xfe\xf7\x17\xef\x63"
+			  "\x7c\x6f\x23\xdd\x22\x8e\xed\x84"
+			  "\x0e\x3b\x09\xb3\xf3\xf4\x8f\xcd"
+			  "\x37\xa8\xe1\xa7\x30\xdb\xb1\xa2"
+			  "\x9c\xa2\xdf\x34\x17\x3e\x68\x44"
+			  "\xd0\xde\x03\x50\xd1\x48\x6b\x20"
+			  "\xe2\x63\x45\xa5\xea\x87\xc2\x42"
+			  "\x95\x03\x49\x05\xed\xe0\x90\x29"
+			  "\x1a\xb8\xcf\x9b\x43\xcf\x29\x7a"
+			  "\x63\x17\x41\x9f\xe0\xc9\x10\xfd"
+			  "\x2c\x56\x8c\x08\x55\xb4\xa9\x27"
+			  "\x0f\x23\xb1\x05\x6a\x12\x46\xc7"
+			  "\xe1\xfe\x28\x93\x93\xd7\x2f\xdc"
+			  "\x98\x30\xdb\x75\x8a\xbe\x97\x7a"
+			  "\x02\xfb\x8c\xba\xbe\x25\x09\xbe"
+			  "\xce\xcb\xa2\xef\x79\x4d\x0e\x9d"
+			  "\x1b\x9d\xb6\x39\x34\x38\xfa\x07"
+			  "\xec\xe8\xfc\x32\x85\x1d\xf7\x85"
+			  "\x63\xc3\x3c\xc0\x02\x75\xd7\x3f"
+			  "\xb2\x68\x60\x66\x65\x81\xc6\xb1"
+			  "\x42\x65\x4b\x4b\x28\xd7\xc7\xaa"
+			  "\x9b\xd2\xdc\x1b\x01\xe0\x26\x39"
+			  "\x01\xc1\x52\x14\xd1\x3f\xb7\xe6"
+			  "\x61\x41\xc7\x93\xd2\xa2\x67\xc6"
+			  "\xf7\x11\xb5\xf5\xea\xdd\x19\xfb"
+			  "\x4d\x21\x12\xd6\x7d\xf1\x10\xb0"
+			  "\x89\x07\xc7\x5a\x52\x73\x70\x2f"
+			  "\x32\xef\x65\x2b\x12\xb2\xf0\xf5"
+			  "\x20\xe0\x90\x59\x7e\x64\xf1\x4c"
+			  "\x41\xb3\xa5\x91\x08\xe6\x5e\x5f"
+			  "\x05\x56\x76\xb4\xb0\xcd\x70\x53"
+			  "\x10\x48\x9c\xff\xc2\x69\x55\x24"
+			  "\x87\xef\x84\xea\xfb\xa7\xbf\xa0"
+			  "\x91\x04\xad\x4f\x8b\x57\x54\x4b"
+			  "\xb6\xe9\xd1\xac\x37\x2f\x1d\x2e"
+			  "\xab\xa5\xa4\xe8\xff\xfb\xd9\x39"
+			  "\x2f\xb7\xac\xd1\xfe\x0b\x9a\x80"
+			  "\x0f\xb6\xf4\x36\x39\x90\x51\xe3"
+			  "\x0a\x2f\xb6\x45\x76\x89\xcd\x61"
+			  "\xfe\x48\x5f\x75\x1d\x13\x00\x62"
+			  "\x80\x24\x47\xe7\xbc\x37\xd7\xe3"
+			  "\x15\xe8\x68\x22\xaf\x80\x6f\x4b"
+			  "\xa8\x9f\x01\x10\x48\x14\xc3\x02"
+			  "\x52\xd2\xc7\x75\x9b\x52\x6d\x30"
+			  "\xac\x13\x85\xc8\xf7\xa3\x58\x4b"
+			  "\x49\xf7\x1c\x45\x55\x8c\x39\x9a"
+			  "\x99\x6d\x97\x27\x27\xe6\xab\xdd"
+			  "\x2c\x42\x1b\x35\xdd\x9d\x73\xbb"
+			  "\x6c\xf3\x64\xf1\xfb\xb9\xf7\xe6"
+			  "\x4a\x3c\xc0\x92\xc0\x2e\xb7\x1a"
+			  "\xbe\xab\xb3\x5a\xe5\xea\xb1\x48"
+			  "\x58\x13\x53\x90\xfd\xc3\x8e\x54"
+			  "\xf9\x18\x16\x73\xe8\xcb\x6d\x39"
+			  "\x0e\xd7\xe0\xfe\xb6\x9f\x43\x97"
+			  "\xe8\xd0\x85\x56\x83\x3e\x98\x68"
+			  "\x7f\xbd\x95\xa8\x9a\x61\x21\x8f"
+			  "\x06\x98\x34\xa6\xc8\xd6\x1d\xf3"
+			  "\x3d\x43\xa4\x9a\x8c\xe5\xd3\x5a"
+			  "\x32\xa2\x04\x22\xa4\x19\x1a\x46"
+			  "\x42\x7e\x4d\xe5\xe0\xe6\x0e\xca"
+			  "\xd5\x58\x9d\x2c\xaf\xda\x33\x5c"
+			  "\xb0\x79\x9e\xc9\xfc\xca\xf0\x2f"
+			  "\xa8\xb2\x77\xeb\x7a\xa2\xdd\x37"
+			  "\x35\x83\x07\xd6\x02\x1a\xb6\x6c"
+			  "\x24\xe2\x59\x08\x0e\xfd\x3e\x46"
+			  "\xec\x40\x93\xf4\x00\x26\x4f\x2a"
+			  "\xff\x47\x2f\xeb\x02\x92\x26\x5b"
+			  "\x53\x17\xc2\x8d\x2a\xc7\xa3\x1b"
+			  "\xcd\xbc\xa7\xe8\xd1\x76\xe3\x80"
+			  "\x21\xca\x5d\x3b\xe4\x9c\x8f\xa9"
+			  "\x5b\x7f\x29\x7f\x7c\xd8\xed\x6d"
+			  "\x8c\xb2\x86\x85\xe7\x77\xf2\x85"
+			  "\xab\x38\xa9\x9d\xc1\x4e\xc5\x64"
+			  "\x33\x73\x8b\x59\x03\xad\x05\xdf"
+			  "\x25\x98\x31\xde\xef\x13\xf1\x9b"
+			  "\x3c\x91\x9d\x7b\xb1\xfa\xe6\xbf"
+			  "\x5b\xed\xa5\x55\xe6\xea\x6c\x74"
+			  "\xf4\xb9\xe4\x45\x64\x72\x81\xc2"
+			  "\x4c\x28\xd4\xcd\xac\xe2\xde\xf9"
+			  "\xeb\x5c\xeb\x61\x60\x5a\xe5\x28",
+		.ksize	= 1088,
+		.plaintext	= "",
+		.psize	= 0,
+		.digest	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+	}, {
+		.key	= "\x29\x21\x43\xcb\xcb\x13\x07\xde"
+			  "\xbf\x48\xdf\x8a\x7f\xa2\x84\xde"
+			  "\x72\x23\x9d\xf5\xf0\x07\xf2\x4c"
+			  "\x20\x3a\x93\xb9\xcd\x5d\xfe\xcb"
+			  "\x99\x2c\x2b\x58\xc6\x50\x5f\x94"
+			  "\x56\xc3\x7c\x0d\x02\x3f\xb8\x5e"
+			  "\x7b\xc0\x6c\x51\x34\x76\xc0\x0e"
+			  "\xc6\x22\xc8\x9e\x92\xa0\x21\xc9"
+			  "\x85\x5c\x7c\xf8\xe2\x64\x47\xc9"
+			  "\xe4\xa2\x57\x93\xf8\xa2\x69\xcd"
+			  "\x62\x98\x99\xf4\xd7\x7b\x14\xb1"
+			  "\xd8\x05\xff\x04\x15\xc9\xe1\x6e"
+			  "\x9b\xe6\x50\x6b\x0b\x3f\x22\x1f"
+			  "\x08\xde\x0c\x5b\x08\x7e\xc6\x2f"
+			  "\x6c\xed\xd6\xb2\x15\xa4\xb3\xf9"
+			  "\xa7\x46\x38\x2a\xea\x69\xa5\xde"
+			  "\x02\xc3\x96\x89\x4d\x55\x3b\xed"
+			  "\x3d\x3a\x85\x77\xbf\x97\x45\x5c"
+			  "\x9e\x02\x69\xe2\x1b\x68\xbe\x96"
+			  "\xfb\x64\x6f\x0f\xf6\x06\x40\x67"
+			  "\xfa\x04\xe3\x55\xfa\xbe\xa4\x60"
+			  "\xef\x21\x66\x97\xe6\x9d\x5c\x1f"
+			  "\x62\x37\xaa\x31\xde\xe4\x9c\x28"
+			  "\x95\xe0\x22\x86\xf4\x4d\xf3\x07"
+			  "\xfd\x5f\x3a\x54\x2c\x51\x80\x71"
+			  "\xba\x78\x69\x5b\x65\xab\x1f\x81"
+			  "\xed\x3b\xff\x34\xa3\xfb\xbc\x73"
+			  "\x66\x7d\x13\x7f\xdf\x6e\xe2\xe2"
+			  "\xeb\x4f\x6c\xda\x7d\x33\x57\xd0"
+			  "\xd3\x7c\x95\x4f\x33\x58\x21\xc7"
+			  "\xc0\xe5\x6f\x42\x26\xc6\x1f\x5e"
+			  "\x85\x1b\x98\x9a\xa2\x1e\x55\x77"
+			  "\x23\xdf\x81\x5e\x79\x55\x05\xfc"
+			  "\xfb\xda\xee\xba\x5a\xba\xf7\x77"
+			  "\x7f\x0e\xd3\xe1\x37\xfe\x8d\x2b"
+			  "\xd5\x3f\xfb\xd0\xc0\x3c\x0b\x3f"
+			  "\xcf\x3c\x14\xcf\xfb\x46\x72\x4c"
+			  "\x1f\x39\xe2\xda\x03\x71\x6d\x23"
+			  "\xef\x93\xcd\x39\xd9\x37\x80\x4d"
+			  "\x65\x61\xd1\x2c\x03\xa9\x47\x72"
+			  "\x4d\x1e\x0e\x16\x33\x0f\x21\x17"
+			  "\xec\x92\xea\x6f\x37\x22\xa4\xd8"
+			  "\x03\x33\x9e\xd8\x03\x69\x9a\xe8"
+			  "\xb2\x57\xaf\x78\x99\x05\x12\xab"
+			  "\x48\x90\x80\xf0\x12\x9b\x20\x64"
+			  "\x7a\x1d\x47\x5f\xba\x3c\xf9\xc3"
+			  "\x0a\x0d\x8d\xa1\xf9\x1b\x82\x13"
+			  "\x3e\x0d\xec\x0a\x83\xc0\x65\xe1"
+			  "\xe9\x95\xff\x97\xd6\xf2\xe4\xd5"
+			  "\x86\xc0\x1f\x29\x27\x63\xd7\xde"
+			  "\xb7\x0a\x07\x99\x04\x2d\xa3\x89"
+			  "\xa2\x43\xcf\xf3\xe1\x43\xac\x4a"
+			  "\x06\x97\xd0\x05\x4f\x87\xfa\xf9"
+			  "\x9b\xbf\x52\x70\xbd\xbc\x6c\xf3"
+			  "\x03\x13\x60\x41\x28\x09\xec\xcc"
+			  "\xb1\x1a\xec\xd6\xfb\x6f\x2a\x89"
+			  "\x5d\x0b\x53\x9c\x59\xc1\x84\x21"
+			  "\x33\x51\x47\x19\x31\x9c\xd4\x0a"
+			  "\x4d\x04\xec\x50\x90\x61\xbd\xbc"
+			  "\x7e\xc8\xd9\x6c\x98\x1d\x45\x41"
+			  "\x17\x5e\x97\x1c\xc5\xa8\xe8\xea"
+			  "\x46\x58\x53\xf7\x17\xd5\xad\x11"
+			  "\xc8\x54\xf5\x7a\x33\x90\xf5\x19"
+			  "\xba\x36\xb4\xfc\x52\xa5\x72\x3d"
+			  "\x14\xbb\x55\xa7\xe9\xe3\x12\xf7"
+			  "\x1c\x30\xa2\x82\x03\xbf\x53\x91"
+			  "\x2e\x60\x41\x9f\x5b\x69\x39\xf6"
+			  "\x4d\xc8\xf8\x46\x7a\x7f\xa4\x98"
+			  "\x36\xff\x06\xcb\xca\xe7\x33\xf2"
+			  "\xc0\x4a\xf4\x3c\x14\x44\x5f\x6b"
+			  "\x75\xef\x02\x36\x75\x08\x14\xfd"
+			  "\x10\x8e\xa5\x58\xd0\x30\x46\x49"
+			  "\xaf\x3a\xf8\x40\x3d\x35\xdb\x84"
+			  "\x11\x2e\x97\x6a\xb7\x87\x7f\xad"
+			  "\xf1\xfa\xa5\x63\x60\xd8\x5e\xbf"
+			  "\x41\x78\x49\xcf\x77\xbb\x56\xbb"
+			  "\x7d\x01\x67\x05\x22\xc8\x8f\x41"
+			  "\xba\x81\xd2\xca\x2c\x38\xac\x76"
+			  "\x06\xc1\x1a\xc2\xce\xac\x90\x67"
+			  "\x57\x3e\x20\x12\x5b\xd9\x97\x58"
+			  "\x65\x05\xb7\x04\x61\x7e\xd8\x3a"
+			  "\xbf\x55\x3b\x13\xe9\x34\x5a\x37"
+			  "\x36\xcb\x94\x45\xc5\x32\xb3\xa0"
+			  "\x0c\x3e\x49\xc5\xd3\xed\xa7\xf0"
+			  "\x1c\x69\xcc\xea\xcc\x83\xc9\x16"
+			  "\x95\x72\x4b\xf4\x89\xd5\xb9\x10"
+			  "\xf6\x2d\x60\x15\xea\x3c\x06\x66"
+			  "\x9f\x82\xad\x17\xce\xd2\xa4\x48"
+			  "\x7c\x65\xd9\xf8\x02\x4d\x9b\x4c"
+			  "\x89\x06\x3a\x34\x85\x48\x89\x86"
+			  "\xf9\x24\xa9\x54\x72\xdb\x44\x95"
+			  "\xc7\x44\x1c\x19\x11\x4c\x04\xdc"
+			  "\x13\xb9\x67\xc8\xc3\x3a\x6a\x50"
+			  "\xfa\xd1\xfb\xe1\x88\xb6\xf1\xa3"
+			  "\xc5\x3b\xdc\x38\x45\x16\x26\x02"
+			  "\x3b\xb8\x8f\x8b\x58\x7d\x23\x04"
+			  "\x50\x6b\x81\x9f\xae\x66\xac\x6f"
+			  "\xcf\x2a\x9d\xf1\xfd\x1d\x57\x07"
+			  "\xbe\x58\xeb\x77\x0c\xe3\xc2\x19"
+			  "\x14\x74\x1b\x51\x1c\x4f\x41\xf3"
+			  "\x32\x89\xb3\xe7\xde\x62\xf6\x5f"
+			  "\xc7\x6a\x4a\x2a\x5b\x0f\x5f\x87"
+			  "\x9c\x08\xb9\x02\x88\xc8\x29\xb7"
+			  "\x94\x52\xfa\x52\xfe\xaa\x50\x10"
+			  "\xba\x48\x75\x5e\x11\x1b\xe6\x39"
+			  "\xd7\x82\x2c\x87\xf1\x1e\xa4\x38"
+			  "\x72\x3e\x51\xe7\xd8\x3e\x5b\x7b"
+			  "\x31\x16\x89\xba\xd6\xad\x18\x5e"
+			  "\xba\xf8\x12\xb3\xf4\x6c\x47\x30"
+			  "\xc0\x38\x58\xb3\x10\x8d\x58\x5d"
+			  "\xb4\xfb\x19\x7e\x41\xc3\x66\xb8"
+			  "\xd6\x72\x84\xe1\x1a\xc2\x71\x4c"
+			  "\x0d\x4a\x21\x7a\xab\xa2\xc0\x36"
+			  "\x15\xc5\xe9\x46\xd7\x29\x17\x76"
+			  "\x5e\x47\x36\x7f\x72\x05\xa7\xcc"
+			  "\x36\x63\xf9\x47\x7d\xe6\x07\x3c"
+			  "\x8b\x79\x1d\x96\x61\x8d\x90\x65"
+			  "\x7c\xf5\xeb\x4e\x6e\x09\x59\x6d"
+			  "\x62\x50\x1b\x0f\xe0\xdc\x78\xf2"
+			  "\x5b\x83\x1a\xa1\x11\x75\xfd\x18"
+			  "\xd7\xe2\x8d\x65\x14\x21\xce\xbe"
+			  "\xb5\x87\xe3\x0a\xda\x24\x0a\x64"
+			  "\xa9\x9f\x03\x8d\x46\x5d\x24\x1a"
+			  "\x8a\x0c\x42\x01\xca\xb1\x5f\x7c"
+			  "\xa5\xac\x32\x4a\xb8\x07\x91\x18"
+			  "\x6f\xb0\x71\x3c\xc9\xb1\xa8\xf8"
+			  "\x5f\x69\xa5\xa1\xca\x9e\x7a\xaa"
+			  "\xac\xe9\xc7\x47\x41\x75\x25\xc3"
+			  "\x73\xe2\x0b\xdd\x6d\x52\x71\xbe"
+			  "\xc5\xdc\xb4\xe7\x01\x26\x53\x77"
+			  "\x86\x90\x85\x68\x6b\x7b\x03\x53"
+			  "\xda\x52\x52\x51\x68\xc8\xf3\xec"
+			  "\x6c\xd5\x03\x7a\xa3\x0e\xb4\x02"
+			  "\x5f\x1a\xab\xee\xca\x67\x29\x7b"
+			  "\xbd\x96\x59\xb3\x8b\x32\x7a\x92"
+			  "\x9f\xd8\x25\x2b\xdf\xc0\x4c\xda",
+		.ksize	= 1088,
+		.plaintext	= "\xbc\xda\x81\xa8\x78\x79\x1c\xbf"
+			  "\x77\x53\xba\x4c\x30\x5b\xb8\x33",
+		.psize	= 16,
+		.digest	= "\x04\xbf\x7f\x6a\xce\x72\xea\x6a"
+			  "\x79\xdb\xb0\xc9\x60\xf6\x12\xcc",
+		.np	= 6,
+		.tap	= { 4, 4, 1, 1, 1, 5 },
+	}, {
+		.key	= "\x65\x4d\xe3\xf8\xd2\x4c\xac\x28"
+			  "\x68\xf5\xb3\x81\x71\x4b\xa1\xfa"
+			  "\x04\x0e\xd3\x81\x36\xbe\x0c\x81"
+			  "\x5e\xaf\xbc\x3a\xa4\xc0\x8e\x8b"
+			  "\x55\x63\xd3\x52\x97\x88\xd6\x19"
+			  "\xbc\x96\xdf\x49\xff\x04\x63\xf5"
+			  "\x0c\x11\x13\xaa\x9e\x1f\x5a\xf7"
+			  "\xdd\xbd\x37\x80\xc3\xd0\xbe\xa7"
+			  "\x05\xc8\x3c\x98\x1e\x05\x3c\x84"
+			  "\x39\x61\xc4\xed\xed\x71\x1b\xc4"
+			  "\x74\x45\x2c\xa1\x56\x70\x97\xfd"
+			  "\x44\x18\x07\x7d\xca\x60\x1f\x73"
+			  "\x3b\x6d\x21\xcb\x61\x87\x70\x25"
+			  "\x46\x21\xf1\x1f\x21\x91\x31\x2d"
+			  "\x5d\xcc\xb7\xd1\x84\x3e\x3d\xdb"
+			  "\x03\x53\x2a\x82\xa6\x9a\x95\xbc"
+			  "\x1a\x1e\x0a\x5e\x07\x43\xab\x43"
+			  "\xaf\x92\x82\x06\x91\x04\x09\xf4"
+			  "\x17\x0a\x9a\x2c\x54\xdb\xb8\xf4"
+			  "\xd0\xf0\x10\x66\x24\x8d\xcd\xda"
+			  "\xfe\x0e\x45\x9d\x6f\xc4\x4e\xf4"
+			  "\x96\xaf\x13\xdc\xa9\xd4\x8c\xc4"
+			  "\xc8\x57\x39\x3c\xc2\xd3\x0a\x76"
+			  "\x4a\x1f\x75\x83\x44\xc7\xd1\x39"
+			  "\xd8\xb5\x41\xba\x73\x87\xfa\x96"
+			  "\xc7\x18\x53\xfb\x9b\xda\xa0\x97"
+			  "\x1d\xee\x60\x85\x9e\x14\xc3\xce"
+			  "\xc4\x05\x29\x3b\x95\x30\xa3\xd1"
+			  "\x9f\x82\x6a\x04\xf5\xa7\x75\x57"
+			  "\x82\x04\xfe\x71\x51\x71\xb1\x49"
+			  "\x50\xf8\xe0\x96\xf1\xfa\xa8\x88"
+			  "\x3f\xa0\x86\x20\xd4\x60\x79\x59"
+			  "\x17\x2d\xd1\x09\xf4\xec\x05\x57"
+			  "\xcf\x62\x7e\x0e\x7e\x60\x78\xe6"
+			  "\x08\x60\x29\xd8\xd5\x08\x1a\x24"
+			  "\xc4\x6c\x24\xe7\x92\x08\x3d\x8a"
+			  "\x98\x7a\xcf\x99\x0a\x65\x0e\xdc"
+			  "\x8c\x8a\xbe\x92\x82\x91\xcc\x62"
+			  "\x30\xb6\xf4\x3f\xc6\x8a\x7f\x12"
+			  "\x4a\x8a\x49\xfa\x3f\x5c\xd4\x5a"
+			  "\xa6\x82\xa3\xe6\xaa\x34\x76\xb2"
+			  "\xab\x0a\x30\xef\x6c\x77\x58\x3f"
+			  "\x05\x6b\xcc\x5c\xae\xdc\xd7\xb9"
+			  "\x51\x7e\x8d\x32\x5b\x24\x25\xbe"
+			  "\x2b\x24\x01\xcf\x80\xda\x16\xd8"
+			  "\x90\x72\x2c\xad\x34\x8d\x0c\x74"
+			  "\x02\xcb\xfd\xcf\x6e\xef\x97\xb5"
+			  "\x4c\xf2\x68\xca\xde\x43\x9e\x8a"
+			  "\xc5\x5f\x31\x7f\x14\x71\x38\xec"
+			  "\xbd\x98\xe5\x71\xc4\xb5\xdb\xef"
+			  "\x59\xd2\xca\xc0\xc1\x86\x75\x01"
+			  "\xd4\x15\x0d\x6f\xa4\xf7\x7b\x37"
+			  "\x47\xda\x18\x93\x63\xda\xbe\x9e"
+			  "\x07\xfb\xb2\x83\xd5\xc4\x34\x55"
+			  "\xee\x73\xa1\x42\x96\xf9\x66\x41"
+			  "\xa4\xcc\xd2\x93\x6e\xe1\x0a\xbb"
+			  "\xd2\xdd\x18\x23\xe6\x6b\x98\x0b"
+			  "\x8a\x83\x59\x2c\xc3\xa6\x59\x5b"
+			  "\x01\x22\x59\xf7\xdc\xb0\x87\x7e"
+			  "\xdb\x7d\xf4\x71\x41\xab\xbd\xee"
+			  "\x79\xbe\x3c\x01\x76\x0b\x2d\x0a"
+			  "\x42\xc9\x77\x8c\xbb\x54\x95\x60"
+			  "\x43\x2e\xe0\x17\x52\xbd\x90\xc9"
+			  "\xc2\x2c\xdd\x90\x24\x22\x76\x40"
+			  "\x5c\xb9\x41\xc9\xa1\xd5\xbd\xe3"
+			  "\x44\xe0\xa4\xab\xcc\xb8\xe2\x32"
+			  "\x02\x15\x04\x1f\x8c\xec\x5d\x14"
+			  "\xac\x18\xaa\xef\x6e\x33\x19\x6e"
+			  "\xde\xfe\x19\xdb\xeb\x61\xca\x18"
+			  "\xad\xd8\x3d\xbf\x09\x11\xc7\xa5"
+			  "\x86\x0b\x0f\xe5\x3e\xde\xe8\xd9"
+			  "\x0a\x69\x9e\x4c\x20\xff\xf9\xc5"
+			  "\xfa\xf8\xf3\x7f\xa5\x01\x4b\x5e"
+			  "\x0f\xf0\x3b\x68\xf0\x46\x8c\x2a"
+			  "\x7a\xc1\x8f\xa0\xfe\x6a\x5b\x44"
+			  "\x70\x5c\xcc\x92\x2c\x6f\x0f\xbd"
+			  "\x25\x3e\xb7\x8e\x73\x58\xda\xc9"
+			  "\xa5\xaa\x9e\xf3\x9b\xfd\x37\x3e"
+			  "\xe2\x88\xa4\x7b\xc8\x5c\xa8\x93"
+			  "\x0e\xe7\x9a\x9c\x2e\x95\x18\x9f"
+			  "\xc8\x45\x0c\x88\x9e\x53\x4f\x3a"
+			  "\x76\xc1\x35\xfa\x17\xd8\xac\xa0"
+			  "\x0c\x2d\x47\x2e\x4f\x69\x9b\xf7"
+			  "\xd0\xb6\x96\x0c\x19\xb3\x08\x01"
+			  "\x65\x7a\x1f\xc7\x31\x86\xdb\xc8"
+			  "\xc1\x99\x8f\xf8\x08\x4a\x9d\x23"
+			  "\x22\xa8\xcf\x27\x01\x01\x88\x93"
+			  "\x9c\x86\x45\xbd\xe0\x51\xca\x52"
+			  "\x84\xba\xfe\x03\xf7\xda\xc5\xce"
+			  "\x3e\x77\x75\x86\xaf\x84\xc8\x05"
+			  "\x44\x01\x0f\x02\xf3\x58\xb0\x06"
+			  "\x5a\xd7\x12\x30\x8d\xdf\x1f\x1f"
+			  "\x0a\xe6\xd2\xea\xf6\x3a\x7a\x99"
+			  "\x63\xe8\xd2\xc1\x4a\x45\x8b\x40"
+			  "\x4d\x0a\xa9\x76\x92\xb3\xda\x87"
+			  "\x36\x33\xf0\x78\xc3\x2f\x5f\x02"
+			  "\x1a\x6a\x2c\x32\xcd\x76\xbf\xbd"
+			  "\x5a\x26\x20\x28\x8c\x8c\xbc\x52"
+			  "\x3d\x0a\xc9\xcb\xab\xa4\x21\xb0"
+			  "\x54\x40\x81\x44\xc7\xd6\x1c\x11"
+			  "\x44\xc6\x02\x92\x14\x5a\xbf\x1a"
+			  "\x09\x8a\x18\xad\xcd\x64\x3d\x53"
+			  "\x4a\xb6\xa5\x1b\x57\x0e\xef\xe0"
+			  "\x8c\x44\x5f\x7d\xbd\x6c\xfd\x60"
+			  "\xae\x02\x24\xb6\x99\xdd\x8c\xaf"
+			  "\x59\x39\x75\x3c\xd1\x54\x7b\x86"
+			  "\xcc\x99\xd9\x28\x0c\xb0\x94\x62"
+			  "\xf9\x51\xd1\x19\x96\x2d\x66\xf5"
+			  "\x55\xcf\x9e\x59\xe2\x6b\x2c\x08"
+			  "\xc0\x54\x48\x24\x45\xc3\x8c\x73"
+			  "\xea\x27\x6e\x66\x7d\x1d\x0e\x6e"
+			  "\x13\xe8\x56\x65\x3a\xb0\x81\x5c"
+			  "\xf0\xe8\xd8\x00\x6b\xcd\x8f\xad"
+			  "\xdd\x53\xf3\xa4\x6c\x43\xd6\x31"
+			  "\xaf\xd2\x76\x1e\x91\x12\xdb\x3c"
+			  "\x8c\xc2\x81\xf0\x49\xdb\xe2\x6b"
+			  "\x76\x62\x0a\x04\xe4\xaa\x8a\x7c"
+			  "\x08\x0b\x5d\xd0\xee\x1d\xfb\xc4"
+			  "\x02\x75\x42\xd6\xba\xa7\x22\xa8"
+			  "\x47\x29\xb7\x85\x6d\x93\x3a\xdb"
+			  "\x00\x53\x0b\xa2\xeb\xf8\xfe\x01"
+			  "\x6f\x8a\x31\xd6\x17\x05\x6f\x67"
+			  "\x88\x95\x32\xfe\x4f\xa6\x4b\xf8"
+			  "\x03\xe4\xcd\x9a\x18\xe8\x4e\x2d"
+			  "\xf7\x97\x9a\x0c\x7d\x9f\x7e\x44"
+			  "\x69\x51\xe0\x32\x6b\x62\x86\x8f"
+			  "\xa6\x8e\x0b\x21\x96\xe5\xaf\x77"
+			  "\xc0\x83\xdf\xa5\x0e\xd0\xa1\x04"
+			  "\xaf\xc1\x10\xcb\x5a\x40\xe4\xe3"
+			  "\x38\x7e\x07\xe8\x4d\xfa\xed\xc5"
+			  "\xf0\x37\xdf\xbb\x8a\xcf\x3d\xdc"
+			  "\x61\xd2\xc6\x2b\xff\x07\xc9\x2f"
+			  "\x0c\x2d\x5c\x07\xa8\x35\x6a\xfc"
+			  "\xae\x09\x03\x45\x74\x51\x4d\xc4"
+			  "\xb8\x23\x87\x4a\x99\x27\x20\x87"
+			  "\x62\x44\x0a\x4a\xce\x78\x47\x22",
+		.ksize	= 1088,
+		.plaintext	= "\x8e\xb0\x4c\xde\x9c\x4a\x04\x5a"
+			  "\xf6\xa9\x7f\x45\x25\xa5\x7b\x3a"
+			  "\xbc\x4d\x73\x39\x81\xb5\xbd\x3d"
+			  "\x21\x6f\xd7\x37\x50\x3c\x7b\x28"
+			  "\xd1\x03\x3a\x17\xed\x7b\x7c\x2a"
+			  "\x16\xbc\xdf\x19\x89\x52\x71\x31"
+			  "\xb6\xc0\xfd\xb5\xd3\xba\x96\x99"
+			  "\xb6\x34\x0b\xd0\x99\x93\xfc\x1a"
+			  "\x01\x3c\x85\xc6\x9b\x78\x5c\x8b"
+			  "\xfe\xae\xd2\xbf\xb2\x6f\xf9\xed"
+			  "\xc8\x25\x17\xfe\x10\x3b\x7d\xda"
+			  "\xf4\x8d\x35\x4b\x7c\x7b\x82\xe7"
+			  "\xc2\xb3\xee\x60\x4a\x03\x86\xc9"
+			  "\x4e\xb5\xc4\xbe\xd2\xbd\x66\xf1"
+			  "\x13\xf1\x09\xab\x5d\xca\x63\x1f"
+			  "\xfc\xfb\x57\x2a\xfc\xca\x66\xd8"
+			  "\x77\x84\x38\x23\x1d\xac\xd3\xb3"
+			  "\x7a\xad\x4c\x70\xfa\x9c\xc9\x61"
+			  "\xa6\x1b\xba\x33\x4b\x4e\x33\xec"
+			  "\xa0\xa1\x64\x39\x40\x05\x1c\xc2"
+			  "\x3f\x49\x9d\xae\xf2\xc5\xf2\xc5"
+			  "\xfe\xe8\xf4\xc2\xf9\x96\x2d\x28"
+			  "\x92\x30\x44\xbc\xd2\x7f\xe1\x6e"
+			  "\x62\x02\x8f\x3d\x1c\x80\xda\x0e"
+			  "\x6a\x90\x7e\x75\xff\xec\x3e\xc4"
+			  "\xcd\x16\x34\x3b\x05\x6d\x4d\x20"
+			  "\x1c\x7b\xf5\x57\x4f\xfa\x3d\xac"
+			  "\xd0\x13\x55\xe8\xb3\xe1\x1b\x78"
+			  "\x30\xe6\x9f\x84\xd4\x69\xd1\x08"
+			  "\x12\x77\xa7\x4a\xbd\xc0\xf2\xd2"
+			  "\x78\xdd\xa3\x81\x12\xcb\x6c\x14"
+			  "\x90\x61\xe2\x84\xc6\x2b\x16\xcc"
+			  "\x40\x99\x50\x88\x01\x09\x64\x4f"
+			  "\x0a\x80\xbe\x61\xae\x46\xc9\x0a"
+			  "\x5d\xe0\xfb\x72\x7a\x1a\xdd\x61"
+			  "\x63\x20\x05\xa0\x4a\xf0\x60\x69"
+			  "\x7f\x92\xbc\xbf\x4e\x39\x4d\xdd"
+			  "\x74\xd1\xb7\xc0\x5a\x34\xb7\xae"
+			  "\x76\x65\x2e\xbc\x36\xb9\x04\x95"
+			  "\x42\xe9\x6f\xca\x78\xb3\x72\x07"
+			  "\xa3\xba\x02\x94\x67\x4c\xb1\xd7"
+			  "\xe9\x30\x0d\xf0\x3b\xb8\x10\x6d"
+			  "\xea\x2b\x21\xbf\x74\x59\x82\x97"
+			  "\x85\xaa\xf1\xd7\x54\x39\xeb\x05"
+			  "\xbd\xf3\x40\xa0\x97\xe6\x74\xfe"
+			  "\xb4\x82\x5b\xb1\x36\xcb\xe8\x0d"
+			  "\xce\x14\xd9\xdf\xf1\x94\x22\xcd"
+			  "\xd6\x00\xba\x04\x4c\x05\x0c\xc0"
+			  "\xd1\x5a\xeb\x52\xd5\xa8\x8e\xc8"
+			  "\x97\xa1\xaa\xc1\xea\xc1\xbe\x7c"
+			  "\x36\xb3\x36\xa0\xc6\x76\x66\xc5"
+			  "\xe2\xaf\xd6\x5c\xe2\xdb\x2c\xb3"
+			  "\x6c\xb9\x99\x7f\xff\x9f\x03\x24"
+			  "\xe1\x51\x44\x66\xd8\x0c\x5d\x7f"
+			  "\x5c\x85\x22\x2a\xcf\x6d\x79\x28"
+			  "\xab\x98\x01\x72\xfe\x80\x87\x5f"
+			  "\x46\xba\xef\x81\x24\xee\xbf\xb0"
+			  "\x24\x74\xa3\x65\x97\x12\xc4\xaf"
+			  "\x8b\xa0\x39\xda\x8a\x7e\x74\x6e"
+			  "\x1b\x42\xb4\x44\x37\xfc\x59\xfd"
+			  "\x86\xed\xfb\x8c\x66\x33\xda\x63"
+			  "\x75\xeb\xe1\xa4\x85\x4f\x50\x8f"
+			  "\x83\x66\x0d\xd3\x37\xfa\xe6\x9c"
+			  "\x4f\x30\x87\x35\x18\xe3\x0b\xb7"
+			  "\x6e\x64\x54\xcd\x70\xb3\xde\x54"
+			  "\xb7\x1d\xe6\x4c\x4d\x55\x12\x12"
+			  "\xaf\x5f\x7f\x5e\xee\x9d\xe8\x8e"
+			  "\x32\x9d\x4e\x75\xeb\xc6\xdd\xaa"
+			  "\x48\x82\xa4\x3f\x3c\xd7\xd3\xa8"
+			  "\x63\x9e\x64\xfe\xe3\x97\x00\x62"
+			  "\xe5\x40\x5d\xc3\xad\x72\xe1\x28"
+			  "\x18\x50\xb7\x75\xef\xcd\x23\xbf"
+			  "\x3f\xc0\x51\x36\xf8\x41\xc3\x08"
+			  "\xcb\xf1\x8d\x38\x34\xbd\x48\x45"
+			  "\x75\xed\xbc\x65\x7b\xb5\x0c\x9b"
+			  "\xd7\x67\x7d\x27\xb4\xc4\x80\xd7"
+			  "\xa9\xb9\xc7\x4a\x97\xaa\xda\xc8"
+			  "\x3c\x74\xcf\x36\x8f\xe4\x41\xe3"
+			  "\xd4\xd3\x26\xa7\xf3\x23\x9d\x8f"
+			  "\x6c\x20\x05\x32\x3e\xe0\xc3\xc8"
+			  "\x56\x3f\xa7\x09\xb7\xfb\xc7\xf7"
+			  "\xbe\x2a\xdd\x0f\x06\x7b\x0d\xdd"
+			  "\xb0\xb4\x86\x17\xfd\xb9\x04\xe5"
+			  "\xc0\x64\x5d\xad\x2a\x36\x38\xdb"
+			  "\x24\xaf\x5b\xff\xca\xf9\x41\xe8"
+			  "\xf9\x2f\x1e\x5e\xf9\xf5\xd5\xf2"
+			  "\xb2\x88\xca\xc9\xa1\x31\xe2\xe8"
+			  "\x10\x95\x65\xbf\xf1\x11\x61\x7a"
+			  "\x30\x1a\x54\x90\xea\xd2\x30\xf6"
+			  "\xa5\xad\x60\xf9\x4d\x84\x21\x1b"
+			  "\xe4\x42\x22\xc8\x12\x4b\xb0\x58"
+			  "\x3e\x9c\x2d\x32\x95\x0a\x8e\xb0"
+			  "\x0a\x7e\x77\x2f\xe8\x97\x31\x6a"
+			  "\xf5\x59\xb4\x26\xe6\x37\x12\xc9"
+			  "\xcb\xa0\x58\x33\x6f\xd5\x55\x55"
+			  "\x3c\xa1\x33\xb1\x0b\x7e\x2e\xb4"
+			  "\x43\x2a\x84\x39\xf0\x9c\xf4\x69"
+			  "\x4f\x1e\x79\xa6\x15\x1b\x87\xbb"
+			  "\xdb\x9b\xe0\xf1\x0b\xba\xe3\x6e"
+			  "\xcc\x2f\x49\x19\x22\x29\xfc\x71"
+			  "\xbb\x77\x38\x18\x61\xaf\x85\x76"
+			  "\xeb\xd1\x09\xcc\x86\x04\x20\x9a"
+			  "\x66\x53\x2f\x44\x8b\xc6\xa3\xd2"
+			  "\x5f\xc7\x79\x82\x66\xa8\x6e\x75"
+			  "\x7d\x94\xd1\x86\x75\x0f\xa5\x4f"
+			  "\x3c\x7a\x33\xce\xd1\x6e\x9d\x7b"
+			  "\x1f\x91\x37\xb8\x37\x80\xfb\xe0"
+			  "\x52\x26\xd0\x9a\xd4\x48\x02\x41"
+			  "\x05\xe3\x5a\x94\xf1\x65\x61\x19"
+			  "\xb8\x88\x4e\x2b\xea\xba\x8b\x58"
+			  "\x8b\x42\x01\x00\xa8\xfe\x00\x5c"
+			  "\xfe\x1c\xee\x31\x15\x69\xfa\xb3"
+			  "\x9b\x5f\x22\x8e\x0d\x2c\xe3\xa5"
+			  "\x21\xb9\x99\x8a\x8e\x94\x5a\xef"
+			  "\x13\x3e\x99\x96\x79\x6e\xd5\x42"
+			  "\x36\x03\xa9\xe2\xca\x65\x4e\x8a"
+			  "\x8a\x30\xd2\x7d\x74\xe7\xf0\xaa"
+			  "\x23\x26\xdd\xcb\x82\x39\xfc\x9d"
+			  "\x51\x76\x21\x80\xa2\xbe\x93\x03"
+			  "\x47\xb0\xc1\xb6\xdc\x63\xfd\x9f"
+			  "\xca\x9d\xa5\xca\x27\x85\xe2\xd8"
+			  "\x15\x5b\x7e\x14\x7a\xc4\x89\xcc"
+			  "\x74\x14\x4b\x46\xd2\xce\xac\x39"
+			  "\x6b\x6a\x5a\xa4\x0e\xe3\x7b\x15"
+			  "\x94\x4b\x0f\x74\xcb\x0c\x7f\xa9"
+			  "\xbe\x09\x39\xa3\xdd\x56\x5c\xc7"
+			  "\x99\x56\x65\x39\xf4\x0b\x7d\x87"
+			  "\xec\xaa\xe3\x4d\x22\x65\x39\x4e",
+		.psize	= 1024,
+		.digest	= "\x64\x3a\xbc\xc3\x3f\x74\x40\x51"
+			  "\x6e\x56\x01\x1a\x51\xec\x36\xde",
+		.np	= 8,
+		.tap	= { 64, 203, 267, 28, 263, 62, 54, 83 },
+	}, {
+		.key	= "\x1b\x82\x2e\x1b\x17\x23\xb9\x6d"
+			  "\xdc\x9c\xda\x99\x07\xe3\x5f\xd8"
+			  "\xd2\xf8\x43\x80\x8d\x86\x7d\x80"
+			  "\x1a\xd0\xcc\x13\xb9\x11\x05\x3f"
+			  "\x7e\xcf\x7e\x80\x0e\xd8\x25\x48"
+			  "\x8b\xaa\x63\x83\x92\xd0\x72\xf5"
+			  "\x4f\x67\x7e\x50\x18\x25\xa4\xd1"
+			  "\xe0\x7e\x1e\xba\xd8\xa7\x6e\xdb"
+			  "\x1a\xcc\x0d\xfe\x9f\x6d\x22\x35"
+			  "\xe1\xe6\xe0\xa8\x7b\x9c\xb1\x66"
+			  "\xa3\xf8\xff\x4d\x90\x84\x28\xbc"
+			  "\xdc\x19\xc7\x91\x49\xfc\xf6\x33"
+			  "\xc9\x6e\x65\x7f\x28\x6f\x68\x2e"
+			  "\xdf\x1a\x75\xe9\xc2\x0c\x96\xb9"
+			  "\x31\x22\xc4\x07\xc6\x0a\x2f\xfd"
+			  "\x36\x06\x5f\x5c\xc5\xb1\x3a\xf4"
+			  "\x5e\x48\xa4\x45\x2b\x88\xa7\xee"
+			  "\xa9\x8b\x52\xcc\x99\xd9\x2f\xb8"
+			  "\xa4\x58\x0a\x13\xeb\x71\x5a\xfa"
+			  "\xe5\x5e\xbe\xf2\x64\xad\x75\xbc"
+			  "\x0b\x5b\x34\x13\x3b\x23\x13\x9a"
+			  "\x69\x30\x1e\x9a\xb8\x03\xb8\x8b"
+			  "\x3e\x46\x18\x6d\x38\xd9\xb3\xd8"
+			  "\xbf\xf1\xd0\x28\xe6\x51\x57\x80"
+			  "\x5e\x99\xfb\xd0\xce\x1e\x83\xf7"
+			  "\xe9\x07\x5a\x63\xa9\xef\xce\xa5"
+			  "\xfb\x3f\x37\x17\xfc\x0b\x37\x0e"
+			  "\xbb\x4b\x21\x62\xb7\x83\x0e\xa9"
+			  "\x9e\xb0\xc4\xad\x47\xbe\x35\xe7"
+			  "\x51\xb2\xf2\xac\x2b\x65\x7b\x48"
+			  "\xe3\x3f\x5f\xb6\x09\x04\x0c\x58"
+			  "\xce\x99\xa9\x15\x2f\x4e\xc1\xf2"
+			  "\x24\x48\xc0\xd8\x6c\xd3\x76\x17"
+			  "\x83\x5d\xe6\xe3\xfd\x01\x8e\xf7"
+			  "\x42\xa5\x04\x29\x30\xdf\xf9\x00"
+			  "\x4a\xdc\x71\x22\x1a\x33\x15\xb6"
+			  "\xd7\x72\xfb\x9a\xb8\xeb\x2b\x38"
+			  "\xea\xa8\x61\xa8\x90\x11\x9d\x73"
+			  "\x2e\x6c\xce\x81\x54\x5a\x9f\xcd"
+			  "\xcf\xd5\xbd\x26\x5d\x66\xdb\xfb"
+			  "\xdc\x1e\x7c\x10\xfe\x58\x82\x10"
+			  "\x16\x24\x01\xce\x67\x55\x51\xd1"
+			  "\xdd\x6b\x44\xa3\x20\x8e\xa9\xa6"
+			  "\x06\xa8\x29\x77\x6e\x00\x38\x5b"
+			  "\xde\x4d\x58\xd8\x1f\x34\xdf\xf9"
+			  "\x2c\xac\x3e\xad\xfb\x92\x0d\x72"
+			  "\x39\xa4\xac\x44\x10\xc0\x43\xc4"
+			  "\xa4\x77\x3b\xfc\xc4\x0d\x37\xd3"
+			  "\x05\x84\xda\x53\x71\xf8\x80\xd3"
+			  "\x34\x44\xdb\x09\xb4\x2b\x8e\xe3"
+			  "\x00\x75\x50\x9e\x43\x22\x00\x0b"
+			  "\x7c\x70\xab\xd4\x41\xf1\x93\xcd"
+			  "\x25\x2d\x84\x74\xb5\xf2\x92\xcd"
+			  "\x0a\x28\xea\x9a\x49\x02\x96\xcb"
+			  "\x85\x9e\x2f\x33\x03\x86\x1d\xdc"
+			  "\x1d\x31\xd5\xfc\x9d\xaa\xc5\xe9"
+			  "\x9a\xc4\x57\xf5\x35\xed\xf4\x4b"
+			  "\x3d\x34\xc2\x29\x13\x86\x36\x42"
+			  "\x5d\xbf\x90\x86\x13\x77\xe5\xc3"
+			  "\x62\xb4\xfe\x0b\x70\x39\x35\x65"
+			  "\x02\xea\xf6\xce\x57\x0c\xbb\x74"
+			  "\x29\xe3\xfd\x60\x90\xfd\x10\x38"
+			  "\xd5\x4e\x86\xbd\x37\x70\xf0\x97"
+			  "\xa6\xab\x3b\x83\x64\x52\xca\x66"
+			  "\x2f\xf9\xa4\xca\x3a\x55\x6b\xb0"
+			  "\xe8\x3a\x34\xdb\x9e\x48\x50\x2f"
+			  "\x3b\xef\xfd\x08\x2d\x5f\xc1\x37"
+			  "\x5d\xbe\x73\xe4\xd8\xe9\xac\xca"
+			  "\x8a\xaa\x48\x7c\x5c\xf4\xa6\x96"
+			  "\x5f\xfa\x70\xa6\xb7\x8b\x50\xcb"
+			  "\xa6\xf5\xa9\xbd\x7b\x75\x4c\x22"
+			  "\x0b\x19\x40\x2e\xc9\x39\x39\x32"
+			  "\x83\x03\xa8\xa4\x98\xe6\x8e\x16"
+			  "\xb9\xde\x08\xc5\xfc\xbf\xad\x39"
+			  "\xa8\xc7\x93\x6c\x6f\x23\xaf\xc1"
+			  "\xab\xe1\xdf\xbb\x39\xae\x93\x29"
+			  "\x0e\x7d\x80\x8d\x3e\x65\xf3\xfd"
+			  "\x96\x06\x65\x90\xa1\x28\x64\x4b"
+			  "\x69\xf9\xa8\x84\x27\x50\xfc\x87"
+			  "\xf7\xbf\x55\x8e\x56\x13\x58\x7b"
+			  "\x85\xb4\x6a\x72\x0f\x40\xf1\x4f"
+			  "\x83\x81\x1f\x76\xde\x15\x64\x7a"
+			  "\x7a\x80\xe4\xc7\x5e\x63\x01\x91"
+			  "\xd7\x6b\xea\x0b\x9b\xa2\x99\x3b"
+			  "\x6c\x88\xd8\xfd\x59\x3c\x8d\x22"
+			  "\x86\x56\xbe\xab\xa1\x37\x08\x01"
+			  "\x50\x85\x69\x29\xee\x9f\xdf\x21"
+			  "\x3e\x20\x20\xf5\xb0\xbb\x6b\xd0"
+			  "\x9c\x41\x38\xec\x54\x6f\x2d\xbd"
+			  "\x0f\xe1\xbd\xf1\x2b\x6e\x60\x56"
+			  "\x29\xe5\x7a\x70\x1c\xe2\xfc\x97"
+			  "\x82\x68\x67\xd9\x3d\x1f\xfb\xd8"
+			  "\x07\x9f\xbf\x96\x74\xba\x6a\x0e"
+			  "\x10\x48\x20\xd8\x13\x1e\xb5\x44"
+			  "\xf2\xcc\xb1\x8b\xfb\xbb\xec\xd7"
+			  "\x37\x70\x1f\x7c\x55\xd2\x4b\xb9"
+			  "\xfd\x70\x5e\xa3\x91\x73\x63\x52"
+			  "\x13\x47\x5a\x06\xfb\x01\x67\xa5"
+			  "\xc0\xd0\x49\x19\x56\x66\x9a\x77"
+			  "\x64\xaf\x8c\x25\x91\x52\x87\x0e"
+			  "\x18\xf3\x5f\x97\xfd\x71\x13\xf8"
+			  "\x05\xa5\x39\xcc\x65\xd3\xcc\x63"
+			  "\x5b\xdb\x5f\x7e\x5f\x6e\xad\xc4"
+			  "\xf4\xa0\xc5\xc2\x2b\x4d\x97\x38"
+			  "\x4f\xbc\xfa\x33\x17\xb4\x47\xb9"
+			  "\x43\x24\x15\x8d\xd2\xed\x80\x68"
+			  "\x84\xdb\x04\x80\xca\x5e\x6a\x35"
+			  "\x2c\x2c\xe7\xc5\x03\x5f\x54\xb0"
+			  "\x5e\x4f\x1d\x40\x54\x3d\x78\x9a"
+			  "\xac\xda\x80\x27\x4d\x15\x4c\x1a"
+			  "\x6e\x80\xc9\xc4\x3b\x84\x0e\xd9"
+			  "\x2e\x93\x01\x8c\xc3\xc8\x91\x4b"
+			  "\xb3\xaa\x07\x04\x68\x5b\x93\xa5"
+			  "\xe7\xc4\x9d\xe7\x07\xee\xf5\x3b"
+			  "\x40\x89\xcc\x60\x34\x9d\xb4\x06"
+			  "\x1b\xef\x92\xe6\xc1\x2a\x7d\x0f"
+			  "\x81\xaa\x56\xe3\xd7\xed\xa7\xd4"
+			  "\xa7\x3a\x49\xc4\xad\x81\x5c\x83"
+			  "\x55\x8e\x91\x54\xb7\x7d\x65\xa5"
+			  "\x06\x16\xd5\x9a\x16\xc1\xb0\xa2"
+			  "\x06\xd8\x98\x47\x73\x7e\x73\xa0"
+			  "\xb8\x23\xb1\x52\xbf\x68\x74\x5d"
+			  "\x0b\xcb\xfa\x8c\x46\xe3\x24\xe6"
+			  "\xab\xd4\x69\x8d\x8c\xf2\x8a\x59"
+			  "\xbe\x48\x46\x50\x8c\x9a\xe8\xe3"
+			  "\x31\x55\x0a\x06\xed\x4f\xf8\xb7"
+			  "\x4f\xe3\x85\x17\x30\xbd\xd5\x20"
+			  "\xe7\x5b\xb2\x32\xcf\x6b\x16\x44"
+			  "\xd2\xf5\x7e\xd7\xd1\x2f\xee\x64"
+			  "\x3e\x9d\x10\xef\x27\x35\x43\x64"
+			  "\x67\xfb\x7a\x7b\xe0\x62\x31\x9a"
+			  "\x4d\xdf\xa5\xab\xc0\x20\xbb\x01"
+			  "\xe9\x7b\x54\xf1\xde\xb2\x79\x50"
+			  "\x6c\x4b\x91\xdb\x7f\xbb\x50\xc1"
+			  "\x55\x44\x38\x9a\xe0\x9f\xe8\x29"
+			  "\x6f\x15\xf8\x4e\xa6\xec\xa0\x60",
+		.ksize	= 1088,
+		.plaintext	= "\x15\x68\x9e\x2f\xad\x15\x52\xdf"
+			  "\xf0\x42\x62\x24\x2a\x2d\xea\xbf"
+			  "\xc7\xf3\xb4\x1a\xf5\xed\xb2\x08"
+			  "\x15\x60\x1c\x00\x77\xbf\x0b\x0e"
+			  "\xb7\x2c\xcf\x32\x3a\xc7\x01\x77"
+			  "\xef\xa6\x75\xd0\x29\xc7\x68\x20"
+			  "\xb2\x92\x25\xbf\x12\x34\xe9\xa4"
+			  "\xfd\x32\x7b\x3f\x7c\xbd\xa5\x02"
+			  "\x38\x41\xde\xc9\xc1\x09\xd9\xfc"
+			  "\x6e\x78\x22\x83\x18\xf7\x50\x8d"
+			  "\x8f\x9c\x2d\x02\xa5\x30\xac\xff"
+			  "\xea\x63\x2e\x80\x37\x83\xb0\x58"
+			  "\xda\x2f\xef\x21\x55\xba\x7b\xb1"
+			  "\xb6\xed\xf5\xd2\x4d\xaa\x8c\xa9"
+			  "\xdd\xdb\x0f\xb4\xce\xc1\x9a\xb1"
+			  "\xc1\xdc\xbd\xab\x86\xc2\xdf\x0b"
+			  "\xe1\x2c\xf9\xbe\xf6\xd8\xda\x62"
+			  "\x72\xdd\x98\x09\x52\xc0\xc4\xb6"
+			  "\x7b\x17\x5c\xf5\xd8\x4b\x88\xd6"
+			  "\x6b\xbf\x84\x4a\x3f\xf5\x4d\xd2"
+			  "\x94\xe2\x9c\xff\xc7\x3c\xd9\xc8"
+			  "\x37\x38\xbc\x8c\xf3\xe7\xb7\xd0"
+			  "\x1d\x78\xc4\x39\x07\xc8\x5e\x79"
+			  "\xb6\x5a\x90\x5b\x6e\x97\xc9\xd4"
+			  "\x82\x9c\xf3\x83\x7a\xe7\x97\xfc"
+			  "\x1d\xbb\xef\xdb\xce\xe0\x82\xad"
+			  "\xca\x07\x6c\x54\x62\x6f\x81\xe6"
+			  "\x7a\x5a\x96\x6e\x80\x3a\xa2\x37"
+			  "\x6f\xc6\xa4\x29\xc3\x9e\x19\x94"
+			  "\x9f\xb0\x3e\x38\xfb\x3c\x2b\x7d"
+			  "\xaa\xb8\x74\xda\x54\x23\x51\x12"
+			  "\x4b\x96\x36\x8f\x91\x4f\x19\x37"
+			  "\x83\xc9\xdd\xc7\x1a\x32\x2d\xab"
+			  "\xc7\x89\xe2\x07\x47\x6c\xe8\xa6"
+			  "\x70\x6b\x8e\x0c\xda\x5c\x6a\x59"
+			  "\x27\x33\x0e\xe1\xe1\x20\xe8\xc8"
+			  "\xae\xdc\xd0\xe3\x6d\xa8\xa6\x06"
+			  "\x41\xb4\xd4\xd4\xcf\x91\x3e\x06"
+			  "\xb0\x9a\xf7\xf1\xaa\xa6\x23\x92"
+			  "\x10\x86\xf0\x94\xd1\x7c\x2e\x07"
+			  "\x30\xfb\xc5\xd8\xf3\x12\xa9\xe8"
+			  "\x22\x1c\x97\x1a\xad\x96\xb0\xa1"
+			  "\x72\x6a\x6b\xb4\xfd\xf7\xe8\xfa"
+			  "\xe2\x74\xd8\x65\x8d\x35\x17\x4b"
+			  "\x00\x23\x5c\x8c\x70\xad\x71\xa2"
+			  "\xca\xc5\x6c\x59\xbf\xb4\xc0\x6d"
+			  "\x86\x98\x3e\x19\x5a\x90\x92\xb1"
+			  "\x66\x57\x6a\x91\x68\x7c\xbc\xf3"
+			  "\xf1\xdb\x94\xf8\x48\xf1\x36\xd8"
+			  "\x78\xac\x1c\xa9\xcc\xd6\x27\xba"
+			  "\x91\x54\x22\xf5\xe6\x05\x3f\xcc"
+			  "\xc2\x8f\x2c\x3b\x2b\xc3\x2b\x2b"
+			  "\x3b\xb8\xb6\x29\xb7\x2f\x94\xb6"
+			  "\x7b\xfc\x94\x3e\xd0\x7a\x41\x59"
+			  "\x7b\x1f\x9a\x09\xa6\xed\x4a\x82"
+			  "\x9d\x34\x1c\xbd\x4e\x1c\x3a\x66"
+			  "\x80\x74\x0e\x9a\x4f\x55\x54\x47"
+			  "\x16\xba\x2a\x0a\x03\x35\x99\xa3"
+			  "\x5c\x63\x8d\xa2\x72\x8b\x17\x15"
+			  "\x68\x39\x73\xeb\xec\xf2\xe8\xf5"
+			  "\x95\x32\x27\xd6\xc4\xfe\xb0\x51"
+			  "\xd5\x0c\x50\xc5\xcd\x6d\x16\xb3"
+			  "\xa3\x1e\x95\x69\xad\x78\x95\x06"
+			  "\xb9\x46\xf2\x6d\x24\x5a\x99\x76"
+			  "\x73\x6a\x91\xa6\xac\x12\xe1\x28"
+			  "\x79\xbc\x08\x4e\x97\x00\x98\x63"
+			  "\x07\x1c\x4e\xd1\x68\xf3\xb3\x81"
+			  "\xa8\xa6\x5f\xf1\x01\xc9\xc1\xaf"
+			  "\x3a\x96\xf9\x9d\xb5\x5a\x5f\x8f"
+			  "\x7e\xc1\x7e\x77\x0a\x40\xc8\x8e"
+			  "\xfc\x0e\xed\xe1\x0d\xb0\xe5\x5e"
+			  "\x5e\x6f\xf5\x7f\xab\x33\x7d\xcd"
+			  "\xf0\x09\x4b\xb2\x11\x37\xdc\x65"
+			  "\x97\x32\x62\x71\x3a\x29\x54\xb9"
+			  "\xc7\xa4\xbf\x75\x0f\xf9\x40\xa9"
+			  "\x8d\xd7\x8b\xa7\xe0\x9a\xbe\x15"
+			  "\xc6\xda\xd8\x00\x14\x69\x1a\xaf"
+			  "\x5f\x79\xc3\xf5\xbb\x6c\x2a\x9d"
+			  "\xdd\x3c\x5f\x97\x21\xe1\x3a\x03"
+			  "\x84\x6a\xe9\x76\x11\x1f\xd3\xd5"
+			  "\xf0\x54\x20\x4d\xc2\x91\xc3\xa4"
+			  "\x36\x25\xbe\x1b\x2a\x06\xb7\xf3"
+			  "\xd1\xd0\x55\x29\x81\x4c\x83\xa3"
+			  "\xa6\x84\x1e\x5c\xd1\xd0\x6c\x90"
+			  "\xa4\x11\xf0\xd7\x63\x6a\x48\x05"
+			  "\xbc\x48\x18\x53\xcd\xb0\x8d\xdb"
+			  "\xdc\xfe\x55\x11\x5c\x51\xb3\xab"
+			  "\xab\x63\x3e\x31\x5a\x8b\x93\x63"
+			  "\x34\xa9\xba\x2b\x69\x1a\xc0\xe3"
+			  "\xcb\x41\xbc\xd7\xf5\x7f\x82\x3e"
+			  "\x01\xa3\x3c\x72\xf4\xfe\xdf\xbe"
+			  "\xb1\x67\x17\x2b\x37\x60\x0d\xca"
+			  "\x6f\xc3\x94\x2c\xd2\x92\x6d\x9d"
+			  "\x75\x18\x77\xaa\x29\x38\x96\xed"
+			  "\x0e\x20\x70\x92\xd5\xd0\xb4\x00"
+			  "\xc0\x31\xf2\xc9\x43\x0e\x75\x1d"
+			  "\x4b\x64\xf2\x1f\xf2\x29\x6c\x7b"
+			  "\x7f\xec\x59\x7d\x8c\x0d\xd4\xd3"
+			  "\xac\x53\x4c\xa3\xde\x42\x92\x95"
+			  "\x6d\xa3\x4f\xd0\xe6\x3d\xe7\xec"
+			  "\x7a\x4d\x68\xf1\xfe\x67\x66\x09"
+			  "\x83\x22\xb1\x98\x43\x8c\xab\xb8"
+			  "\x45\xe6\x6d\xdf\x5e\x50\x71\xce"
+			  "\xf5\x4e\x40\x93\x2b\xfa\x86\x0e"
+			  "\xe8\x30\xbd\x82\xcc\x1c\x9c\x5f"
+			  "\xad\xfd\x08\x31\xbe\x52\xe7\xe6"
+			  "\xf2\x06\x01\x62\x25\x15\x99\x74"
+			  "\x33\x51\x52\x57\x3f\x57\x87\x61"
+			  "\xb9\x7f\x29\x3d\xcd\x92\x5e\xa6"
+			  "\x5c\x3b\xf1\xed\x5f\xeb\x82\xed"
+			  "\x56\x7b\x61\xe7\xfd\x02\x47\x0e"
+			  "\x2a\x15\xa4\xce\x43\x86\x9b\xe1"
+			  "\x2b\x4c\x2a\xd9\x42\x97\xf7\x9a"
+			  "\xe5\x47\x46\x48\xd3\x55\x6f\x4d"
+			  "\xd9\xeb\x4b\xdd\x7b\x21\x2f\xb3"
+			  "\xa8\x36\x28\xdf\xca\xf1\xf6\xd9"
+			  "\x10\xf6\x1c\xfd\x2e\x0c\x27\xe0"
+			  "\x01\xb3\xff\x6d\x47\x08\x4d\xd4"
+			  "\x00\x25\xee\x55\x4a\xe9\xe8\x5b"
+			  "\xd8\xf7\x56\x12\xd4\x50\xb2\xe5"
+			  "\x51\x6f\x34\x63\x69\xd2\x4e\x96"
+			  "\x4e\xbc\x79\xbf\x18\xae\xc6\x13"
+			  "\x80\x92\x77\xb0\xb4\x0f\x29\x94"
+			  "\x6f\x4c\xbb\x53\x11\x36\xc3\x9f"
+			  "\x42\x8e\x96\x8a\x91\xc8\xe9\xfc"
+			  "\xfe\xbf\x7c\x2d\x6f\xf9\xb8\x44"
+			  "\x89\x1b\x09\x53\x0a\x2a\x92\xc3"
+			  "\x54\x7a\x3a\xf9\xe2\xe4\x75\x87"
+			  "\xa0\x5e\x4b\x03\x7a\x0d\x8a\xf4"
+			  "\x55\x59\x94\x2b\x63\x96\x0e\xf5",
+		.psize	= 1040,
+		.digest	= "\xb5\xb9\x08\xb3\x24\x3e\x03\xf0"
+			  "\xd6\x0b\x57\xbc\x0a\x6d\x89\x59",
+	}, {
+		.key	= "\xf6\x34\x42\x71\x35\x52\x8b\x58"
+			  "\x02\x3a\x8e\x4a\x8d\x41\x13\xe9"
+			  "\x7f\xba\xb9\x55\x9d\x73\x4d\xf8"
+			  "\x3f\x5d\x73\x15\xff\xd3\x9e\x7f"
+			  "\x20\x2a\x6a\xa8\xd1\xf0\x8f\x12"
+			  "\x6b\x02\xd8\x6c\xde\xba\x80\x22"
+			  "\x19\x37\xc8\xd0\x4e\x89\x17\x7c"
+			  "\x7c\xdd\x88\xfd\x41\xc0\x04\xb7"
+			  "\x1d\xac\x19\xe3\x20\xc7\x16\xcf"
+			  "\x58\xee\x1d\x7a\x61\x69\xa9\x12"
+			  "\x4b\xef\x4f\xb6\x38\xdd\x78\xf8"
+			  "\x28\xee\x70\x08\xc7\x7c\xcc\xc8"
+			  "\x1e\x41\xf5\x80\x86\x70\xd0\xf0"
+			  "\xa3\x87\x6b\x0a\x00\xd2\x41\x28"
+			  "\x74\x26\xf1\x24\xf3\xd0\x28\x77"
+			  "\xd7\xcd\xf6\x2d\x61\xf4\xa2\x13"
+			  "\x77\xb4\x6f\xa0\xf4\xfb\xd6\xb5"
+			  "\x38\x9d\x5a\x0c\x51\xaf\xad\x63"
+			  "\x27\x67\x8c\x01\xea\x42\x1a\x66"
+			  "\xda\x16\x7c\x3c\x30\x0c\x66\x53"
+			  "\x1c\x88\xa4\x5c\xb2\xe3\x78\x0a"
+			  "\x13\x05\x6d\xe2\xaf\xb3\xe4\x75"
+			  "\x00\x99\x58\xee\x76\x09\x64\xaa"
+			  "\xbb\x2e\xb1\x81\xec\xd8\x0e\xd3"
+			  "\x0c\x33\x5d\xb7\x98\xef\x36\xb6"
+			  "\xd2\x65\x69\x41\x70\x12\xdc\x25"
+			  "\x41\x03\x99\x81\x41\x19\x62\x13"
+			  "\xd1\x0a\x29\xc5\x8c\xe0\x4c\xf3"
+			  "\xd6\xef\x4c\xf4\x1d\x83\x2e\x6d"
+			  "\x8e\x14\x87\xed\x80\xe0\xaa\xd3"
+			  "\x08\x04\x73\x1a\x84\x40\xf5\x64"
+			  "\xbd\x61\x32\x65\x40\x42\xfb\xb0"
+			  "\x40\xf6\x40\x8d\xc7\x7f\x14\xd0"
+			  "\x83\x99\xaa\x36\x7e\x60\xc6\xbf"
+			  "\x13\x8a\xf9\x21\xe4\x7e\x68\x87"
+			  "\xf3\x33\x86\xb4\xe0\x23\x7e\x0a"
+			  "\x21\xb1\xf5\xad\x67\x3c\x9c\x9d"
+			  "\x09\xab\xaf\x5f\xba\xe0\xd0\x82"
+			  "\x48\x22\x70\xb5\x6d\x53\xd6\x0e"
+			  "\xde\x64\x92\x41\xb0\xd3\xfb\xda"
+			  "\x21\xfe\xab\xea\x20\xc4\x03\x58"
+			  "\x18\x2e\x7d\x2f\x03\xa9\x47\x66"
+			  "\xdf\x7b\xa4\x6b\x34\x6b\x55\x9c"
+			  "\x4f\xd7\x9c\x47\xfb\xa9\x42\xec"
+			  "\x5a\x12\xfd\xfe\x76\xa0\x92\x9d"
+			  "\xfe\x1e\x16\xdd\x24\x2a\xe4\x27"
+			  "\xd5\xa9\xf2\x05\x4f\x83\xa2\xaf"
+			  "\xfe\xee\x83\x7a\xad\xde\xdf\x9a"
+			  "\x80\xd5\x81\x14\x93\x16\x7e\x46"
+			  "\x47\xc2\x14\xef\x49\x6e\xb9\xdb"
+			  "\x40\xe8\x06\x6f\x9c\x2a\xfd\x62"
+			  "\x06\x46\xfd\x15\x1d\x36\x61\x6f"
+			  "\x77\x77\x5e\x64\xce\x78\x1b\x85"
+			  "\xbf\x50\x9a\xfd\x67\xa6\x1a\x65"
+			  "\xad\x5b\x33\x30\xf1\x71\xaa\xd9"
+			  "\x23\x0d\x92\x24\x5f\xae\x57\xb0"
+			  "\x24\x37\x0a\x94\x12\xfb\xb5\xb1"
+			  "\xd3\xb8\x1d\x12\x29\xb0\x80\x24"
+			  "\x2d\x47\x9f\x96\x1f\x95\xf1\xb1"
+			  "\xda\x35\xf6\x29\xe0\xe1\x23\x96"
+			  "\xc7\xe8\x22\x9b\x7c\xac\xf9\x41"
+			  "\x39\x01\xe5\x73\x15\x5e\x99\xec"
+			  "\xb4\xc1\xf4\xe7\xa7\x97\x6a\xd5"
+			  "\x90\x9a\xa0\x1d\xf3\x5a\x8b\x5f"
+			  "\xdf\x01\x52\xa4\x93\x31\x97\xb0"
+			  "\x93\x24\xb5\xbc\xb2\x14\x24\x98"
+			  "\x4a\x8f\x19\x85\xc3\x2d\x0f\x74"
+			  "\x9d\x16\x13\x80\x5e\x59\x62\x62"
+			  "\x25\xe0\xd1\x2f\x64\xef\xba\xac"
+			  "\xcd\x09\x07\x15\x8a\xcf\x73\xb5"
+			  "\x8b\xc9\xd8\x24\xb0\x53\xd5\x6f"
+			  "\xe1\x2b\x77\xb1\xc5\xe4\xa7\x0e"
+			  "\x18\x45\xab\x36\x03\x59\xa8\xbd"
+			  "\x43\xf0\xd8\x2c\x1a\x69\x96\xbb"
+			  "\x13\xdf\x6c\x33\x77\xdf\x25\x34"
+			  "\x5b\xa5\x5b\x8c\xf9\x51\x05\xd4"
+			  "\x8b\x8b\x44\x87\x49\xfc\xa0\x8f"
+			  "\x45\x15\x5b\x40\x42\xc4\x09\x92"
+			  "\x98\x0c\x4d\xf4\x26\x37\x1b\x13"
+			  "\x76\x01\x93\x8d\x4f\xe6\xed\x18"
+			  "\xd0\x79\x7b\x3f\x44\x50\xcb\xee"
+			  "\xf7\x4a\xc9\x9e\xe0\x96\x74\xa7"
+			  "\xe6\x93\xb2\x53\xca\x55\xa8\xdc"
+			  "\x1e\x68\x07\x87\xb7\x2e\xc1\x08"
+			  "\xb2\xa4\x5b\xaf\xc6\xdb\x5c\x66"
+			  "\x41\x1c\x51\xd9\xb0\x07\x00\x0d"
+			  "\xf0\x4c\xdc\x93\xde\xa9\x1e\x8e"
+			  "\xd3\x22\x62\xd8\x8b\x88\x2c\xea"
+			  "\x5e\xf1\x6e\x14\x40\xc7\xbe\xaa"
+			  "\x42\x28\xd0\x26\x30\x78\x01\x9b"
+			  "\x83\x07\xbc\x94\xc7\x57\xa2\x9f"
+			  "\x03\x07\xff\x16\xff\x3c\x6e\x48"
+			  "\x0a\xd0\xdd\x4c\xf6\x64\x9a\xf1"
+			  "\xcd\x30\x12\x82\x2c\x38\xd3\x26"
+			  "\x83\xdb\xab\x3e\xc6\xf8\xe6\xfa"
+			  "\x77\x0a\x78\x82\x75\xf8\x63\x51"
+			  "\x59\xd0\x8d\x24\x9f\x25\xe6\xa3"
+			  "\x4c\xbc\x34\xfc\xe3\x10\xc7\x62"
+			  "\xd4\x23\xc8\x3d\xa7\xc6\xa6\x0a"
+			  "\x4f\x7e\x29\x9d\x6d\xbe\xb5\xf1"
+			  "\xdf\xa4\x53\xfa\xc0\x23\x0f\x37"
+			  "\x84\x68\xd0\xb5\xc8\xc6\xae\xf8"
+			  "\xb7\x8d\xb3\x16\xfe\x8f\x87\xad"
+			  "\xd0\xc1\x08\xee\x12\x1c\x9b\x1d"
+			  "\x90\xf8\xd1\x63\xa4\x92\x3c\xf0"
+			  "\xc7\x34\xd8\xf1\x14\xed\xa3\xbc"
+			  "\x17\x7e\xd4\x62\x42\x54\x57\x2c"
+			  "\x3e\x7a\x35\x35\x17\x0f\x0b\x7f"
+			  "\x81\xa1\x3f\xd0\xcd\xc8\x3b\x96"
+			  "\xe9\xe0\x4a\x04\xe1\xb6\x3c\xa1"
+			  "\xd6\xca\xc4\xbd\xb6\xb5\x95\x34"
+			  "\x12\x9d\xc5\x96\xf2\xdf\xba\x54"
+			  "\x76\xd1\xb2\x6b\x3b\x39\xe0\xb9"
+			  "\x18\x62\xfb\xf7\xfc\x12\xf1\x5f"
+			  "\x7e\xc7\xe3\x59\x4c\xa6\xc2\x3d"
+			  "\x40\x15\xf9\xa3\x95\x64\x4c\x74"
+			  "\x8b\x73\x77\x33\x07\xa7\x04\x1d"
+			  "\x33\x5a\x7e\x8f\xbd\x86\x01\x4f"
+			  "\x3e\xb9\x27\x6f\xe2\x41\xf7\x09"
+			  "\x67\xfd\x29\x28\xc5\xe4\xf6\x18"
+			  "\x4c\x1b\x49\xb2\x9c\x5b\xf6\x81"
+			  "\x4f\xbb\x5c\xcc\x0b\xdf\x84\x23"
+			  "\x58\xd6\x28\x34\x93\x3a\x25\x97"
+			  "\xdf\xb2\xc3\x9e\x97\x38\x0b\x7d"
+			  "\x10\xb3\x54\x35\x23\x8c\x64\xee"
+			  "\xf0\xd8\x66\xff\x8b\x22\xd2\x5b"
+			  "\x05\x16\x3c\x89\xf7\xb1\x75\xaf"
+			  "\xc0\xae\x6a\x4f\x3f\xaf\x9a\xf4"
+			  "\xf4\x9a\x24\xd9\x80\x82\xc0\x12"
+			  "\xde\x96\xd1\xbe\x15\x0b\x8d\x6a"
+			  "\xd7\x12\xe4\x85\x9f\x83\xc9\xc3"
+			  "\xff\x0b\xb5\xaf\x3b\xd8\x6d\x67"
+			  "\x81\x45\xe6\xac\xec\xc1\x7b\x16"
+			  "\x18\x0a\xce\x4b\xc0\x2e\x76\xbc"
+			  "\x1b\xfa\xb4\x34\xb8\xfc\x3e\xc8"
+			  "\x5d\x90\x71\x6d\x7a\x79\xef\x06",
+		.ksize	= 1088,
+		.plaintext	= "\xaa\x5d\x54\xcb\xea\x1e\x46\x0f"
+			  "\x45\x87\x70\x51\x8a\x66\x7a\x33"
+			  "\xb4\x18\xff\xa9\x82\xf9\x45\x4b"
+			  "\x93\xae\x2e\x7f\xab\x98\xfe\xbf"
+			  "\x01\xee\xe5\xa0\x37\x8f\x57\xa6"
+			  "\xb0\x76\x0d\xa4\xd6\x28\x2b\x5d"
+			  "\xe1\x03\xd6\x1c\x6f\x34\x0d\xe7"
+			  "\x61\x2d\x2e\xe5\xae\x5d\x47\xc7"
+			  "\x80\x4b\x18\x8f\xa8\x99\xbc\x28"
+			  "\xed\x1d\x9d\x86\x7d\xd7\x41\xd1"
+			  "\xe0\x2b\xe1\x8c\x93\x2a\xa7\x80"
+			  "\xe1\x07\xa0\xa9\x9f\x8c\x8d\x1a"
+			  "\x55\xfc\x6b\x24\x7a\xbd\x3e\x51"
+			  "\x68\x4b\x26\x59\xc8\xa7\x16\xd9"
+			  "\xb9\x61\x13\xde\x8b\x63\x1c\xf6"
+			  "\x60\x01\xfb\x08\xb3\x5b\x0a\xbf"
+			  "\x34\x73\xda\x87\x87\x3d\x6f\x97"
+			  "\x4a\x0c\xa3\x58\x20\xa2\xc0\x81"
+			  "\x5b\x8c\xef\xa9\xc2\x01\x1e\x64"
+			  "\x83\x8c\xbc\x03\xb6\xd0\x29\x9f"
+			  "\x54\xe2\xce\x8b\xc2\x07\x85\x78"
+			  "\x25\x38\x96\x4c\xb4\xbe\x17\x4a"
+			  "\x65\xa6\xfa\x52\x9d\x66\x9d\x65"
+			  "\x4a\xd1\x01\x01\xf0\xcb\x13\xcc"
+			  "\xa5\x82\xf3\xf2\x66\xcd\x3f\x9d"
+			  "\xd1\xaa\xe4\x67\xea\xf2\xad\x88"
+			  "\x56\x76\xa7\x9b\x59\x3c\xb1\x5d"
+			  "\x78\xfd\x69\x79\x74\x78\x43\x26"
+			  "\x7b\xde\x3f\xf1\xf5\x4e\x14\xd9"
+			  "\x15\xf5\x75\xb5\x2e\x19\xf3\x0c"
+			  "\x48\x72\xd6\x71\x6d\x03\x6e\xaa"
+			  "\xa7\x08\xf9\xaa\x70\xa3\x0f\x4d"
+			  "\x12\x8a\xdd\xe3\x39\x73\x7e\xa7"
+			  "\xea\x1f\x6d\x06\x26\x2a\xf2\xc5"
+			  "\x52\xb4\xbf\xfd\x52\x0c\x06\x60"
+			  "\x90\xd1\xb2\x7b\x56\xae\xac\x58"
+			  "\x5a\x6b\x50\x2a\xf5\xe0\x30\x3c"
+			  "\x2a\x98\x0f\x1b\x5b\x0a\x84\x6c"
+			  "\x31\xae\x92\xe2\xd4\xbb\x7f\x59"
+			  "\x26\x10\xb9\x89\x37\x68\x26\xbf"
+			  "\x41\xc8\x49\xc4\x70\x35\x7d\xff"
+			  "\x2d\x7f\xf6\x8a\x93\x68\x8c\x78"
+			  "\x0d\x53\xce\x7d\xff\x7d\xfb\xae"
+			  "\x13\x1b\x75\xc4\x78\xd7\x71\xd8"
+			  "\xea\xd3\xf4\x9d\x95\x64\x8e\xb4"
+			  "\xde\xb8\xe4\xa6\x68\xc8\xae\x73"
+			  "\x58\xaf\xa8\xb0\x5a\x20\xde\x87"
+			  "\x43\xb9\x0f\xe3\xad\x41\x4b\xd5"
+			  "\xb7\xad\x16\x00\xa6\xff\xf6\x74"
+			  "\xbf\x8c\x9f\xb3\x58\x1b\xb6\x55"
+			  "\xa9\x90\x56\x28\xf0\xb5\x13\x4e"
+			  "\x9e\xf7\x25\x86\xe0\x07\x7b\x98"
+			  "\xd8\x60\x5d\x38\x95\x3c\xe4\x22"
+			  "\x16\x2f\xb2\xa2\xaf\xe8\x90\x17"
+			  "\xec\x11\x83\x1a\xf4\xa9\x26\xda"
+			  "\x39\x72\xf5\x94\x61\x05\x51\xec"
+			  "\xa8\x30\x8b\x2c\x13\xd0\x72\xac"
+			  "\xb9\xd2\xa0\x4c\x4b\x78\xe8\x6e"
+			  "\x04\x85\xe9\x04\x49\x82\x91\xff"
+			  "\x89\xe5\xab\x4c\xaa\x37\x03\x12"
+			  "\xca\x8b\x74\x10\xfd\x9e\xd9\x7b"
+			  "\xcb\xdb\x82\x6e\xce\x2e\x33\x39"
+			  "\xce\xd2\x84\x6e\x34\x71\x51\x6e"
+			  "\x0d\xd6\x01\x87\xc7\xfa\x0a\xd3"
+			  "\xad\x36\xf3\x4c\x9f\x96\x5e\x62"
+			  "\x62\x54\xc3\x03\x78\xd6\xab\xdd"
+			  "\x89\x73\x55\x25\x30\xf8\xa7\xe6"
+			  "\x4f\x11\x0c\x7c\x0a\xa1\x2b\x7b"
+			  "\x3d\x0d\xde\x81\xd4\x9d\x0b\xae"
+			  "\xdf\x00\xf9\x4c\xb6\x90\x8e\x16"
+			  "\xcb\x11\xc8\xd1\x2e\x73\x13\x75"
+			  "\x75\x3e\xaa\xf5\xee\x02\xb3\x18"
+			  "\xa6\x2d\xf5\x3b\x51\xd1\x1f\x47"
+			  "\x6b\x2c\xdb\xc4\x10\xe0\xc8\xba"
+			  "\x9d\xac\xb1\x9d\x75\xd5\x41\x0e"
+			  "\x7e\xbe\x18\x5b\xa4\x1f\xf8\x22"
+			  "\x4c\xc1\x68\xda\x6d\x51\x34\x6c"
+			  "\x19\x59\xec\xb5\xb1\xec\xa7\x03"
+			  "\xca\x54\x99\x63\x05\x6c\xb1\xac"
+			  "\x9c\x31\xd6\xdb\xba\x7b\x14\x12"
+			  "\x7a\xc3\x2f\xbf\x8d\xdc\x37\x46"
+			  "\xdb\xd2\xbc\xd4\x2f\xab\x30\xd5"
+			  "\xed\x34\x99\x8e\x83\x3e\xbe\x4c"
+			  "\x86\x79\x58\xe0\x33\x8d\x9a\xb8"
+			  "\xa9\xa6\x90\x46\xa2\x02\xb8\xdd"
+			  "\xf5\xf9\x1a\x5c\x8c\x01\xaa\x6e"
+			  "\xb4\x22\x12\xf5\x0c\x1b\x9b\x7a"
+			  "\xc3\x80\xf3\x06\x00\x5f\x30\xd5"
+			  "\x06\xdb\x7d\x82\xc2\xd4\x0b\x4c"
+			  "\x5f\xe9\xc5\xf5\xdf\x97\x12\xbf"
+			  "\x56\xaf\x9b\x69\xcd\xee\x30\xb4"
+			  "\xa8\x71\xff\x3e\x7d\x73\x7a\xb4"
+			  "\x0d\xa5\x46\x7a\xf3\xf4\x15\x87"
+			  "\x5d\x93\x2b\x8c\x37\x64\xb5\xdd"
+			  "\x48\xd1\xe5\x8c\xae\xd4\xf1\x76"
+			  "\xda\xf4\xba\x9e\x25\x0e\xad\xa3"
+			  "\x0d\x08\x7c\xa8\x82\x16\x8d\x90"
+			  "\x56\x40\x16\x84\xe7\x22\x53\x3a"
+			  "\x58\xbc\xb9\x8f\x33\xc8\xc2\x84"
+			  "\x22\xe6\x0d\xe7\xb3\xdc\x5d\xdf"
+			  "\xd7\x2a\x36\xe4\x16\x06\x07\xd2"
+			  "\x97\x60\xb2\xf5\x5e\x14\xc9\xfd"
+			  "\x8b\x05\xd1\xce\xee\x9a\x65\x99"
+			  "\xb7\xae\x19\xb7\xc8\xbc\xd5\xa2"
+			  "\x7b\x95\xe1\xcc\xba\x0d\xdc\x8a"
+			  "\x1d\x59\x52\x50\xaa\x16\x02\x82"
+			  "\xdf\x61\x33\x2e\x44\xce\x49\xc7"
+			  "\xe5\xc6\x2e\x76\xcf\x80\x52\xf0"
+			  "\x3d\x17\x34\x47\x3f\xd3\x80\x48"
+			  "\xa2\xba\xd5\xc7\x7b\x02\x28\xdb"
+			  "\xac\x44\xc7\x6e\x05\x5c\xc2\x79"
+			  "\xb3\x7d\x6a\x47\x77\x66\xf1\x38"
+			  "\xf0\xf5\x4f\x27\x1a\x31\xca\x6c"
+			  "\x72\x95\x92\x8e\x3f\xb0\xec\x1d"
+			  "\xc7\x2a\xff\x73\xee\xdf\x55\x80"
+			  "\x93\xd2\xbd\x34\xd3\x9f\x00\x51"
+			  "\xfb\x2e\x41\xba\x6c\x5a\x7c\x17"
+			  "\x7f\xe6\x70\xac\x8d\x39\x3f\x77"
+			  "\xe2\x23\xac\x8f\x72\x4e\xe4\x53"
+			  "\xcc\xf1\x1b\xf1\x35\xfe\x52\xa4"
+			  "\xd6\xb8\x40\x6b\xc1\xfd\xa0\xa1"
+			  "\xf5\x46\x65\xc2\x50\xbb\x43\xe2"
+			  "\xd1\x43\x28\x34\x74\xf5\x87\xa0"
+			  "\xf2\x5e\x27\x3b\x59\x2b\x3e\x49"
+			  "\xdf\x46\xee\xaf\x71\xd7\x32\x36"
+			  "\xc7\x14\x0b\x58\x6e\x3e\x2d\x41"
+			  "\xfa\x75\x66\x3a\x54\xe0\xb2\xb9"
+			  "\xaf\xdd\x04\x80\x15\x19\x3f\x6f"
+			  "\xce\x12\xb4\xd8\xe8\x89\x3c\x05"
+			  "\x30\xeb\xf3\x3d\xcd\x27\xec\xdc"
+			  "\x56\x70\x12\xcf\x78\x2b\x77\xbf"
+			  "\x22\xf0\x1b\x17\x9c\xcc\xd6\x1b"
+			  "\x2d\x3d\xa0\x3b\xd8\xc9\x70\xa4"
+			  "\x7a\x3e\x07\xb9\x06\xc3\xfa\xb0"
+			  "\x33\xee\xc1\xd8\xf6\xe0\xf0\xb2"
+			  "\x61\x12\x69\xb0\x5f\x28\x99\xda"
+			  "\xc3\x61\x48\xfa\x07\x16\x03\xc4"
+			  "\xa8\xe1\x3c\xe8\x0e\x64\x15\x30"
+			  "\xc1\x9d\x84\x2f\x73\x98\x0e\x3a"
+			  "\xf2\x86\x21\xa4\x9e\x1d\xb5\x86"
+			  "\x16\xdb\x2b\x9a\x06\x64\x8e\x79"
+			  "\x8d\x76\x3e\xc3\xc2\x64\x44\xe3"
+			  "\xda\xbc\x1a\x52\xd7\x61\x03\x65"
+			  "\x54\x32\x77\x01\xed\x9d\x8a\x43"
+			  "\x25\x24\xe3\xc1\xbe\xb8\x2f\xcb"
+			  "\x89\x14\x64\xab\xf6\xa0\x6e\x02"
+			  "\x57\xe4\x7d\xa9\x4e\x9a\x03\x36"
+			  "\xad\xf1\xb1\xfc\x0b\xe6\x79\x51"
+			  "\x9f\x81\x77\xc4\x14\x78\x9d\xbf"
+			  "\xb6\xd6\xa3\x8c\xba\x0b\x26\xe7"
+			  "\xc8\xb9\x5c\xcc\xe1\x5f\xd5\xc6"
+			  "\xc4\xca\xc2\xa3\x45\xba\x94\x13"
+			  "\xb2\x8f\xc3\x54\x01\x09\xe7\x8b"
+			  "\xda\x2a\x0a\x11\x02\x43\xcb\x57"
+			  "\xc9\xcc\xb5\x5c\xab\xc4\xec\x54"
+			  "\x00\x06\x34\xe1\x6e\x03\x89\x7c"
+			  "\xc6\xfb\x6a\xc7\x60\x43\xd6\xc5"
+			  "\xb5\x68\x72\x89\x8f\x42\xc3\x74"
+			  "\xbd\x25\xaa\x9f\x67\xb5\xdf\x26"
+			  "\x20\xe8\xb7\x01\x3c\xe4\x77\xce"
+			  "\xc4\x65\xa7\x23\x79\xea\x33\xc7"
+			  "\x82\x14\x5c\x82\xf2\x4e\x3d\xf6"
+			  "\xc6\x4a\x0e\x29\xbb\xec\x44\xcd"
+			  "\x2f\xd1\x4f\x21\x71\xa9\xce\x0f"
+			  "\x5c\xf2\x72\x5c\x08\x2e\x21\xd2"
+			  "\xc3\x29\x13\xd8\xac\xc3\xda\x13"
+			  "\x1a\x9d\xa7\x71\x1d\x27\x1d\x27"
+			  "\x1d\xea\xab\x44\x79\xad\xe5\xeb"
+			  "\xef\x1f\x22\x0a\x44\x4f\xcb\x87"
+			  "\xa7\x58\x71\x0e\x66\xf8\x60\xbf"
+			  "\x60\x74\x4a\xb4\xec\x2e\xfe\xd3"
+			  "\xf5\xb8\xfe\x46\x08\x50\x99\x6c"
+			  "\x66\xa5\xa8\x34\x44\xb5\xe5\xf0"
+			  "\xdd\x2c\x67\x4e\x35\x96\x8e\x67"
+			  "\x48\x3f\x5f\x37\x44\x60\x51\x2e"
+			  "\x14\x91\x5e\x57\xc3\x0e\x79\x77"
+			  "\x2f\x03\xf4\xe2\x1c\x72\xbf\x85"
+			  "\x5d\xd3\x17\xdf\x6c\xc5\x70\x24"
+			  "\x42\xdf\x51\x4e\x2a\xb2\xd2\x5b"
+			  "\x9e\x69\x83\x41\x11\xfe\x73\x22"
+			  "\xde\x8a\x9e\xd8\x8a\xfb\x20\x38"
+			  "\xd8\x47\x6f\xd5\xed\x8f\x41\xfd"
+			  "\x13\x7a\x18\x03\x7d\x0f\xcd\x7d"
+			  "\xa6\x7d\x31\x9e\xf1\x8f\x30\xa3"
+			  "\x8b\x4c\x24\xb7\xf5\x48\xd7\xd9"
+			  "\x12\xe7\x84\x97\x5c\x31\x6d\xfb"
+			  "\xdf\xf3\xd3\xd1\xd5\x0c\x30\x06"
+			  "\x01\x6a\xbc\x6c\x78\x7b\xa6\x50"
+			  "\xfa\x0f\x3c\x42\x2d\xa5\xa3\x3b"
+			  "\xcf\x62\x50\xff\x71\x6d\xe7\xda"
+			  "\x27\xab\xc6\x67\x16\x65\x68\x64"
+			  "\xc7\xd5\x5f\x81\xa9\xf6\x65\xb3"
+			  "\x5e\x43\x91\x16\xcd\x3d\x55\x37"
+			  "\x55\xb3\xf0\x28\xc5\x54\x19\xc0"
+			  "\xe0\xd6\x2a\x61\xd4\xc8\x72\x51"
+			  "\xe9\xa1\x7b\x48\x21\xad\x44\x09"
+			  "\xe4\x01\x61\x3c\x8a\x5b\xf9\xa1"
+			  "\x6e\x1b\xdf\xc0\x04\xa8\x8b\xf2"
+			  "\x21\xbe\x34\x7b\xfc\xa1\xcd\xc9"
+			  "\xa9\x96\xf4\xa4\x4c\xf7\x4e\x8f"
+			  "\x84\xcc\xd3\xa8\x92\x77\x8f\x36"
+			  "\xe2\x2e\x8c\x33\xe8\x84\xa6\x0c"
+			  "\x6c\x8a\xda\x14\x32\xc2\x96\xff"
+			  "\xc6\x4a\xc2\x9b\x30\x7f\xd1\x29"
+			  "\xc0\xd5\x78\x41\x00\x80\x80\x03"
+			  "\x2a\xb1\xde\x26\x03\x48\x49\xee"
+			  "\x57\x14\x76\x51\x3c\x36\x5d\x0a"
+			  "\x5c\x9f\xe8\xd8\x53\xdb\x4f\xd4"
+			  "\x38\xbf\x66\xc9\x75\x12\x18\x75"
+			  "\x34\x2d\x93\x22\x96\x51\x24\x6e"
+			  "\x4e\xd9\x30\xea\x67\xff\x92\x1c"
+			  "\x16\x26\xe9\xb5\x33\xab\x8c\x22"
+			  "\x47\xdb\xa0\x2c\x08\xf0\x12\x69"
+			  "\x7e\x93\x52\xda\xa5\xe5\xca\xc1"
+			  "\x0f\x55\x2a\xbd\x09\x30\x88\x1b"
+			  "\x9c\xc6\x9f\xe6\xdb\xa6\x92\xeb"
+			  "\xf4\xbd\x5c\xc4\xdb\xc6\x71\x09"
+			  "\xab\x5e\x48\x0c\xed\x6f\xda\x8e"
+			  "\x8d\x0c\x98\x71\x7d\x10\xd0\x9c"
+			  "\x20\x9b\x79\x53\x26\x5d\xb9\x85"
+			  "\x8a\x31\xb8\xc5\x1c\x97\xde\x88"
+			  "\x61\x55\x7f\x7c\x21\x06\xea\xc4"
+			  "\x5f\xaf\xf2\xf0\xd5\x5e\x7d\xb4"
+			  "\x6e\xcf\xe9\xae\x1b\x0e\x11\x80"
+			  "\xc1\x9a\x74\x7e\x52\x6f\xa0\xb7"
+			  "\x24\xcd\x8d\x0a\x11\x40\x63\x72"
+			  "\xfa\xe2\xc5\xb3\x94\xef\x29\xa2"
+			  "\x1a\x23\x43\x04\x37\x55\x0d\xe9"
+			  "\x83\xb2\x29\x51\x49\x64\xa0\xbd"
+			  "\xde\x73\xfd\xa5\x7c\x95\x70\x62"
+			  "\x58\xdc\xe2\xd0\xbf\x98\xf5\x8a"
+			  "\x6a\xfd\xce\xa8\x0e\x42\x2a\xeb"
+			  "\xd2\xff\x83\x27\x53\x5c\xa0\x6e"
+			  "\x93\xef\xe2\xb9\x5d\x35\xd6\x98"
+			  "\xf6\x71\x19\x7a\x54\xa1\xa7\xe8"
+			  "\x09\xfe\xf6\x9e\xc7\xbd\x3e\x29"
+			  "\xbd\x6b\x17\xf4\xe7\x3e\x10\x5c"
+			  "\xc1\xd2\x59\x4f\x4b\x12\x1a\x5b"
+			  "\x50\x80\x59\xb9\xec\x13\x66\xa8"
+			  "\xd2\x31\x7b\x6a\x61\x22\xdd\x7d"
+			  "\x61\xee\x87\x16\x46\x9f\xf9\xc7"
+			  "\x41\xee\x74\xf8\xd0\x96\x2c\x76"
+			  "\x2a\xac\x7d\x6e\x9f\x0e\x7f\x95"
+			  "\xfe\x50\x16\xb2\x23\xca\x62\xd5"
+			  "\x68\xcf\x07\x3f\x3f\x97\x85\x2a"
+			  "\x0c\x25\x45\xba\xdb\x32\xcb\x83"
+			  "\x8c\x4f\xe0\x6d\x9a\x99\xf9\xc9"
+			  "\xda\xd4\x19\x31\xc1\x7c\x6d\xd9"
+			  "\x9c\x56\xd3\xec\xc1\x81\x4c\xed"
+			  "\x28\x9d\x87\xeb\x19\xd7\x1a\x4f"
+			  "\x04\x6a\xcb\x1f\xcf\x1f\xa2\x16"
+			  "\xfc\x2a\x0d\xa1\x14\x2d\xfa\xc5"
+			  "\x5a\xd2\xc5\xf9\x19\x7c\x20\x1f"
+			  "\x2d\x10\xc0\x66\x7c\xd9\x2d\xe5"
+			  "\x88\x70\x59\xa7\x85\xd5\x2e\x7c"
+			  "\x5c\xe3\xb7\x12\xd6\x97\x3f\x29",
+		.psize	= 2048,
+		.digest	= "\x37\x90\x92\xc2\xeb\x01\x87\xd9"
+			  "\x95\xc7\x91\xc3\x17\x8b\x38\x52",
+	}
+};
+
 /*
  * DES test vectors.
  */
@@ -13622,1492 +14853,6 @@ static struct cipher_testvec serpent_xts_dec_tv_template[] = {
 	},
 };
 
-/*
- * Speck test vectors taken from the original paper:
- * "The Simon and Speck Families of Lightweight Block Ciphers"
- * https://eprint.iacr.org/2013/404.pdf
- *
- * Note that the paper does not make byte and word order clear.  But it was
- * confirmed with the authors that the intended orders are little endian byte
- * order and (y, x) word order.  Equivalently, the printed test vectors, when
- * looking at only the bytes (ignoring the whitespace that divides them into
- * words), are backwards: the left-most byte is actually the one with the
- * highest memory address, while the right-most byte is actually the one with
- * the lowest memory address.
- */
-
-static struct cipher_testvec speck128_enc_tv_template[] = {
-	{ /* Speck128/128 */
-		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.klen	= 16,
-		.input	= "\x20\x6d\x61\x64\x65\x20\x69\x74"
-			  "\x20\x65\x71\x75\x69\x76\x61\x6c",
-		.ilen	= 16,
-		.result	= "\x18\x0d\x57\x5c\xdf\xfe\x60\x78"
-			  "\x65\x32\x78\x79\x51\x98\x5d\xa6",
-		.rlen	= 16,
-	}, { /* Speck128/192 */
-		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17",
-		.klen	= 24,
-		.input	= "\x65\x6e\x74\x20\x74\x6f\x20\x43"
-			  "\x68\x69\x65\x66\x20\x48\x61\x72",
-		.ilen	= 16,
-		.result	= "\x86\x18\x3c\xe0\x5d\x18\xbc\xf9"
-			  "\x66\x55\x13\x13\x3a\xcf\xe4\x1b",
-		.rlen	= 16,
-	}, { /* Speck128/256 */
-		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
-		.klen	= 32,
-		.input	= "\x70\x6f\x6f\x6e\x65\x72\x2e\x20"
-			  "\x49\x6e\x20\x74\x68\x6f\x73\x65",
-		.ilen	= 16,
-		.result	= "\x43\x8f\x18\x9c\x8d\xb4\xee\x4e"
-			  "\x3e\xf5\xc0\x05\x04\x01\x09\x41",
-		.rlen	= 16,
-	},
-};
-
-static struct cipher_testvec speck128_dec_tv_template[] = {
-	{ /* Speck128/128 */
-		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.klen	= 16,
-		.input	= "\x18\x0d\x57\x5c\xdf\xfe\x60\x78"
-			  "\x65\x32\x78\x79\x51\x98\x5d\xa6",
-		.ilen	= 16,
-		.result	= "\x20\x6d\x61\x64\x65\x20\x69\x74"
-			  "\x20\x65\x71\x75\x69\x76\x61\x6c",
-		.rlen	= 16,
-	}, { /* Speck128/192 */
-		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17",
-		.klen	= 24,
-		.input	= "\x86\x18\x3c\xe0\x5d\x18\xbc\xf9"
-			  "\x66\x55\x13\x13\x3a\xcf\xe4\x1b",
-		.ilen	= 16,
-		.result	= "\x65\x6e\x74\x20\x74\x6f\x20\x43"
-			  "\x68\x69\x65\x66\x20\x48\x61\x72",
-		.rlen	= 16,
-	}, { /* Speck128/256 */
-		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
-		.klen	= 32,
-		.input	= "\x43\x8f\x18\x9c\x8d\xb4\xee\x4e"
-			  "\x3e\xf5\xc0\x05\x04\x01\x09\x41",
-		.ilen	= 16,
-		.result	= "\x70\x6f\x6f\x6e\x65\x72\x2e\x20"
-			  "\x49\x6e\x20\x74\x68\x6f\x73\x65",
-		.rlen	= 16,
-	},
-};
-
-/*
- * Speck128-XTS test vectors, taken from the AES-XTS test vectors with the
- * result recomputed with Speck128 as the cipher
- */
-
-static struct cipher_testvec speck128_xts_enc_tv_template[] = {
-	{
-		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ilen	= 32,
-		.result	= "\xbe\xa0\xe7\x03\xd7\xfe\xab\x62"
-			  "\x3b\x99\x4a\x64\x74\x77\xac\xed"
-			  "\xd8\xf4\xa6\xcf\xae\xb9\x07\x42"
-			  "\x51\xd9\xb6\x1d\xe0\x5e\xbc\x54",
-		.rlen	= 32,
-	}, {
-		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.ilen	= 32,
-		.result	= "\xfb\x53\x81\x75\x6f\x9f\x34\xad"
-			  "\x7e\x01\xed\x7b\xcc\xda\x4e\x4a"
-			  "\xd4\x84\xa4\x53\xd5\x88\x73\x1b"
-			  "\xfd\xcb\xae\x0d\xf3\x04\xee\xe6",
-		.rlen	= 32,
-	}, {
-		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
-			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.ilen	= 32,
-		.result	= "\x21\x52\x84\x15\xd1\xf7\x21\x55"
-			  "\xd9\x75\x4a\xd3\xc5\xdb\x9f\x7d"
-			  "\xda\x63\xb2\xf1\x82\xb0\x89\x59"
-			  "\x86\xd4\xaa\xaa\xdd\xff\x4f\x92",
-		.rlen	= 32,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ilen	= 512,
-		.result	= "\x57\xb5\xf8\x71\x6e\x6d\xdd\x82"
-			  "\x53\xd0\xed\x2d\x30\xc1\x20\xef"
-			  "\x70\x67\x5e\xff\x09\x70\xbb\xc1"
-			  "\x3a\x7b\x48\x26\xd9\x0b\xf4\x48"
-			  "\xbe\xce\xb1\xc7\xb2\x67\xc4\xa7"
-			  "\x76\xf8\x36\x30\xb7\xb4\x9a\xd9"
-			  "\xf5\x9d\xd0\x7b\xc1\x06\x96\x44"
-			  "\x19\xc5\x58\x84\x63\xb9\x12\x68"
-			  "\x68\xc7\xaa\x18\x98\xf2\x1f\x5c"
-			  "\x39\xa6\xd8\x32\x2b\xc3\x51\xfd"
-			  "\x74\x79\x2e\xb4\x44\xd7\x69\xc4"
-			  "\xfc\x29\xe6\xed\x26\x1e\xa6\x9d"
-			  "\x1c\xbe\x00\x0e\x7f\x3a\xca\xfb"
-			  "\x6d\x13\x65\xa0\xf9\x31\x12\xe2"
-			  "\x26\xd1\xec\x2b\x0a\x8b\x59\x99"
-			  "\xa7\x49\xa0\x0e\x09\x33\x85\x50"
-			  "\xc3\x23\xca\x7a\xdd\x13\x45\x5f"
-			  "\xde\x4c\xa7\xcb\x00\x8a\x66\x6f"
-			  "\xa2\xb6\xb1\x2e\xe1\xa0\x18\xf6"
-			  "\xad\xf3\xbd\xeb\xc7\xef\x55\x4f"
-			  "\x79\x91\x8d\x36\x13\x7b\xd0\x4a"
-			  "\x6c\x39\xfb\x53\xb8\x6f\x02\x51"
-			  "\xa5\x20\xac\x24\x1c\x73\x59\x73"
-			  "\x58\x61\x3a\x87\x58\xb3\x20\x56"
-			  "\x39\x06\x2b\x4d\xd3\x20\x2b\x89"
-			  "\x3f\xa2\xf0\x96\xeb\x7f\xa4\xcd"
-			  "\x11\xae\xbd\xcb\x3a\xb4\xd9\x91"
-			  "\x09\x35\x71\x50\x65\xac\x92\xe3"
-			  "\x7b\x32\xc0\x7a\xdd\xd4\xc3\x92"
-			  "\x6f\xeb\x79\xde\x6f\xd3\x25\xc9"
-			  "\xcd\x63\xf5\x1e\x7a\x3b\x26\x9d"
-			  "\x77\x04\x80\xa9\xbf\x38\xb5\xbd"
-			  "\xb8\x05\x07\xbd\xfd\xab\x7b\xf8"
-			  "\x2a\x26\xcc\x49\x14\x6d\x55\x01"
-			  "\x06\x94\xd8\xb2\x2d\x53\x83\x1b"
-			  "\x8f\xd4\xdd\x57\x12\x7e\x18\xba"
-			  "\x8e\xe2\x4d\x80\xef\x7e\x6b\x9d"
-			  "\x24\xa9\x60\xa4\x97\x85\x86\x2a"
-			  "\x01\x00\x09\xf1\xcb\x4a\x24\x1c"
-			  "\xd8\xf6\xe6\x5b\xe7\x5d\xf2\xc4"
-			  "\x97\x1c\x10\xc6\x4d\x66\x4f\x98"
-			  "\x87\x30\xac\xd5\xea\x73\x49\x10"
-			  "\x80\xea\xe5\x5f\x4d\x5f\x03\x33"
-			  "\x66\x02\x35\x3d\x60\x06\x36\x4f"
-			  "\x14\x1c\xd8\x07\x1f\x78\xd0\xf8"
-			  "\x4f\x6c\x62\x7c\x15\xa5\x7c\x28"
-			  "\x7c\xcc\xeb\x1f\xd1\x07\x90\x93"
-			  "\x7e\xc2\xa8\x3a\x80\xc0\xf5\x30"
-			  "\xcc\x75\xcf\x16\x26\xa9\x26\x3b"
-			  "\xe7\x68\x2f\x15\x21\x5b\xe4\x00"
-			  "\xbd\x48\x50\xcd\x75\x70\xc4\x62"
-			  "\xbb\x41\xfb\x89\x4a\x88\x3b\x3b"
-			  "\x51\x66\x02\x69\x04\x97\x36\xd4"
-			  "\x75\xae\x0b\xa3\x42\xf8\xca\x79"
-			  "\x8f\x93\xe9\xcc\x38\xbd\xd6\xd2"
-			  "\xf9\x70\x4e\xc3\x6a\x8e\x25\xbd"
-			  "\xea\x15\x5a\xa0\x85\x7e\x81\x0d"
-			  "\x03\xe7\x05\x39\xf5\x05\x26\xee"
-			  "\xec\xaa\x1f\x3d\xc9\x98\x76\x01"
-			  "\x2c\xf4\xfc\xa3\x88\x77\x38\xc4"
-			  "\x50\x65\x50\x6d\x04\x1f\xdf\x5a"
-			  "\xaa\xf2\x01\xa9\xc1\x8d\xee\xca"
-			  "\x47\x26\xef\x39\xb8\xb4\xf2\xd1"
-			  "\xd6\xbb\x1b\x2a\xc1\x34\x14\xcf",
-		.rlen	= 512,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x62\x49\x77\x57\x24\x70\x93\x69"
-			  "\x99\x59\x57\x49\x66\x96\x76\x27"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95"
-			  "\x02\x88\x41\x97\x16\x93\x99\x37"
-			  "\x51\x05\x82\x09\x74\x94\x45\x92",
-		.klen	= 64,
-		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ilen	= 512,
-		.result	= "\xc5\x85\x2a\x4b\x73\xe4\xf6\xf1"
-			  "\x7e\xf9\xf6\xe9\xa3\x73\x36\xcb"
-			  "\xaa\xb6\x22\xb0\x24\x6e\x3d\x73"
-			  "\x92\x99\xde\xd3\x76\xed\xcd\x63"
-			  "\x64\x3a\x22\x57\xc1\x43\x49\xd4"
-			  "\x79\x36\x31\x19\x62\xae\x10\x7e"
-			  "\x7d\xcf\x7a\xe2\x6b\xce\x27\xfa"
-			  "\xdc\x3d\xd9\x83\xd3\x42\x4c\xe0"
-			  "\x1b\xd6\x1d\x1a\x6f\xd2\x03\x00"
-			  "\xfc\x81\x99\x8a\x14\x62\xf5\x7e"
-			  "\x0d\xe7\x12\xe8\x17\x9d\x0b\xec"
-			  "\xe2\xf7\xc9\xa7\x63\xd1\x79\xb6"
-			  "\x62\x62\x37\xfe\x0a\x4c\x4a\x37"
-			  "\x70\xc7\x5e\x96\x5f\xbc\x8e\x9e"
-			  "\x85\x3c\x4f\x26\x64\x85\xbc\x68"
-			  "\xb0\xe0\x86\x5e\x26\x41\xce\x11"
-			  "\x50\xda\x97\x14\xe9\x9e\xc7\x6d"
-			  "\x3b\xdc\x43\xde\x2b\x27\x69\x7d"
-			  "\xfc\xb0\x28\xbd\x8f\xb1\xc6\x31"
-			  "\x14\x4d\xf0\x74\x37\xfd\x07\x25"
-			  "\x96\x55\xe5\xfc\x9e\x27\x2a\x74"
-			  "\x1b\x83\x4d\x15\x83\xac\x57\xa0"
-			  "\xac\xa5\xd0\x38\xef\x19\x56\x53"
-			  "\x25\x4b\xfc\xce\x04\x23\xe5\x6b"
-			  "\xf6\xc6\x6c\x32\x0b\xb3\x12\xc5"
-			  "\xed\x22\x34\x1c\x5d\xed\x17\x06"
-			  "\x36\xa3\xe6\x77\xb9\x97\x46\xb8"
-			  "\xe9\x3f\x7e\xc7\xbc\x13\x5c\xdc"
-			  "\x6e\x3f\x04\x5e\xd1\x59\xa5\x82"
-			  "\x35\x91\x3d\x1b\xe4\x97\x9f\x92"
-			  "\x1c\x5e\x5f\x6f\x41\xd4\x62\xa1"
-			  "\x8d\x39\xfc\x42\xfb\x38\x80\xb9"
-			  "\x0a\xe3\xcc\x6a\x93\xd9\x7a\xb1"
-			  "\xe9\x69\xaf\x0a\x6b\x75\x38\xa7"
-			  "\xa1\xbf\xf7\xda\x95\x93\x4b\x78"
-			  "\x19\xf5\x94\xf9\xd2\x00\x33\x37"
-			  "\xcf\xf5\x9e\x9c\xf3\xcc\xa6\xee"
-			  "\x42\xb2\x9e\x2c\x5f\x48\x23\x26"
-			  "\x15\x25\x17\x03\x3d\xfe\x2c\xfc"
-			  "\xeb\xba\xda\xe0\x00\x05\xb6\xa6"
-			  "\x07\xb3\xe8\x36\x5b\xec\x5b\xbf"
-			  "\xd6\x5b\x00\x74\xc6\x97\xf1\x6a"
-			  "\x49\xa1\xc3\xfa\x10\x52\xb9\x14"
-			  "\xad\xb7\x73\xf8\x78\x12\xc8\x59"
-			  "\x17\x80\x4c\x57\x39\xf1\x6d\x80"
-			  "\x25\x77\x0f\x5e\x7d\xf0\xaf\x21"
-			  "\xec\xce\xb7\xc8\x02\x8a\xed\x53"
-			  "\x2c\x25\x68\x2e\x1f\x85\x5e\x67"
-			  "\xd1\x07\x7a\x3a\x89\x08\xe0\x34"
-			  "\xdc\xdb\x26\xb4\x6b\x77\xfc\x40"
-			  "\x31\x15\x72\xa0\xf0\x73\xd9\x3b"
-			  "\xd5\xdb\xfe\xfc\x8f\xa9\x44\xa2"
-			  "\x09\x9f\xc6\x33\xe5\xe2\x88\xe8"
-			  "\xf3\xf0\x1a\xf4\xce\x12\x0f\xd6"
-			  "\xf7\x36\xe6\xa4\xf4\x7a\x10\x58"
-			  "\xcc\x1f\x48\x49\x65\x47\x75\xe9"
-			  "\x28\xe1\x65\x7b\xf2\xc4\xb5\x07"
-			  "\xf2\xec\x76\xd8\x8f\x09\xf3\x16"
-			  "\xa1\x51\x89\x3b\xeb\x96\x42\xac"
-			  "\x65\xe0\x67\x63\x29\xdc\xb4\x7d"
-			  "\xf2\x41\x51\x6a\xcb\xde\x3c\xfb"
-			  "\x66\x8d\x13\xca\xe0\x59\x2a\x00"
-			  "\xc9\x53\x4c\xe6\x9e\xe2\x73\xd5"
-			  "\x67\x19\xb2\xbd\x9a\x63\xd7\x5c",
-		.rlen	= 512,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 512 - 20, 4, 16 },
-	}
-};
-
-static struct cipher_testvec speck128_xts_dec_tv_template[] = {
-	{
-		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xbe\xa0\xe7\x03\xd7\xfe\xab\x62"
-			  "\x3b\x99\x4a\x64\x74\x77\xac\xed"
-			  "\xd8\xf4\xa6\xcf\xae\xb9\x07\x42"
-			  "\x51\xd9\xb6\x1d\xe0\x5e\xbc\x54",
-		.ilen	= 32,
-		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.rlen	= 32,
-	}, {
-		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xfb\x53\x81\x75\x6f\x9f\x34\xad"
-			  "\x7e\x01\xed\x7b\xcc\xda\x4e\x4a"
-			  "\xd4\x84\xa4\x53\xd5\x88\x73\x1b"
-			  "\xfd\xcb\xae\x0d\xf3\x04\xee\xe6",
-		.ilen	= 32,
-		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.rlen	= 32,
-	}, {
-		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
-			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x21\x52\x84\x15\xd1\xf7\x21\x55"
-			  "\xd9\x75\x4a\xd3\xc5\xdb\x9f\x7d"
-			  "\xda\x63\xb2\xf1\x82\xb0\x89\x59"
-			  "\x86\xd4\xaa\xaa\xdd\xff\x4f\x92",
-		.ilen	= 32,
-		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.rlen	= 32,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x57\xb5\xf8\x71\x6e\x6d\xdd\x82"
-			  "\x53\xd0\xed\x2d\x30\xc1\x20\xef"
-			  "\x70\x67\x5e\xff\x09\x70\xbb\xc1"
-			  "\x3a\x7b\x48\x26\xd9\x0b\xf4\x48"
-			  "\xbe\xce\xb1\xc7\xb2\x67\xc4\xa7"
-			  "\x76\xf8\x36\x30\xb7\xb4\x9a\xd9"
-			  "\xf5\x9d\xd0\x7b\xc1\x06\x96\x44"
-			  "\x19\xc5\x58\x84\x63\xb9\x12\x68"
-			  "\x68\xc7\xaa\x18\x98\xf2\x1f\x5c"
-			  "\x39\xa6\xd8\x32\x2b\xc3\x51\xfd"
-			  "\x74\x79\x2e\xb4\x44\xd7\x69\xc4"
-			  "\xfc\x29\xe6\xed\x26\x1e\xa6\x9d"
-			  "\x1c\xbe\x00\x0e\x7f\x3a\xca\xfb"
-			  "\x6d\x13\x65\xa0\xf9\x31\x12\xe2"
-			  "\x26\xd1\xec\x2b\x0a\x8b\x59\x99"
-			  "\xa7\x49\xa0\x0e\x09\x33\x85\x50"
-			  "\xc3\x23\xca\x7a\xdd\x13\x45\x5f"
-			  "\xde\x4c\xa7\xcb\x00\x8a\x66\x6f"
-			  "\xa2\xb6\xb1\x2e\xe1\xa0\x18\xf6"
-			  "\xad\xf3\xbd\xeb\xc7\xef\x55\x4f"
-			  "\x79\x91\x8d\x36\x13\x7b\xd0\x4a"
-			  "\x6c\x39\xfb\x53\xb8\x6f\x02\x51"
-			  "\xa5\x20\xac\x24\x1c\x73\x59\x73"
-			  "\x58\x61\x3a\x87\x58\xb3\x20\x56"
-			  "\x39\x06\x2b\x4d\xd3\x20\x2b\x89"
-			  "\x3f\xa2\xf0\x96\xeb\x7f\xa4\xcd"
-			  "\x11\xae\xbd\xcb\x3a\xb4\xd9\x91"
-			  "\x09\x35\x71\x50\x65\xac\x92\xe3"
-			  "\x7b\x32\xc0\x7a\xdd\xd4\xc3\x92"
-			  "\x6f\xeb\x79\xde\x6f\xd3\x25\xc9"
-			  "\xcd\x63\xf5\x1e\x7a\x3b\x26\x9d"
-			  "\x77\x04\x80\xa9\xbf\x38\xb5\xbd"
-			  "\xb8\x05\x07\xbd\xfd\xab\x7b\xf8"
-			  "\x2a\x26\xcc\x49\x14\x6d\x55\x01"
-			  "\x06\x94\xd8\xb2\x2d\x53\x83\x1b"
-			  "\x8f\xd4\xdd\x57\x12\x7e\x18\xba"
-			  "\x8e\xe2\x4d\x80\xef\x7e\x6b\x9d"
-			  "\x24\xa9\x60\xa4\x97\x85\x86\x2a"
-			  "\x01\x00\x09\xf1\xcb\x4a\x24\x1c"
-			  "\xd8\xf6\xe6\x5b\xe7\x5d\xf2\xc4"
-			  "\x97\x1c\x10\xc6\x4d\x66\x4f\x98"
-			  "\x87\x30\xac\xd5\xea\x73\x49\x10"
-			  "\x80\xea\xe5\x5f\x4d\x5f\x03\x33"
-			  "\x66\x02\x35\x3d\x60\x06\x36\x4f"
-			  "\x14\x1c\xd8\x07\x1f\x78\xd0\xf8"
-			  "\x4f\x6c\x62\x7c\x15\xa5\x7c\x28"
-			  "\x7c\xcc\xeb\x1f\xd1\x07\x90\x93"
-			  "\x7e\xc2\xa8\x3a\x80\xc0\xf5\x30"
-			  "\xcc\x75\xcf\x16\x26\xa9\x26\x3b"
-			  "\xe7\x68\x2f\x15\x21\x5b\xe4\x00"
-			  "\xbd\x48\x50\xcd\x75\x70\xc4\x62"
-			  "\xbb\x41\xfb\x89\x4a\x88\x3b\x3b"
-			  "\x51\x66\x02\x69\x04\x97\x36\xd4"
-			  "\x75\xae\x0b\xa3\x42\xf8\xca\x79"
-			  "\x8f\x93\xe9\xcc\x38\xbd\xd6\xd2"
-			  "\xf9\x70\x4e\xc3\x6a\x8e\x25\xbd"
-			  "\xea\x15\x5a\xa0\x85\x7e\x81\x0d"
-			  "\x03\xe7\x05\x39\xf5\x05\x26\xee"
-			  "\xec\xaa\x1f\x3d\xc9\x98\x76\x01"
-			  "\x2c\xf4\xfc\xa3\x88\x77\x38\xc4"
-			  "\x50\x65\x50\x6d\x04\x1f\xdf\x5a"
-			  "\xaa\xf2\x01\xa9\xc1\x8d\xee\xca"
-			  "\x47\x26\xef\x39\xb8\xb4\xf2\xd1"
-			  "\xd6\xbb\x1b\x2a\xc1\x34\x14\xcf",
-		.ilen	= 512,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.rlen	= 512,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x62\x49\x77\x57\x24\x70\x93\x69"
-			  "\x99\x59\x57\x49\x66\x96\x76\x27"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95"
-			  "\x02\x88\x41\x97\x16\x93\x99\x37"
-			  "\x51\x05\x82\x09\x74\x94\x45\x92",
-		.klen	= 64,
-		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xc5\x85\x2a\x4b\x73\xe4\xf6\xf1"
-			  "\x7e\xf9\xf6\xe9\xa3\x73\x36\xcb"
-			  "\xaa\xb6\x22\xb0\x24\x6e\x3d\x73"
-			  "\x92\x99\xde\xd3\x76\xed\xcd\x63"
-			  "\x64\x3a\x22\x57\xc1\x43\x49\xd4"
-			  "\x79\x36\x31\x19\x62\xae\x10\x7e"
-			  "\x7d\xcf\x7a\xe2\x6b\xce\x27\xfa"
-			  "\xdc\x3d\xd9\x83\xd3\x42\x4c\xe0"
-			  "\x1b\xd6\x1d\x1a\x6f\xd2\x03\x00"
-			  "\xfc\x81\x99\x8a\x14\x62\xf5\x7e"
-			  "\x0d\xe7\x12\xe8\x17\x9d\x0b\xec"
-			  "\xe2\xf7\xc9\xa7\x63\xd1\x79\xb6"
-			  "\x62\x62\x37\xfe\x0a\x4c\x4a\x37"
-			  "\x70\xc7\x5e\x96\x5f\xbc\x8e\x9e"
-			  "\x85\x3c\x4f\x26\x64\x85\xbc\x68"
-			  "\xb0\xe0\x86\x5e\x26\x41\xce\x11"
-			  "\x50\xda\x97\x14\xe9\x9e\xc7\x6d"
-			  "\x3b\xdc\x43\xde\x2b\x27\x69\x7d"
-			  "\xfc\xb0\x28\xbd\x8f\xb1\xc6\x31"
-			  "\x14\x4d\xf0\x74\x37\xfd\x07\x25"
-			  "\x96\x55\xe5\xfc\x9e\x27\x2a\x74"
-			  "\x1b\x83\x4d\x15\x83\xac\x57\xa0"
-			  "\xac\xa5\xd0\x38\xef\x19\x56\x53"
-			  "\x25\x4b\xfc\xce\x04\x23\xe5\x6b"
-			  "\xf6\xc6\x6c\x32\x0b\xb3\x12\xc5"
-			  "\xed\x22\x34\x1c\x5d\xed\x17\x06"
-			  "\x36\xa3\xe6\x77\xb9\x97\x46\xb8"
-			  "\xe9\x3f\x7e\xc7\xbc\x13\x5c\xdc"
-			  "\x6e\x3f\x04\x5e\xd1\x59\xa5\x82"
-			  "\x35\x91\x3d\x1b\xe4\x97\x9f\x92"
-			  "\x1c\x5e\x5f\x6f\x41\xd4\x62\xa1"
-			  "\x8d\x39\xfc\x42\xfb\x38\x80\xb9"
-			  "\x0a\xe3\xcc\x6a\x93\xd9\x7a\xb1"
-			  "\xe9\x69\xaf\x0a\x6b\x75\x38\xa7"
-			  "\xa1\xbf\xf7\xda\x95\x93\x4b\x78"
-			  "\x19\xf5\x94\xf9\xd2\x00\x33\x37"
-			  "\xcf\xf5\x9e\x9c\xf3\xcc\xa6\xee"
-			  "\x42\xb2\x9e\x2c\x5f\x48\x23\x26"
-			  "\x15\x25\x17\x03\x3d\xfe\x2c\xfc"
-			  "\xeb\xba\xda\xe0\x00\x05\xb6\xa6"
-			  "\x07\xb3\xe8\x36\x5b\xec\x5b\xbf"
-			  "\xd6\x5b\x00\x74\xc6\x97\xf1\x6a"
-			  "\x49\xa1\xc3\xfa\x10\x52\xb9\x14"
-			  "\xad\xb7\x73\xf8\x78\x12\xc8\x59"
-			  "\x17\x80\x4c\x57\x39\xf1\x6d\x80"
-			  "\x25\x77\x0f\x5e\x7d\xf0\xaf\x21"
-			  "\xec\xce\xb7\xc8\x02\x8a\xed\x53"
-			  "\x2c\x25\x68\x2e\x1f\x85\x5e\x67"
-			  "\xd1\x07\x7a\x3a\x89\x08\xe0\x34"
-			  "\xdc\xdb\x26\xb4\x6b\x77\xfc\x40"
-			  "\x31\x15\x72\xa0\xf0\x73\xd9\x3b"
-			  "\xd5\xdb\xfe\xfc\x8f\xa9\x44\xa2"
-			  "\x09\x9f\xc6\x33\xe5\xe2\x88\xe8"
-			  "\xf3\xf0\x1a\xf4\xce\x12\x0f\xd6"
-			  "\xf7\x36\xe6\xa4\xf4\x7a\x10\x58"
-			  "\xcc\x1f\x48\x49\x65\x47\x75\xe9"
-			  "\x28\xe1\x65\x7b\xf2\xc4\xb5\x07"
-			  "\xf2\xec\x76\xd8\x8f\x09\xf3\x16"
-			  "\xa1\x51\x89\x3b\xeb\x96\x42\xac"
-			  "\x65\xe0\x67\x63\x29\xdc\xb4\x7d"
-			  "\xf2\x41\x51\x6a\xcb\xde\x3c\xfb"
-			  "\x66\x8d\x13\xca\xe0\x59\x2a\x00"
-			  "\xc9\x53\x4c\xe6\x9e\xe2\x73\xd5"
-			  "\x67\x19\xb2\xbd\x9a\x63\xd7\x5c",
-		.ilen	= 512,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.rlen	= 512,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 512 - 20, 4, 16 },
-	}
-};
-
-static struct cipher_testvec speck64_enc_tv_template[] = {
-	{ /* Speck64/96 */
-		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
-			  "\x10\x11\x12\x13",
-		.klen	= 12,
-		.input	= "\x65\x61\x6e\x73\x20\x46\x61\x74",
-		.ilen	= 8,
-		.result	= "\x6c\x94\x75\x41\xec\x52\x79\x9f",
-		.rlen	= 8,
-	}, { /* Speck64/128 */
-		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
-			  "\x10\x11\x12\x13\x18\x19\x1a\x1b",
-		.klen	= 16,
-		.input	= "\x2d\x43\x75\x74\x74\x65\x72\x3b",
-		.ilen	= 8,
-		.result	= "\x8b\x02\x4e\x45\x48\xa5\x6f\x8c",
-		.rlen	= 8,
-	},
-};
-
-static struct cipher_testvec speck64_dec_tv_template[] = {
-	{ /* Speck64/96 */
-		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
-			  "\x10\x11\x12\x13",
-		.klen	= 12,
-		.input	= "\x6c\x94\x75\x41\xec\x52\x79\x9f",
-		.ilen	= 8,
-		.result	= "\x65\x61\x6e\x73\x20\x46\x61\x74",
-		.rlen	= 8,
-	}, { /* Speck64/128 */
-		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
-			  "\x10\x11\x12\x13\x18\x19\x1a\x1b",
-		.klen	= 16,
-		.input	= "\x8b\x02\x4e\x45\x48\xa5\x6f\x8c",
-		.ilen	= 8,
-		.result	= "\x2d\x43\x75\x74\x74\x65\x72\x3b",
-		.rlen	= 8,
-	},
-};
-
-/*
- * Speck64-XTS test vectors, taken from the AES-XTS test vectors with the result
- * recomputed with Speck64 as the cipher, and key lengths adjusted
- */
-
-static struct cipher_testvec speck64_xts_enc_tv_template[] = {
-	{
-		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 24,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ilen	= 32,
-		.result	= "\x84\xaf\x54\x07\x19\xd4\x7c\xa6"
-			  "\xe4\xfe\xdf\xc4\x1f\x34\xc3\xc2"
-			  "\x80\xf5\x72\xe7\xcd\xf0\x99\x22"
-			  "\x35\xa7\x2f\x06\xef\xdc\x51\xaa",
-		.rlen	= 32,
-	}, {
-		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 24,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.ilen	= 32,
-		.result	= "\x12\x56\x73\xcd\x15\x87\xa8\x59"
-			  "\xcf\x84\xae\xd9\x1c\x66\xd6\x9f"
-			  "\xb3\x12\x69\x7e\x36\xeb\x52\xff"
-			  "\x62\xdd\xba\x90\xb3\xe1\xee\x99",
-		.rlen	= 32,
-	}, {
-		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
-			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 24,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.ilen	= 32,
-		.result	= "\x15\x1b\xe4\x2c\xa2\x5a\x2d\x2c"
-			  "\x27\x36\xc0\xbf\x5d\xea\x36\x37"
-			  "\x2d\x1a\x88\xbc\x66\xb5\xd0\x0b"
-			  "\xa1\xbc\x19\xb2\x0f\x3b\x75\x34",
-		.rlen	= 32,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93",
-		.klen	= 24,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ilen	= 512,
-		.result	= "\xaf\xa1\x81\xa6\x32\xbb\x15\x8e"
-			  "\xf8\x95\x2e\xd3\xe6\xee\x7e\x09"
-			  "\x0c\x1a\xf5\x02\x97\x8b\xe3\xb3"
-			  "\x11\xc7\x39\x96\xd0\x95\xf4\x56"
-			  "\xf4\xdd\x03\x38\x01\x44\x2c\xcf"
-			  "\x88\xae\x8e\x3c\xcd\xe7\xaa\x66"
-			  "\xfe\x3d\xc6\xfb\x01\x23\x51\x43"
-			  "\xd5\xd2\x13\x86\x94\x34\xe9\x62"
-			  "\xf9\x89\xe3\xd1\x7b\xbe\xf8\xef"
-			  "\x76\x35\x04\x3f\xdb\x23\x9d\x0b"
-			  "\x85\x42\xb9\x02\xd6\xcc\xdb\x96"
-			  "\xa7\x6b\x27\xb6\xd4\x45\x8f\x7d"
-			  "\xae\xd2\x04\xd5\xda\xc1\x7e\x24"
-			  "\x8c\x73\xbe\x48\x7e\xcf\x65\x28"
-			  "\x29\xe5\xbe\x54\x30\xcb\x46\x95"
-			  "\x4f\x2e\x8a\x36\xc8\x27\xc5\xbe"
-			  "\xd0\x1a\xaf\xab\x26\xcd\x9e\x69"
-			  "\xa1\x09\x95\x71\x26\xe9\xc4\xdf"
-			  "\xe6\x31\xc3\x46\xda\xaf\x0b\x41"
-			  "\x1f\xab\xb1\x8e\xd6\xfc\x0b\xb3"
-			  "\x82\xc0\x37\x27\xfc\x91\xa7\x05"
-			  "\xfb\xc5\xdc\x2b\x74\x96\x48\x43"
-			  "\x5d\x9c\x19\x0f\x60\x63\x3a\x1f"
-			  "\x6f\xf0\x03\xbe\x4d\xfd\xc8\x4a"
-			  "\xc6\xa4\x81\x6d\xc3\x12\x2a\x5c"
-			  "\x07\xff\xf3\x72\x74\x48\xb5\x40"
-			  "\x50\xb5\xdd\x90\x43\x31\x18\x15"
-			  "\x7b\xf2\xa6\xdb\x83\xc8\x4b\x4a"
-			  "\x29\x93\x90\x8b\xda\x07\xf0\x35"
-			  "\x6d\x90\x88\x09\x4e\x83\xf5\x5b"
-			  "\x94\x12\xbb\x33\x27\x1d\x3f\x23"
-			  "\x51\xa8\x7c\x07\xa2\xae\x77\xa6"
-			  "\x50\xfd\xcc\xc0\x4f\x80\x7a\x9f"
-			  "\x66\xdd\xcd\x75\x24\x8b\x33\xf7"
-			  "\x20\xdb\x83\x9b\x4f\x11\x63\x6e"
-			  "\xcf\x37\xef\xc9\x11\x01\x5c\x45"
-			  "\x32\x99\x7c\x3c\x9e\x42\x89\xe3"
-			  "\x70\x6d\x15\x9f\xb1\xe6\xb6\x05"
-			  "\xfe\x0c\xb9\x49\x2d\x90\x6d\xcc"
-			  "\x5d\x3f\xc1\xfe\x89\x0a\x2e\x2d"
-			  "\xa0\xa8\x89\x3b\x73\x39\xa5\x94"
-			  "\x4c\xa4\xa6\xbb\xa7\x14\x46\x89"
-			  "\x10\xff\xaf\xef\xca\xdd\x4f\x80"
-			  "\xb3\xdf\x3b\xab\xd4\xe5\x5a\xc7"
-			  "\x33\xca\x00\x8b\x8b\x3f\xea\xec"
-			  "\x68\x8a\xc2\x6d\xfd\xd4\x67\x0f"
-			  "\x22\x31\xe1\x0e\xfe\x5a\x04\xd5"
-			  "\x64\xa3\xf1\x1a\x76\x28\xcc\x35"
-			  "\x36\xa7\x0a\x74\xf7\x1c\x44\x9b"
-			  "\xc7\x1b\x53\x17\x02\xea\xd1\xad"
-			  "\x13\x51\x73\xc0\xa0\xb2\x05\x32"
-			  "\xa8\xa2\x37\x2e\xe1\x7a\x3a\x19"
-			  "\x26\xb4\x6c\x62\x5d\xb3\x1a\x1d"
-			  "\x59\xda\xee\x1a\x22\x18\xda\x0d"
-			  "\x88\x0f\x55\x8b\x72\x62\xfd\xc1"
-			  "\x69\x13\xcd\x0d\x5f\xc1\x09\x52"
-			  "\xee\xd6\xe3\x84\x4d\xee\xf6\x88"
-			  "\xaf\x83\xdc\x76\xf4\xc0\x93\x3f"
-			  "\x4a\x75\x2f\xb0\x0b\x3e\xc4\x54"
-			  "\x7d\x69\x8d\x00\x62\x77\x0d\x14"
-			  "\xbe\x7c\xa6\x7d\xc5\x24\x4f\xf3"
-			  "\x50\xf7\x5f\xf4\xc2\xca\x41\x97"
-			  "\x37\xbe\x75\x74\xcd\xf0\x75\x6e"
-			  "\x25\x23\x94\xbd\xda\x8d\xb0\xd4",
-		.rlen	= 512,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x62\x49\x77\x57\x24\x70\x93\x69"
-			  "\x99\x59\x57\x49\x66\x96\x76\x27",
-		.klen	= 32,
-		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ilen	= 512,
-		.result	= "\x55\xed\x71\xd3\x02\x8e\x15\x3b"
-			  "\xc6\x71\x29\x2d\x3e\x89\x9f\x59"
-			  "\x68\x6a\xcc\x8a\x56\x97\xf3\x95"
-			  "\x4e\x51\x08\xda\x2a\xf8\x6f\x3c"
-			  "\x78\x16\xea\x80\xdb\x33\x75\x94"
-			  "\xf9\x29\xc4\x2b\x76\x75\x97\xc7"
-			  "\xf2\x98\x2c\xf9\xff\xc8\xd5\x2b"
-			  "\x18\xf1\xaf\xcf\x7c\xc5\x0b\xee"
-			  "\xad\x3c\x76\x7c\xe6\x27\xa2\x2a"
-			  "\xe4\x66\xe1\xab\xa2\x39\xfc\x7c"
-			  "\xf5\xec\x32\x74\xa3\xb8\x03\x88"
-			  "\x52\xfc\x2e\x56\x3f\xa1\xf0\x9f"
-			  "\x84\x5e\x46\xed\x20\x89\xb6\x44"
-			  "\x8d\xd0\xed\x54\x47\x16\xbe\x95"
-			  "\x8a\xb3\x6b\x72\xc4\x32\x52\x13"
-			  "\x1b\xb0\x82\xbe\xac\xf9\x70\xa6"
-			  "\x44\x18\xdd\x8c\x6e\xca\x6e\x45"
-			  "\x8f\x1e\x10\x07\x57\x25\x98\x7b"
-			  "\x17\x8c\x78\xdd\x80\xa7\xd9\xd8"
-			  "\x63\xaf\xb9\x67\x57\xfd\xbc\xdb"
-			  "\x44\xe9\xc5\x65\xd1\xc7\x3b\xff"
-			  "\x20\xa0\x80\x1a\xc3\x9a\xad\x5e"
-			  "\x5d\x3b\xd3\x07\xd9\xf5\xfd\x3d"
-			  "\x4a\x8b\xa8\xd2\x6e\x7a\x51\x65"
-			  "\x6c\x8e\x95\xe0\x45\xc9\x5f\x4a"
-			  "\x09\x3c\x3d\x71\x7f\x0c\x84\x2a"
-			  "\xc8\x48\x52\x1a\xc2\xd5\xd6\x78"
-			  "\x92\x1e\xa0\x90\x2e\xea\xf0\xf3"
-			  "\xdc\x0f\xb1\xaf\x0d\x9b\x06\x2e"
-			  "\x35\x10\x30\x82\x0d\xe7\xc5\x9b"
-			  "\xde\x44\x18\xbd\x9f\xd1\x45\xa9"
-			  "\x7b\x7a\x4a\xad\x35\x65\x27\xca"
-			  "\xb2\xc3\xd4\x9b\x71\x86\x70\xee"
-			  "\xf1\x89\x3b\x85\x4b\x5b\xaa\xaf"
-			  "\xfc\x42\xc8\x31\x59\xbe\x16\x60"
-			  "\x4f\xf9\xfa\x12\xea\xd0\xa7\x14"
-			  "\xf0\x7a\xf3\xd5\x8d\xbd\x81\xef"
-			  "\x52\x7f\x29\x51\x94\x20\x67\x3c"
-			  "\xd1\xaf\x77\x9f\x22\x5a\x4e\x63"
-			  "\xe7\xff\x73\x25\xd1\xdd\x96\x8a"
-			  "\x98\x52\x6d\xf3\xac\x3e\xf2\x18"
-			  "\x6d\xf6\x0a\x29\xa6\x34\x3d\xed"
-			  "\xe3\x27\x0d\x9d\x0a\x02\x44\x7e"
-			  "\x5a\x7e\x67\x0f\x0a\x9e\xd6\xad"
-			  "\x91\xe6\x4d\x81\x8c\x5c\x59\xaa"
-			  "\xfb\xeb\x56\x53\xd2\x7d\x4c\x81"
-			  "\x65\x53\x0f\x41\x11\xbd\x98\x99"
-			  "\xf9\xc6\xfa\x51\x2e\xa3\xdd\x8d"
-			  "\x84\x98\xf9\x34\xed\x33\x2a\x1f"
-			  "\x82\xed\xc1\x73\x98\xd3\x02\xdc"
-			  "\xe6\xc2\x33\x1d\xa2\xb4\xca\x76"
-			  "\x63\x51\x34\x9d\x96\x12\xae\xce"
-			  "\x83\xc9\x76\x5e\xa4\x1b\x53\x37"
-			  "\x17\xd5\xc0\x80\x1d\x62\xf8\x3d"
-			  "\x54\x27\x74\xbb\x10\x86\x57\x46"
-			  "\x68\xe1\xed\x14\xe7\x9d\xfc\x84"
-			  "\x47\xbc\xc2\xf8\x19\x4b\x99\xcf"
-			  "\x7a\xe9\xc4\xb8\x8c\x82\x72\x4d"
-			  "\x7b\x4f\x38\x55\x36\x71\x64\xc1"
-			  "\xfc\x5c\x75\x52\x33\x02\x18\xf8"
-			  "\x17\xe1\x2b\xc2\x43\x39\xbd\x76"
-			  "\x9b\x63\x76\x32\x2f\x19\x72\x10"
-			  "\x9f\x21\x0c\xf1\x66\x50\x7f\xa5"
-			  "\x0d\x1f\x46\xe0\xba\xd3\x2f\x3c",
-		.rlen	= 512,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 512 - 20, 4, 16 },
-	}
-};
-
-static struct cipher_testvec speck64_xts_dec_tv_template[] = {
-	{
-		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 24,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x84\xaf\x54\x07\x19\xd4\x7c\xa6"
-			  "\xe4\xfe\xdf\xc4\x1f\x34\xc3\xc2"
-			  "\x80\xf5\x72\xe7\xcd\xf0\x99\x22"
-			  "\x35\xa7\x2f\x06\xef\xdc\x51\xaa",
-		.ilen	= 32,
-		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.rlen	= 32,
-	}, {
-		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 24,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x12\x56\x73\xcd\x15\x87\xa8\x59"
-			  "\xcf\x84\xae\xd9\x1c\x66\xd6\x9f"
-			  "\xb3\x12\x69\x7e\x36\xeb\x52\xff"
-			  "\x62\xdd\xba\x90\xb3\xe1\xee\x99",
-		.ilen	= 32,
-		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.rlen	= 32,
-	}, {
-		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
-			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 24,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x15\x1b\xe4\x2c\xa2\x5a\x2d\x2c"
-			  "\x27\x36\xc0\xbf\x5d\xea\x36\x37"
-			  "\x2d\x1a\x88\xbc\x66\xb5\xd0\x0b"
-			  "\xa1\xbc\x19\xb2\x0f\x3b\x75\x34",
-		.ilen	= 32,
-		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.rlen	= 32,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93",
-		.klen	= 24,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xaf\xa1\x81\xa6\x32\xbb\x15\x8e"
-			  "\xf8\x95\x2e\xd3\xe6\xee\x7e\x09"
-			  "\x0c\x1a\xf5\x02\x97\x8b\xe3\xb3"
-			  "\x11\xc7\x39\x96\xd0\x95\xf4\x56"
-			  "\xf4\xdd\x03\x38\x01\x44\x2c\xcf"
-			  "\x88\xae\x8e\x3c\xcd\xe7\xaa\x66"
-			  "\xfe\x3d\xc6\xfb\x01\x23\x51\x43"
-			  "\xd5\xd2\x13\x86\x94\x34\xe9\x62"
-			  "\xf9\x89\xe3\xd1\x7b\xbe\xf8\xef"
-			  "\x76\x35\x04\x3f\xdb\x23\x9d\x0b"
-			  "\x85\x42\xb9\x02\xd6\xcc\xdb\x96"
-			  "\xa7\x6b\x27\xb6\xd4\x45\x8f\x7d"
-			  "\xae\xd2\x04\xd5\xda\xc1\x7e\x24"
-			  "\x8c\x73\xbe\x48\x7e\xcf\x65\x28"
-			  "\x29\xe5\xbe\x54\x30\xcb\x46\x95"
-			  "\x4f\x2e\x8a\x36\xc8\x27\xc5\xbe"
-			  "\xd0\x1a\xaf\xab\x26\xcd\x9e\x69"
-			  "\xa1\x09\x95\x71\x26\xe9\xc4\xdf"
-			  "\xe6\x31\xc3\x46\xda\xaf\x0b\x41"
-			  "\x1f\xab\xb1\x8e\xd6\xfc\x0b\xb3"
-			  "\x82\xc0\x37\x27\xfc\x91\xa7\x05"
-			  "\xfb\xc5\xdc\x2b\x74\x96\x48\x43"
-			  "\x5d\x9c\x19\x0f\x60\x63\x3a\x1f"
-			  "\x6f\xf0\x03\xbe\x4d\xfd\xc8\x4a"
-			  "\xc6\xa4\x81\x6d\xc3\x12\x2a\x5c"
-			  "\x07\xff\xf3\x72\x74\x48\xb5\x40"
-			  "\x50\xb5\xdd\x90\x43\x31\x18\x15"
-			  "\x7b\xf2\xa6\xdb\x83\xc8\x4b\x4a"
-			  "\x29\x93\x90\x8b\xda\x07\xf0\x35"
-			  "\x6d\x90\x88\x09\x4e\x83\xf5\x5b"
-			  "\x94\x12\xbb\x33\x27\x1d\x3f\x23"
-			  "\x51\xa8\x7c\x07\xa2\xae\x77\xa6"
-			  "\x50\xfd\xcc\xc0\x4f\x80\x7a\x9f"
-			  "\x66\xdd\xcd\x75\x24\x8b\x33\xf7"
-			  "\x20\xdb\x83\x9b\x4f\x11\x63\x6e"
-			  "\xcf\x37\xef\xc9\x11\x01\x5c\x45"
-			  "\x32\x99\x7c\x3c\x9e\x42\x89\xe3"
-			  "\x70\x6d\x15\x9f\xb1\xe6\xb6\x05"
-			  "\xfe\x0c\xb9\x49\x2d\x90\x6d\xcc"
-			  "\x5d\x3f\xc1\xfe\x89\x0a\x2e\x2d"
-			  "\xa0\xa8\x89\x3b\x73\x39\xa5\x94"
-			  "\x4c\xa4\xa6\xbb\xa7\x14\x46\x89"
-			  "\x10\xff\xaf\xef\xca\xdd\x4f\x80"
-			  "\xb3\xdf\x3b\xab\xd4\xe5\x5a\xc7"
-			  "\x33\xca\x00\x8b\x8b\x3f\xea\xec"
-			  "\x68\x8a\xc2\x6d\xfd\xd4\x67\x0f"
-			  "\x22\x31\xe1\x0e\xfe\x5a\x04\xd5"
-			  "\x64\xa3\xf1\x1a\x76\x28\xcc\x35"
-			  "\x36\xa7\x0a\x74\xf7\x1c\x44\x9b"
-			  "\xc7\x1b\x53\x17\x02\xea\xd1\xad"
-			  "\x13\x51\x73\xc0\xa0\xb2\x05\x32"
-			  "\xa8\xa2\x37\x2e\xe1\x7a\x3a\x19"
-			  "\x26\xb4\x6c\x62\x5d\xb3\x1a\x1d"
-			  "\x59\xda\xee\x1a\x22\x18\xda\x0d"
-			  "\x88\x0f\x55\x8b\x72\x62\xfd\xc1"
-			  "\x69\x13\xcd\x0d\x5f\xc1\x09\x52"
-			  "\xee\xd6\xe3\x84\x4d\xee\xf6\x88"
-			  "\xaf\x83\xdc\x76\xf4\xc0\x93\x3f"
-			  "\x4a\x75\x2f\xb0\x0b\x3e\xc4\x54"
-			  "\x7d\x69\x8d\x00\x62\x77\x0d\x14"
-			  "\xbe\x7c\xa6\x7d\xc5\x24\x4f\xf3"
-			  "\x50\xf7\x5f\xf4\xc2\xca\x41\x97"
-			  "\x37\xbe\x75\x74\xcd\xf0\x75\x6e"
-			  "\x25\x23\x94\xbd\xda\x8d\xb0\xd4",
-		.ilen	= 512,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.rlen	= 512,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x62\x49\x77\x57\x24\x70\x93\x69"
-			  "\x99\x59\x57\x49\x66\x96\x76\x27",
-		.klen	= 32,
-		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x55\xed\x71\xd3\x02\x8e\x15\x3b"
-			  "\xc6\x71\x29\x2d\x3e\x89\x9f\x59"
-			  "\x68\x6a\xcc\x8a\x56\x97\xf3\x95"
-			  "\x4e\x51\x08\xda\x2a\xf8\x6f\x3c"
-			  "\x78\x16\xea\x80\xdb\x33\x75\x94"
-			  "\xf9\x29\xc4\x2b\x76\x75\x97\xc7"
-			  "\xf2\x98\x2c\xf9\xff\xc8\xd5\x2b"
-			  "\x18\xf1\xaf\xcf\x7c\xc5\x0b\xee"
-			  "\xad\x3c\x76\x7c\xe6\x27\xa2\x2a"
-			  "\xe4\x66\xe1\xab\xa2\x39\xfc\x7c"
-			  "\xf5\xec\x32\x74\xa3\xb8\x03\x88"
-			  "\x52\xfc\x2e\x56\x3f\xa1\xf0\x9f"
-			  "\x84\x5e\x46\xed\x20\x89\xb6\x44"
-			  "\x8d\xd0\xed\x54\x47\x16\xbe\x95"
-			  "\x8a\xb3\x6b\x72\xc4\x32\x52\x13"
-			  "\x1b\xb0\x82\xbe\xac\xf9\x70\xa6"
-			  "\x44\x18\xdd\x8c\x6e\xca\x6e\x45"
-			  "\x8f\x1e\x10\x07\x57\x25\x98\x7b"
-			  "\x17\x8c\x78\xdd\x80\xa7\xd9\xd8"
-			  "\x63\xaf\xb9\x67\x57\xfd\xbc\xdb"
-			  "\x44\xe9\xc5\x65\xd1\xc7\x3b\xff"
-			  "\x20\xa0\x80\x1a\xc3\x9a\xad\x5e"
-			  "\x5d\x3b\xd3\x07\xd9\xf5\xfd\x3d"
-			  "\x4a\x8b\xa8\xd2\x6e\x7a\x51\x65"
-			  "\x6c\x8e\x95\xe0\x45\xc9\x5f\x4a"
-			  "\x09\x3c\x3d\x71\x7f\x0c\x84\x2a"
-			  "\xc8\x48\x52\x1a\xc2\xd5\xd6\x78"
-			  "\x92\x1e\xa0\x90\x2e\xea\xf0\xf3"
-			  "\xdc\x0f\xb1\xaf\x0d\x9b\x06\x2e"
-			  "\x35\x10\x30\x82\x0d\xe7\xc5\x9b"
-			  "\xde\x44\x18\xbd\x9f\xd1\x45\xa9"
-			  "\x7b\x7a\x4a\xad\x35\x65\x27\xca"
-			  "\xb2\xc3\xd4\x9b\x71\x86\x70\xee"
-			  "\xf1\x89\x3b\x85\x4b\x5b\xaa\xaf"
-			  "\xfc\x42\xc8\x31\x59\xbe\x16\x60"
-			  "\x4f\xf9\xfa\x12\xea\xd0\xa7\x14"
-			  "\xf0\x7a\xf3\xd5\x8d\xbd\x81\xef"
-			  "\x52\x7f\x29\x51\x94\x20\x67\x3c"
-			  "\xd1\xaf\x77\x9f\x22\x5a\x4e\x63"
-			  "\xe7\xff\x73\x25\xd1\xdd\x96\x8a"
-			  "\x98\x52\x6d\xf3\xac\x3e\xf2\x18"
-			  "\x6d\xf6\x0a\x29\xa6\x34\x3d\xed"
-			  "\xe3\x27\x0d\x9d\x0a\x02\x44\x7e"
-			  "\x5a\x7e\x67\x0f\x0a\x9e\xd6\xad"
-			  "\x91\xe6\x4d\x81\x8c\x5c\x59\xaa"
-			  "\xfb\xeb\x56\x53\xd2\x7d\x4c\x81"
-			  "\x65\x53\x0f\x41\x11\xbd\x98\x99"
-			  "\xf9\xc6\xfa\x51\x2e\xa3\xdd\x8d"
-			  "\x84\x98\xf9\x34\xed\x33\x2a\x1f"
-			  "\x82\xed\xc1\x73\x98\xd3\x02\xdc"
-			  "\xe6\xc2\x33\x1d\xa2\xb4\xca\x76"
-			  "\x63\x51\x34\x9d\x96\x12\xae\xce"
-			  "\x83\xc9\x76\x5e\xa4\x1b\x53\x37"
-			  "\x17\xd5\xc0\x80\x1d\x62\xf8\x3d"
-			  "\x54\x27\x74\xbb\x10\x86\x57\x46"
-			  "\x68\xe1\xed\x14\xe7\x9d\xfc\x84"
-			  "\x47\xbc\xc2\xf8\x19\x4b\x99\xcf"
-			  "\x7a\xe9\xc4\xb8\x8c\x82\x72\x4d"
-			  "\x7b\x4f\x38\x55\x36\x71\x64\xc1"
-			  "\xfc\x5c\x75\x52\x33\x02\x18\xf8"
-			  "\x17\xe1\x2b\xc2\x43\x39\xbd\x76"
-			  "\x9b\x63\x76\x32\x2f\x19\x72\x10"
-			  "\x9f\x21\x0c\xf1\x66\x50\x7f\xa5"
-			  "\x0d\x1f\x46\xe0\xba\xd3\x2f\x3c",
-		.ilen	= 512,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.rlen	= 512,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 512 - 20, 4, 16 },
-	}
-};
-
 /* Cast6 test vectors from RFC 2612 */
 #define CAST6_ENC_TEST_VECTORS		4
 #define CAST6_DEC_TEST_VECTORS		4
@@ -34569,6 +34314,2108 @@ static struct cipher_testvec chacha20_enc_tv_template[] = {
 	},
 };
 
+static struct cipher_testvec xchacha20_tv_template[] = {
+	{ /* from libsodium test/default/xchacha20.c */
+		.key	= "\x79\xc9\x97\x98\xac\x67\x30\x0b"
+			  "\xbb\x27\x04\xc9\x5c\x34\x1e\x32"
+			  "\x45\xf3\xdc\xb2\x17\x61\xb9\x8e"
+			  "\x52\xff\x45\xb2\x4f\x30\x4f\xc4",
+		.klen	= 32,
+		.iv	= "\xb3\x3f\xfd\x30\x96\x47\x9b\xcf"
+			  "\xbc\x9a\xee\x49\x41\x76\x88\xa0"
+			  "\xa2\x55\x4f\x8d\x95\x38\x94\x19"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00",
+		.result	= "\xc6\xe9\x75\x81\x60\x08\x3a\xc6"
+			  "\x04\xef\x90\xe7\x12\xce\x6e\x75"
+			  "\xd7\x79\x75\x90\x74\x4e\x0c\xf0"
+			  "\x60\xf0\x13\x73\x9c",
+		.ilen	= 29,
+		.rlen	= 29,
+	}, { /* from libsodium test/default/xchacha20.c */
+		.key	= "\x9d\x23\xbd\x41\x49\xcb\x97\x9c"
+			  "\xcf\x3c\x5c\x94\xdd\x21\x7e\x98"
+			  "\x08\xcb\x0e\x50\xcd\x0f\x67\x81"
+			  "\x22\x35\xea\xaf\x60\x1d\x62\x32",
+		.klen	= 32,
+		.iv	= "\xc0\x47\x54\x82\x66\xb7\xc3\x70"
+			  "\xd3\x35\x66\xa2\x42\x5c\xbf\x30"
+			  "\xd8\x2d\x1e\xaf\x52\x94\x10\x9e"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00",
+		.result	= "\xa2\x12\x09\x09\x65\x94\xde\x8c"
+			  "\x56\x67\xb1\xd1\x3a\xd9\x3f\x74"
+			  "\x41\x06\xd0\x54\xdf\x21\x0e\x47"
+			  "\x82\xcd\x39\x6f\xec\x69\x2d\x35"
+			  "\x15\xa2\x0b\xf3\x51\xee\xc0\x11"
+			  "\xa9\x2c\x36\x78\x88\xbc\x46\x4c"
+			  "\x32\xf0\x80\x7a\xcd\x6c\x20\x3a"
+			  "\x24\x7e\x0d\xb8\x54\x14\x84\x68"
+			  "\xe9\xf9\x6b\xee\x4c\xf7\x18\xd6"
+			  "\x8d\x5f\x63\x7c\xbd\x5a\x37\x64"
+			  "\x57\x78\x8e\x6f\xae\x90\xfc\x31"
+			  "\x09\x7c\xfc",
+		.ilen	= 91,
+		.rlen	= 91,
+	}, { /* Taken from the ChaCha20 test vectors, appended 16 random bytes
+		to nonce, and recomputed the ciphertext with libsodium */
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x67\xc6\x69\x73"
+			  "\x51\xff\x4a\xec\x29\xcd\xba\xab"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.result	= "\x9c\x49\x2a\xe7\x8a\x2f\x93\xc7"
+			  "\xb3\x33\x6f\x82\x17\xd8\xc4\x1e"
+			  "\xad\x80\x11\x11\x1d\x4c\x16\x18"
+			  "\x07\x73\x9b\x4f\xdb\x7c\xcb\x47"
+			  "\xfd\xef\x59\x74\xfa\x3f\xe5\x4c"
+			  "\x9b\xd0\xea\xbc\xba\x56\xad\x32"
+			  "\x03\xdc\xf8\x2b\xc1\xe1\x75\x67"
+			  "\x23\x7b\xe6\xfc\xd4\x03\x86\x54",
+		.ilen	= 64,
+		.rlen	= 64,
+	}, { /* Taken from the ChaCha20 test vectors, appended 16 random bytes
+		to nonce, and recomputed the ciphertext with libsodium */
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x02\xf2\xfb\xe3\x46"
+			  "\x7c\xc2\x54\xf8\x1b\xe8\xe7\x8d"
+			  "\x01\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x41\x6e\x79\x20\x73\x75\x62\x6d"
+			  "\x69\x73\x73\x69\x6f\x6e\x20\x74"
+			  "\x6f\x20\x74\x68\x65\x20\x49\x45"
+			  "\x54\x46\x20\x69\x6e\x74\x65\x6e"
+			  "\x64\x65\x64\x20\x62\x79\x20\x74"
+			  "\x68\x65\x20\x43\x6f\x6e\x74\x72"
+			  "\x69\x62\x75\x74\x6f\x72\x20\x66"
+			  "\x6f\x72\x20\x70\x75\x62\x6c\x69"
+			  "\x63\x61\x74\x69\x6f\x6e\x20\x61"
+			  "\x73\x20\x61\x6c\x6c\x20\x6f\x72"
+			  "\x20\x70\x61\x72\x74\x20\x6f\x66"
+			  "\x20\x61\x6e\x20\x49\x45\x54\x46"
+			  "\x20\x49\x6e\x74\x65\x72\x6e\x65"
+			  "\x74\x2d\x44\x72\x61\x66\x74\x20"
+			  "\x6f\x72\x20\x52\x46\x43\x20\x61"
+			  "\x6e\x64\x20\x61\x6e\x79\x20\x73"
+			  "\x74\x61\x74\x65\x6d\x65\x6e\x74"
+			  "\x20\x6d\x61\x64\x65\x20\x77\x69"
+			  "\x74\x68\x69\x6e\x20\x74\x68\x65"
+			  "\x20\x63\x6f\x6e\x74\x65\x78\x74"
+			  "\x20\x6f\x66\x20\x61\x6e\x20\x49"
+			  "\x45\x54\x46\x20\x61\x63\x74\x69"
+			  "\x76\x69\x74\x79\x20\x69\x73\x20"
+			  "\x63\x6f\x6e\x73\x69\x64\x65\x72"
+			  "\x65\x64\x20\x61\x6e\x20\x22\x49"
+			  "\x45\x54\x46\x20\x43\x6f\x6e\x74"
+			  "\x72\x69\x62\x75\x74\x69\x6f\x6e"
+			  "\x22\x2e\x20\x53\x75\x63\x68\x20"
+			  "\x73\x74\x61\x74\x65\x6d\x65\x6e"
+			  "\x74\x73\x20\x69\x6e\x63\x6c\x75"
+			  "\x64\x65\x20\x6f\x72\x61\x6c\x20"
+			  "\x73\x74\x61\x74\x65\x6d\x65\x6e"
+			  "\x74\x73\x20\x69\x6e\x20\x49\x45"
+			  "\x54\x46\x20\x73\x65\x73\x73\x69"
+			  "\x6f\x6e\x73\x2c\x20\x61\x73\x20"
+			  "\x77\x65\x6c\x6c\x20\x61\x73\x20"
+			  "\x77\x72\x69\x74\x74\x65\x6e\x20"
+			  "\x61\x6e\x64\x20\x65\x6c\x65\x63"
+			  "\x74\x72\x6f\x6e\x69\x63\x20\x63"
+			  "\x6f\x6d\x6d\x75\x6e\x69\x63\x61"
+			  "\x74\x69\x6f\x6e\x73\x20\x6d\x61"
+			  "\x64\x65\x20\x61\x74\x20\x61\x6e"
+			  "\x79\x20\x74\x69\x6d\x65\x20\x6f"
+			  "\x72\x20\x70\x6c\x61\x63\x65\x2c"
+			  "\x20\x77\x68\x69\x63\x68\x20\x61"
+			  "\x72\x65\x20\x61\x64\x64\x72\x65"
+			  "\x73\x73\x65\x64\x20\x74\x6f",
+		.result	= "\xf9\xab\x7a\x4a\x60\xb8\x5f\xa0"
+			  "\x50\xbb\x57\xce\xef\x8c\xc1\xd9"
+			  "\x24\x15\xb3\x67\x5e\x7f\x01\xf6"
+			  "\x1c\x22\xf6\xe5\x71\xb1\x43\x64"
+			  "\x63\x05\xd5\xfc\x5c\x3d\xc0\x0e"
+			  "\x23\xef\xd3\x3b\xd9\xdc\x7f\xa8"
+			  "\x58\x26\xb3\xd0\xc2\xd5\x04\x3f"
+			  "\x0a\x0e\x8f\x17\xe4\xcd\xf7\x2a"
+			  "\xb4\x2c\x09\xe4\x47\xec\x8b\xfb"
+			  "\x59\x37\x7a\xa1\xd0\x04\x7e\xaa"
+			  "\xf1\x98\x5f\x24\x3d\x72\x9a\x43"
+			  "\xa4\x36\x51\x92\x22\x87\xff\x26"
+			  "\xce\x9d\xeb\x59\x78\x84\x5e\x74"
+			  "\x97\x2e\x63\xc0\xef\x29\xf7\x8a"
+			  "\xb9\xee\x35\x08\x77\x6a\x35\x9a"
+			  "\x3e\xe6\x4f\x06\x03\x74\x1b\xc1"
+			  "\x5b\xb3\x0b\x89\x11\x07\xd3\xb7"
+			  "\x53\xd6\x25\x04\xd9\x35\xb4\x5d"
+			  "\x4c\x33\x5a\xc2\x42\x4c\xe6\xa4"
+			  "\x97\x6e\x0e\xd2\xb2\x8b\x2f\x7f"
+			  "\x28\xe5\x9f\xac\x4b\x2e\x02\xab"
+			  "\x85\xfa\xa9\x0d\x7c\x2d\x10\xe6"
+			  "\x91\xab\x55\x63\xf0\xde\x3a\x94"
+			  "\x25\x08\x10\x03\xc2\x68\xd1\xf4"
+			  "\xaf\x7d\x9c\x99\xf7\x86\x96\x30"
+			  "\x60\xfc\x0b\xe6\xa8\x80\x15\xb0"
+			  "\x81\xb1\x0c\xbe\xb9\x12\x18\x25"
+			  "\xe9\x0e\xb1\xe7\x23\xb2\xef\x4a"
+			  "\x22\x8f\xc5\x61\x89\xd4\xe7\x0c"
+			  "\x64\x36\x35\x61\xb6\x34\x60\xf7"
+			  "\x7b\x61\x37\x37\x12\x10\xa2\xf6"
+			  "\x7e\xdb\x7f\x39\x3f\xb6\x8e\x89"
+			  "\x9e\xf3\xfe\x13\x98\xbb\x66\x5a"
+			  "\xec\xea\xab\x3f\x9c\x87\xc4\x8c"
+			  "\x8a\x04\x18\x49\xfc\x77\x11\x50"
+			  "\x16\xe6\x71\x2b\xee\xc0\x9c\xb6"
+			  "\x87\xfd\x80\xff\x0b\x1d\x73\x38"
+			  "\xa4\x1d\x6f\xae\xe4\x12\xd7\x93"
+			  "\x9d\xcd\x38\x26\x09\x40\x52\xcd"
+			  "\x67\x01\x67\x26\xe0\x3e\x98\xa8"
+			  "\xe8\x1a\x13\x41\xbb\x90\x4d\x87"
+			  "\xbb\x42\x82\x39\xce\x3a\xd0\x18"
+			  "\x6d\x7b\x71\x8f\xbb\x2c\x6a\xd1"
+			  "\xbd\xf5\xc7\x8a\x7e\xe1\x1e\x0f"
+			  "\x0d\x0d\x13\x7c\xd9\xd8\x3c\x91"
+			  "\xab\xff\x1f\x12\xc3\xee\xe5\x65"
+			  "\x12\x8d\x7b\x61\xe5\x1f\x98",
+		.ilen	= 375,
+		.rlen	= 375,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 375 - 20, 4, 16 },
+
+	}, { /* Taken from the ChaCha20 test vectors, appended 16 random bytes
+		to nonce, and recomputed the ciphertext with libsodium */
+		.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
+			  "\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
+			  "\x47\x39\x17\xc1\x40\x2b\x80\x09"
+			  "\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x02\x76\x5a\x2e\x63"
+			  "\x33\x9f\xc9\x9a\x66\x32\x0d\xb7"
+			  "\x2a\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x27\x54\x77\x61\x73\x20\x62\x72"
+			  "\x69\x6c\x6c\x69\x67\x2c\x20\x61"
+			  "\x6e\x64\x20\x74\x68\x65\x20\x73"
+			  "\x6c\x69\x74\x68\x79\x20\x74\x6f"
+			  "\x76\x65\x73\x0a\x44\x69\x64\x20"
+			  "\x67\x79\x72\x65\x20\x61\x6e\x64"
+			  "\x20\x67\x69\x6d\x62\x6c\x65\x20"
+			  "\x69\x6e\x20\x74\x68\x65\x20\x77"
+			  "\x61\x62\x65\x3a\x0a\x41\x6c\x6c"
+			  "\x20\x6d\x69\x6d\x73\x79\x20\x77"
+			  "\x65\x72\x65\x20\x74\x68\x65\x20"
+			  "\x62\x6f\x72\x6f\x67\x6f\x76\x65"
+			  "\x73\x2c\x0a\x41\x6e\x64\x20\x74"
+			  "\x68\x65\x20\x6d\x6f\x6d\x65\x20"
+			  "\x72\x61\x74\x68\x73\x20\x6f\x75"
+			  "\x74\x67\x72\x61\x62\x65\x2e",
+		.result	= "\x95\xb9\x51\xe7\x8f\xb4\xa4\x03"
+			  "\xca\x37\xcc\xde\x60\x1d\x8c\xe2"
+			  "\xf1\xbb\x8a\x13\x7f\x61\x85\xcc"
+			  "\xad\xf4\xf0\xdc\x86\xa6\x1e\x10"
+			  "\xbc\x8e\xcb\x38\x2b\xa5\xc8\x8f"
+			  "\xaa\x03\x3d\x53\x4a\x42\xb1\x33"
+			  "\xfc\xd3\xef\xf0\x8e\x7e\x10\x9c"
+			  "\x6f\x12\x5e\xd4\x96\xfe\x5b\x08"
+			  "\xb6\x48\xf0\x14\x74\x51\x18\x7c"
+			  "\x07\x92\xfc\xac\x9d\xf1\x94\xc0"
+			  "\xc1\x9d\xc5\x19\x43\x1f\x1d\xbb"
+			  "\x07\xf0\x1b\x14\x25\x45\xbb\xcb"
+			  "\x5c\xe2\x8b\x28\xf3\xcf\x47\x29"
+			  "\x27\x79\x67\x24\xa6\x87\xc2\x11"
+			  "\x65\x03\xfa\x45\xf7\x9e\x53\x7a"
+			  "\x99\xf1\x82\x25\x4f\x8d\x07",
+		.ilen	= 127,
+		.rlen	= 127,
+	}, { /* Taken from the ChaCha20 test vectors, appended 16 random bytes
+		to nonce, and recomputed the ciphertext with libsodium */
+		.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
+			  "\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
+			  "\x47\x39\x17\xc1\x40\x2b\x80\x09"
+			  "\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x01\x31\x58\xa3\x5a"
+			  "\x25\x5d\x05\x17\x58\xe9\x5e\xd4"
+			  "\x1c\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x49\xee\xe0\xdc\x24\x90\x40\xcd"
+			  "\xc5\x40\x8f\x47\x05\xbc\xdd\x81"
+			  "\x47\xc6\x8d\xe6\xb1\x8f\xd7\xcb"
+			  "\x09\x0e\x6e\x22\x48\x1f\xbf\xb8"
+			  "\x5c\xf7\x1e\x8a\xc1\x23\xf2\xd4"
+			  "\x19\x4b\x01\x0f\x4e\xa4\x43\xce"
+			  "\x01\xc6\x67\xda\x03\x91\x18\x90"
+			  "\xa5\xa4\x8e\x45\x03\xb3\x2d\xac"
+			  "\x74\x92\xd3\x53\x47\xc8\xdd\x25"
+			  "\x53\x6c\x02\x03\x87\x0d\x11\x0c"
+			  "\x58\xe3\x12\x18\xfd\x2a\x5b\x40"
+			  "\x0c\x30\xf0\xb8\x3f\x43\xce\xae"
+			  "\x65\x3a\x7d\x7c\xf4\x54\xaa\xcc"
+			  "\x33\x97\xc3\x77\xba\xc5\x70\xde"
+			  "\xd7\xd5\x13\xa5\x65\xc4\x5f\x0f"
+			  "\x46\x1a\x0d\x97\xb5\xf3\xbb\x3c"
+			  "\x84\x0f\x2b\xc5\xaa\xea\xf2\x6c"
+			  "\xc9\xb5\x0c\xee\x15\xf3\x7d\xbe"
+			  "\x9f\x7b\x5a\xa6\xae\x4f\x83\xb6"
+			  "\x79\x49\x41\xf4\x58\x18\xcb\x86"
+			  "\x7f\x30\x0e\xf8\x7d\x44\x36\xea"
+			  "\x75\xeb\x88\x84\x40\x3c\xad\x4f"
+			  "\x6f\x31\x6b\xaa\x5d\xe5\xa5\xc5"
+			  "\x21\x66\xe9\xa7\xe3\xb2\x15\x88"
+			  "\x78\xf6\x79\xa1\x59\x47\x12\x4e"
+			  "\x9f\x9f\x64\x1a\xa0\x22\x5b\x08"
+			  "\xbe\x7c\x36\xc2\x2b\x66\x33\x1b"
+			  "\xdd\x60\x71\xf7\x47\x8c\x61\xc3"
+			  "\xda\x8a\x78\x1e\x16\xfa\x1e\x86"
+			  "\x81\xa6\x17\x2a\xa7\xb5\xc2\xe7"
+			  "\xa4\xc7\x42\xf1\xcf\x6a\xca\xb4"
+			  "\x45\xcf\xf3\x93\xf0\xe7\xea\xf6"
+			  "\xf4\xe6\x33\x43\x84\x93\xa5\x67"
+			  "\x9b\x16\x58\x58\x80\x0f\x2b\x5c"
+			  "\x24\x74\x75\x7f\x95\x81\xb7\x30"
+			  "\x7a\x33\xa7\xf7\x94\x87\x32\x27"
+			  "\x10\x5d\x14\x4c\x43\x29\xdd\x26"
+			  "\xbd\x3e\x3c\x0e\xfe\x0e\xa5\x10"
+			  "\xea\x6b\x64\xfd\x73\xc6\xed\xec"
+			  "\xa8\xc9\xbf\xb3\xba\x0b\x4d\x07"
+			  "\x70\xfc\x16\xfd\x79\x1e\xd7\xc5"
+			  "\x49\x4e\x1c\x8b\x8d\x79\x1b\xb1"
+			  "\xec\xca\x60\x09\x4c\x6a\xd5\x09"
+			  "\x49\x46\x00\x88\x22\x8d\xce\xea"
+			  "\xb1\x17\x11\xde\x42\xd2\x23\xc1"
+			  "\x72\x11\xf5\x50\x73\x04\x40\x47"
+			  "\xf9\x5d\xe7\xa7\x26\xb1\x7e\xb0"
+			  "\x3f\x58\xc1\x52\xab\x12\x67\x9d"
+			  "\x3f\x43\x4b\x68\xd4\x9c\x68\x38"
+			  "\x07\x8a\x2d\x3e\xf3\xaf\x6a\x4b"
+			  "\xf9\xe5\x31\x69\x22\xf9\xa6\x69"
+			  "\xc6\x9c\x96\x9a\x12\x35\x95\x1d"
+			  "\x95\xd5\xdd\xbe\xbf\x93\x53\x24"
+			  "\xfd\xeb\xc2\x0a\x64\xb0\x77\x00"
+			  "\x6f\x88\xc4\x37\x18\x69\x7c\xd7"
+			  "\x41\x92\x55\x4c\x03\xa1\x9a\x4b"
+			  "\x15\xe5\xdf\x7f\x37\x33\x72\xc1"
+			  "\x8b\x10\x67\xa3\x01\x57\x94\x25"
+			  "\x7b\x38\x71\x7e\xdd\x1e\xcc\x73"
+			  "\x55\xd2\x8e\xeb\x07\xdd\xf1\xda"
+			  "\x58\xb1\x47\x90\xfe\x42\x21\x72"
+			  "\xa3\x54\x7a\xa0\x40\xec\x9f\xdd"
+			  "\xc6\x84\x6e\xca\xae\xe3\x68\xb4"
+			  "\x9d\xe4\x78\xff\x57\xf2\xf8\x1b"
+			  "\x03\xa1\x31\xd9\xde\x8d\xf5\x22"
+			  "\x9c\xdd\x20\xa4\x1e\x27\xb1\x76"
+			  "\x4f\x44\x55\xe2\x9b\xa1\x9c\xfe"
+			  "\x54\xf7\x27\x1b\xf4\xde\x02\xf5"
+			  "\x1b\x55\x48\x5c\xdc\x21\x4b\x9e"
+			  "\x4b\x6e\xed\x46\x23\xdc\x65\xb2"
+			  "\xcf\x79\x5f\x28\xe0\x9e\x8b\xe7"
+			  "\x4c\x9d\x8a\xff\xc1\xa6\x28\xb8"
+			  "\x65\x69\x8a\x45\x29\xef\x74\x85"
+			  "\xde\x79\xc7\x08\xae\x30\xb0\xf4"
+			  "\xa3\x1d\x51\x41\xab\xce\xcb\xf6"
+			  "\xb5\xd8\x6d\xe0\x85\xe1\x98\xb3"
+			  "\x43\xbb\x86\x83\x0a\xa0\xf5\xb7"
+			  "\x04\x0b\xfa\x71\x1f\xb0\xf6\xd9"
+			  "\x13\x00\x15\xf0\xc7\xeb\x0d\x5a"
+			  "\x9f\xd7\xb9\x6c\x65\x14\x22\x45"
+			  "\x6e\x45\x32\x3e\x7e\x60\x1a\x12"
+			  "\x97\x82\x14\xfb\xaa\x04\x22\xfa"
+			  "\xa0\xe5\x7e\x8c\x78\x02\x48\x5d"
+			  "\x78\x33\x5a\x7c\xad\xdb\x29\xce"
+			  "\xbb\x8b\x61\xa4\xb7\x42\xe2\xac"
+			  "\x8b\x1a\xd9\x2f\x0b\x8b\x62\x21"
+			  "\x83\x35\x7e\xad\x73\xc2\xb5\x6c"
+			  "\x10\x26\x38\x07\xe5\xc7\x36\x80"
+			  "\xe2\x23\x12\x61\xf5\x48\x4b\x2b"
+			  "\xc5\xdf\x15\xd9\x87\x01\xaa\xac"
+			  "\x1e\x7c\xad\x73\x78\x18\x63\xe0"
+			  "\x8b\x9f\x81\xd8\x12\x6a\x28\x10"
+			  "\xbe\x04\x68\x8a\x09\x7c\x1b\x1c"
+			  "\x83\x66\x80\x47\x80\xe8\xfd\x35"
+			  "\x1c\x97\x6f\xae\x49\x10\x66\xcc"
+			  "\xc6\xd8\xcc\x3a\x84\x91\x20\x77"
+			  "\x72\xe4\x24\xd2\x37\x9f\xc5\xc9"
+			  "\x25\x94\x10\x5f\x40\x00\x64\x99"
+			  "\xdc\xae\xd7\x21\x09\x78\x50\x15"
+			  "\xac\x5f\xc6\x2c\xa2\x0b\xa9\x39"
+			  "\x87\x6e\x6d\xab\xde\x08\x51\x16"
+			  "\xc7\x13\xe9\xea\xed\x06\x8e\x2c"
+			  "\xf8\x37\x8c\xf0\xa6\x96\x8d\x43"
+			  "\xb6\x98\x37\xb2\x43\xed\xde\xdf"
+			  "\x89\x1a\xe7\xeb\x9d\xa1\x7b\x0b"
+			  "\x77\xb0\xe2\x75\xc0\xf1\x98\xd9"
+			  "\x80\x55\xc9\x34\x91\xd1\x59\xe8"
+			  "\x4b\x0f\xc1\xa9\x4b\x7a\x84\x06"
+			  "\x20\xa8\x5d\xfa\xd1\xde\x70\x56"
+			  "\x2f\x9e\x91\x9c\x20\xb3\x24\xd8"
+			  "\x84\x3d\xe1\x8c\x7e\x62\x52\xe5"
+			  "\x44\x4b\x9f\xc2\x93\x03\xea\x2b"
+			  "\x59\xc5\xfa\x3f\x91\x2b\xbb\x23"
+			  "\xf5\xb2\x7b\xf5\x38\xaf\xb3\xee"
+			  "\x63\xdc\x7b\xd1\xff\xaa\x8b\xab"
+			  "\x82\x6b\x37\x04\xeb\x74\xbe\x79"
+			  "\xb9\x83\x90\xef\x20\x59\x46\xff"
+			  "\xe9\x97\x3e\x2f\xee\xb6\x64\x18"
+			  "\x38\x4c\x7a\x4a\xf9\x61\xe8\x9a"
+			  "\xa1\xb5\x01\xa6\x47\xd3\x11\xd4"
+			  "\xce\xd3\x91\x49\x88\xc7\xb8\x4d"
+			  "\xb1\xb9\x07\x6d\x16\x72\xae\x46"
+			  "\x5e\x03\xa1\x4b\xb6\x02\x30\xa8"
+			  "\x3d\xa9\x07\x2a\x7c\x19\xe7\x62"
+			  "\x87\xe3\x82\x2f\x6f\xe1\x09\xd9"
+			  "\x94\x97\xea\xdd\x58\x9e\xae\x76"
+			  "\x7e\x35\xe5\xb4\xda\x7e\xf4\xde"
+			  "\xf7\x32\x87\xcd\x93\xbf\x11\x56"
+			  "\x11\xbe\x08\x74\xe1\x69\xad\xe2"
+			  "\xd7\xf8\x86\x75\x8a\x3c\xa4\xbe"
+			  "\x70\xa7\x1b\xfc\x0b\x44\x2a\x76"
+			  "\x35\xea\x5d\x85\x81\xaf\x85\xeb"
+			  "\xa0\x1c\x61\xc2\xf7\x4f\xa5\xdc"
+			  "\x02\x7f\xf6\x95\x40\x6e\x8a\x9a"
+			  "\xf3\x5d\x25\x6e\x14\x3a\x22\xc9"
+			  "\x37\x1c\xeb\x46\x54\x3f\xa5\x91"
+			  "\xc2\xb5\x8c\xfe\x53\x08\x97\x32"
+			  "\x1b\xb2\x30\x27\xfe\x25\x5d\xdc"
+			  "\x08\x87\xd0\xe5\x94\x1a\xd4\xf1"
+			  "\xfe\xd6\xb4\xa3\xe6\x74\x81\x3c"
+			  "\x1b\xb7\x31\xa7\x22\xfd\xd4\xdd"
+			  "\x20\x4e\x7c\x51\xb0\x60\x73\xb8"
+			  "\x9c\xac\x91\x90\x7e\x01\xb0\xe1"
+			  "\x8a\x2f\x75\x1c\x53\x2a\x98\x2a"
+			  "\x06\x52\x95\x52\xb2\xe9\x25\x2e"
+			  "\x4c\xe2\x5a\x00\xb2\x13\x81\x03"
+			  "\x77\x66\x0d\xa5\x99\xda\x4e\x8c"
+			  "\xac\xf3\x13\x53\x27\x45\xaf\x64"
+			  "\x46\xdc\xea\x23\xda\x97\xd1\xab"
+			  "\x7d\x6c\x30\x96\x1f\xbc\x06\x34"
+			  "\x18\x0b\x5e\x21\x35\x11\x8d\x4c"
+			  "\xe0\x2d\xe9\x50\x16\x74\x81\xa8"
+			  "\xb4\x34\xb9\x72\x42\xa6\xcc\xbc"
+			  "\xca\x34\x83\x27\x10\x5b\x68\x45"
+			  "\x8f\x52\x22\x0c\x55\x3d\x29\x7c"
+			  "\xe3\xc0\x66\x05\x42\x91\x5f\x58"
+			  "\xfe\x4a\x62\xd9\x8c\xa9\x04\x19"
+			  "\x04\xa9\x08\x4b\x57\xfc\x67\x53"
+			  "\x08\x7c\xbc\x66\x8a\xb0\xb6\x9f"
+			  "\x92\xd6\x41\x7c\x5b\x2a\x00\x79"
+			  "\x72",
+		.result	= "\x3a\x92\xee\x53\x31\xaf\x2b\x60"
+			  "\x5f\x55\x8d\x00\x5d\xfc\x74\x97"
+			  "\x28\x54\xf4\xa5\x75\xf1\x9b\x25"
+			  "\x62\x1c\xc0\xe0\x13\xc8\x87\x53"
+			  "\xd0\xf3\xa7\x97\x1f\x3b\x1e\xea"
+			  "\xe0\xe5\x2a\xd1\xdd\xa4\x3b\x50"
+			  "\x45\xa3\x0d\x7e\x1b\xc9\xa0\xad"
+			  "\xb9\x2c\x54\xa6\xc7\x55\x16\xd0"
+			  "\xc5\x2e\x02\x44\x35\xd0\x7e\x67"
+			  "\xf2\xc4\x9b\xcd\x95\x10\xcc\x29"
+			  "\x4b\xfa\x86\x87\xbe\x40\x36\xbe"
+			  "\xe1\xa3\x52\x89\x55\x20\x9b\xc2"
+			  "\xab\xf2\x31\x34\x16\xad\xc8\x17"
+			  "\x65\x24\xc0\xff\x12\x37\xfe\x5a"
+			  "\x62\x3b\x59\x47\x6c\x5f\x3a\x8e"
+			  "\x3b\xd9\x30\xc8\x7f\x2f\x88\xda"
+			  "\x80\xfd\x02\xda\x7f\x9a\x7a\x73"
+			  "\x59\xc5\x34\x09\x9a\x11\xcb\xa7"
+			  "\xfc\xf6\xa1\xa0\x60\xfb\x43\xbb"
+			  "\xf1\xe9\xd7\xc6\x79\x27\x4e\xff"
+			  "\x22\xb4\x24\xbf\x76\xee\x47\xb9"
+			  "\x6d\x3f\x8b\xb0\x9c\x3c\x43\xdd"
+			  "\xff\x25\x2e\x6d\xa4\x2b\xfb\x5d"
+			  "\x1b\x97\x6c\x55\x0a\x82\x7a\x7b"
+			  "\x94\x34\xc2\xdb\x2f\x1f\xc1\xea"
+			  "\xd4\x4d\x17\x46\x3b\x51\x69\x09"
+			  "\xe4\x99\x32\x25\xfd\x94\xaf\xfb"
+			  "\x10\xf7\x4f\xdd\x0b\x3c\x8b\x41"
+			  "\xb3\x6a\xb7\xd1\x33\xa8\x0c\x2f"
+			  "\x62\x4c\x72\x11\xd7\x74\xe1\x3b"
+			  "\x38\x43\x66\x7b\x6c\x36\x48\xe7"
+			  "\xe3\xe7\x9d\xb9\x42\x73\x7a\x2a"
+			  "\x89\x20\x1a\x41\x80\x03\xf7\x8f"
+			  "\x61\x78\x13\xbf\xfe\x50\xf5\x04"
+			  "\x52\xf9\xac\x47\xf8\x62\x4b\xb2"
+			  "\x24\xa9\xbf\x64\xb0\x18\x69\xd2"
+			  "\xf5\xe4\xce\xc8\xb1\x87\x75\xd6"
+			  "\x2c\x24\x79\x00\x7d\x26\xfb\x44"
+			  "\xe7\x45\x7a\xee\x58\xa5\x83\xc1"
+			  "\xb4\x24\xab\x23\x2f\x4d\xd7\x4f"
+			  "\x1c\xc7\xaa\xa9\x50\xf4\xa3\x07"
+			  "\x12\x13\x89\x74\xdc\x31\x6a\xb2"
+			  "\xf5\x0f\x13\x8b\xb9\xdb\x85\x1f"
+			  "\xf5\xbc\x88\xd9\x95\xea\x31\x6c"
+			  "\x36\x60\xb6\x49\xdc\xc4\xf7\x55"
+			  "\x3f\x21\xc1\xb5\x92\x18\x5e\xbc"
+			  "\x9f\x87\x7f\xe7\x79\x25\x40\x33"
+			  "\xd6\xb9\x33\xd5\x50\xb3\xc7\x89"
+			  "\x1b\x12\xa0\x46\xdd\xa7\xd8\x3e"
+			  "\x71\xeb\x6f\x66\xa1\x26\x0c\x67"
+			  "\xab\xb2\x38\x58\x17\xd8\x44\x3b"
+			  "\x16\xf0\x8e\x62\x8d\x16\x10\x00"
+			  "\x32\x8b\xef\xb9\x28\xd3\xc5\xad"
+			  "\x0a\x19\xa2\xe4\x03\x27\x7d\x94"
+			  "\x06\x18\xcd\xd6\x27\x00\xf9\x1f"
+			  "\xb6\xb3\xfe\x96\x35\x5f\xc4\x1c"
+			  "\x07\x62\x10\x79\x68\x50\xf1\x7e"
+			  "\x29\xe7\xc4\xc4\xe7\xee\x54\xd6"
+			  "\x58\x76\x84\x6d\x8d\xe4\x59\x31"
+			  "\xe9\xf4\xdc\xa1\x1f\xe5\x1a\xd6"
+			  "\xe6\x64\x46\xf5\x77\x9c\x60\x7a"
+			  "\x5e\x62\xe3\x0a\xd4\x9f\x7a\x2d"
+			  "\x7a\xa5\x0a\x7b\x29\x86\x7a\x74"
+			  "\x74\x71\x6b\xca\x7d\x1d\xaa\xba"
+			  "\x39\x84\x43\x76\x35\xfe\x4f\x9b"
+			  "\xbb\xbb\xb5\x6a\x32\xb5\x5d\x41"
+			  "\x51\xf0\x5b\x68\x03\x47\x4b\x8a"
+			  "\xca\x88\xf6\x37\xbd\x73\x51\x70"
+			  "\x66\xfe\x9e\x5f\x21\x9c\xf3\xdd"
+			  "\xc3\xea\x27\xf9\x64\x94\xe1\x19"
+			  "\xa0\xa9\xab\x60\xe0\x0e\xf7\x78"
+			  "\x70\x86\xeb\xe0\xd1\x5c\x05\xd3"
+			  "\xd7\xca\xe0\xc0\x47\x47\x34\xee"
+			  "\x11\xa3\xa3\x54\x98\xb7\x49\x8e"
+			  "\x84\x28\x70\x2c\x9e\xfb\x55\x54"
+			  "\x4d\xf8\x86\xf7\x85\x7c\xbd\xf3"
+			  "\x17\xd8\x47\xcb\xac\xf4\x20\x85"
+			  "\x34\x66\xad\x37\x2d\x5e\x52\xda"
+			  "\x8a\xfe\x98\x55\x30\xe7\x2d\x2b"
+			  "\x19\x10\x8e\x7b\x66\x5e\xdc\xe0"
+			  "\x45\x1f\x7b\xb4\x08\xfb\x8f\xf6"
+			  "\x8c\x89\x21\x34\x55\x27\xb2\x76"
+			  "\xb2\x07\xd9\xd6\x68\x9b\xea\x6b"
+			  "\x2d\xb4\xc4\x35\xdd\xd2\x79\xae"
+			  "\xc7\xd6\x26\x7f\x12\x01\x8c\xa7"
+			  "\xe3\xdb\xa8\xf4\xf7\x2b\xec\x99"
+			  "\x11\x00\xf1\x35\x8c\xcf\xd5\xc9"
+			  "\xbd\x91\x36\x39\x70\xcf\x7d\x70"
+			  "\x47\x1a\xfc\x6b\x56\xe0\x3f\x9c"
+			  "\x60\x49\x01\x72\xa9\xaf\x2c\x9c"
+			  "\xe8\xab\xda\x8c\x14\x19\xf3\x75"
+			  "\x07\x17\x9d\x44\x67\x7a\x2e\xef"
+			  "\xb7\x83\x35\x4a\xd1\x3d\x1c\x84"
+			  "\x32\xdd\xaa\xea\xca\x1d\xdc\x72"
+			  "\x2c\xcc\x43\xcd\x5d\xe3\x21\xa4"
+			  "\xd0\x8a\x4b\x20\x12\xa3\xd5\x86"
+			  "\x76\x96\xff\x5f\x04\x57\x0f\xe6"
+			  "\xba\xe8\x76\x50\x0c\x64\x1d\x83"
+			  "\x9c\x9b\x9a\x9a\x58\x97\x9c\x5c"
+			  "\xb4\xa4\xa6\x3e\x19\xeb\x8f\x5a"
+			  "\x61\xb2\x03\x7b\x35\x19\xbe\xa7"
+			  "\x63\x0c\xfd\xdd\xf9\x90\x6c\x08"
+			  "\x19\x11\xd3\x65\x4a\xf5\x96\x92"
+			  "\x59\xaa\x9c\x61\x0c\x29\xa7\xf8"
+			  "\x14\x39\x37\xbf\x3c\xf2\x16\x72"
+			  "\x02\xfa\xa2\xf3\x18\x67\x5d\xcb"
+			  "\xdc\x4d\xbb\x96\xff\x70\x08\x2d"
+			  "\xc2\xa8\x52\xe1\x34\x5f\x72\xfe"
+			  "\x64\xbf\xca\xa7\x74\x38\xfb\x74"
+			  "\x55\x9c\xfa\x8a\xed\xfb\x98\xeb"
+			  "\x58\x2e\x6c\xe1\x52\x76\x86\xd7"
+			  "\xcf\xa1\xa4\xfc\xb2\x47\x41\x28"
+			  "\xa3\xc1\xe5\xfd\x53\x19\x28\x2b"
+			  "\x37\x04\x65\x96\x99\x7a\x28\x0f"
+			  "\x07\x68\x4b\xc7\x52\x0a\x55\x35"
+			  "\x40\x19\x95\x61\xe8\x59\x40\x1f"
+			  "\x9d\xbf\x78\x7d\x8f\x84\xff\x6f"
+			  "\xd0\xd5\x63\xd2\x22\xbd\xc8\x4e"
+			  "\xfb\xe7\x9f\x06\xe6\xe7\x39\x6d"
+			  "\x6a\x96\x9f\xf0\x74\x7e\xc9\x35"
+			  "\xb7\x26\xb8\x1c\x0a\xa6\x27\x2c"
+			  "\xa2\x2b\xfe\xbe\x0f\x07\x73\xae"
+			  "\x7f\x7f\x54\xf5\x7c\x6a\x0a\x56"
+			  "\x49\xd4\x81\xe5\x85\x53\x99\x1f"
+			  "\x95\x05\x13\x58\x8d\x0e\x1b\x90"
+			  "\xc3\x75\x48\x64\x58\x98\x67\x84"
+			  "\xae\xe2\x21\xa2\x8a\x04\x0a\x0b"
+			  "\x61\xaa\xb0\xd4\x28\x60\x7a\xf8"
+			  "\xbc\x52\xfb\x24\x7f\xed\x0d\x2a"
+			  "\x0a\xb2\xf9\xc6\x95\xb5\x11\xc9"
+			  "\xf4\x0f\x26\x11\xcf\x2a\x57\x87"
+			  "\x7a\xf3\xe7\x94\x65\xc2\xb5\xb3"
+			  "\xab\x98\xe3\xc1\x2b\x59\x19\x7c"
+			  "\xd6\xf3\xf9\xbf\xff\x6d\xc6\x82"
+			  "\x13\x2f\x4a\x2e\xcd\x26\xfe\x2d"
+			  "\x01\x70\xf4\xc2\x7f\x1f\x4c\xcb"
+			  "\x47\x77\x0c\xa0\xa3\x03\xec\xda"
+			  "\xa9\xbf\x0d\x2d\xae\xe4\xb8\x7b"
+			  "\xa9\xbc\x08\xb4\x68\x2e\xc5\x60"
+			  "\x8d\x87\x41\x2b\x0f\x69\xf0\xaf"
+			  "\x5f\xba\x72\x20\x0f\x33\xcd\x6d"
+			  "\x36\x7d\x7b\xd5\x05\xf1\x4b\x05"
+			  "\xc4\xfc\x7f\x80\xb9\x4d\xbd\xf7"
+			  "\x7c\x84\x07\x01\xc2\x40\x66\x5b"
+			  "\x98\xc7\x2c\xe3\x97\xfa\xdf\x87"
+			  "\xa0\x1f\xe9\x21\x42\x0f\x3b\xeb"
+			  "\x89\x1c\x3b\xca\x83\x61\x77\x68"
+			  "\x84\xbb\x60\x87\x38\x2e\x25\xd5"
+			  "\x9e\x04\x41\x70\xac\xda\xc0\x9c"
+			  "\x9c\x69\xea\x8d\x4e\x55\x2a\x29"
+			  "\xed\x05\x4b\x7b\x73\x71\x90\x59"
+			  "\x4d\xc8\xd8\x44\xf0\x4c\xe1\x5e"
+			  "\x84\x47\x55\xcc\x32\x3f\xe7\x97"
+			  "\x42\xc6\x32\xac\x40\xe5\xa5\xc7"
+			  "\x8b\xed\xdb\xf7\x83\xd6\xb1\xc2"
+			  "\x52\x5e\x34\xb7\xeb\x6e\xd9\xfc"
+			  "\xe5\x93\x9a\x97\x3e\xb0\xdc\xd9"
+			  "\xd7\x06\x10\xb6\x1d\x80\x59\xdd"
+			  "\x0d\xfe\x64\x35\xcd\x5d\xec\xf0"
+			  "\xba\xd0\x34\xc9\x2d\x91\xc5\x17"
+			  "\x11",
+		.ilen	= 1281,
+		.rlen	= 1281,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 1200, 1, 80 },
+	},
+};
+
+/*
+ * Same as XChaCha20 test vectors above, but recomputed the ciphertext with
+ * XChaCha12, using a modified libsodium.
+ */
+static struct cipher_testvec xchacha12_tv_template[] = {
+	{
+		.key	= "\x79\xc9\x97\x98\xac\x67\x30\x0b"
+			  "\xbb\x27\x04\xc9\x5c\x34\x1e\x32"
+			  "\x45\xf3\xdc\xb2\x17\x61\xb9\x8e"
+			  "\x52\xff\x45\xb2\x4f\x30\x4f\xc4",
+		.klen	= 32,
+		.iv	= "\xb3\x3f\xfd\x30\x96\x47\x9b\xcf"
+			  "\xbc\x9a\xee\x49\x41\x76\x88\xa0"
+			  "\xa2\x55\x4f\x8d\x95\x38\x94\x19"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00",
+		.result	= "\x1b\x78\x7f\xd7\xa1\x41\x68\xab"
+			  "\x3d\x3f\xd1\x7b\x69\x56\xb2\xd5"
+			  "\x43\xce\xeb\xaf\x36\xf0\x29\x9d"
+			  "\x3a\xfb\x18\xae\x1b",
+		.ilen	= 29,
+		.rlen	= 29,
+	}, {
+		.key	= "\x9d\x23\xbd\x41\x49\xcb\x97\x9c"
+			  "\xcf\x3c\x5c\x94\xdd\x21\x7e\x98"
+			  "\x08\xcb\x0e\x50\xcd\x0f\x67\x81"
+			  "\x22\x35\xea\xaf\x60\x1d\x62\x32",
+		.klen	= 32,
+		.iv	= "\xc0\x47\x54\x82\x66\xb7\xc3\x70"
+			  "\xd3\x35\x66\xa2\x42\x5c\xbf\x30"
+			  "\xd8\x2d\x1e\xaf\x52\x94\x10\x9e"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00",
+		.result	= "\xfb\x32\x09\x1d\x83\x05\xae\x4c"
+			  "\x13\x1f\x12\x71\xf2\xca\xb2\xeb"
+			  "\x5b\x83\x14\x7d\x83\xf6\x57\x77"
+			  "\x2e\x40\x1f\x92\x2c\xf9\xec\x35"
+			  "\x34\x1f\x93\xdf\xfb\x30\xd7\x35"
+			  "\x03\x05\x78\xc1\x20\x3b\x7a\xe3"
+			  "\x62\xa3\x89\xdc\x11\x11\x45\xa8"
+			  "\x82\x89\xa0\xf1\x4e\xc7\x0f\x11"
+			  "\x69\xdd\x0c\x84\x2b\x89\x5c\xdc"
+			  "\xf0\xde\x01\xef\xc5\x65\x79\x23"
+			  "\x87\x67\xd6\x50\xd9\x8d\xd9\x92"
+			  "\x54\x5b\x0e",
+		.ilen	= 91,
+		.rlen	= 91,
+	}, {
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x67\xc6\x69\x73"
+			  "\x51\xff\x4a\xec\x29\xcd\xba\xab"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.result	= "\xdf\x2d\xc6\x21\x2a\x9d\xa1\xbb"
+			  "\xc2\x77\x66\x0c\x5c\x46\xef\xa7"
+			  "\x79\x1b\xb9\xdf\x55\xe2\xf9\x61"
+			  "\x4c\x7b\xa4\x52\x24\xaf\xa2\xda"
+			  "\xd1\x8f\x8f\xa2\x9e\x53\x4d\xc4"
+			  "\xb8\x55\x98\x08\x7c\x08\xd4\x18"
+			  "\x67\x8f\xef\x50\xb1\x5f\xa5\x77"
+			  "\x4c\x25\xe7\x86\x26\x42\xca\x44",
+		.ilen	= 64,
+		.rlen	= 64,
+	}, {
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x02\xf2\xfb\xe3\x46"
+			  "\x7c\xc2\x54\xf8\x1b\xe8\xe7\x8d"
+			  "\x01\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x41\x6e\x79\x20\x73\x75\x62\x6d"
+			  "\x69\x73\x73\x69\x6f\x6e\x20\x74"
+			  "\x6f\x20\x74\x68\x65\x20\x49\x45"
+			  "\x54\x46\x20\x69\x6e\x74\x65\x6e"
+			  "\x64\x65\x64\x20\x62\x79\x20\x74"
+			  "\x68\x65\x20\x43\x6f\x6e\x74\x72"
+			  "\x69\x62\x75\x74\x6f\x72\x20\x66"
+			  "\x6f\x72\x20\x70\x75\x62\x6c\x69"
+			  "\x63\x61\x74\x69\x6f\x6e\x20\x61"
+			  "\x73\x20\x61\x6c\x6c\x20\x6f\x72"
+			  "\x20\x70\x61\x72\x74\x20\x6f\x66"
+			  "\x20\x61\x6e\x20\x49\x45\x54\x46"
+			  "\x20\x49\x6e\x74\x65\x72\x6e\x65"
+			  "\x74\x2d\x44\x72\x61\x66\x74\x20"
+			  "\x6f\x72\x20\x52\x46\x43\x20\x61"
+			  "\x6e\x64\x20\x61\x6e\x79\x20\x73"
+			  "\x74\x61\x74\x65\x6d\x65\x6e\x74"
+			  "\x20\x6d\x61\x64\x65\x20\x77\x69"
+			  "\x74\x68\x69\x6e\x20\x74\x68\x65"
+			  "\x20\x63\x6f\x6e\x74\x65\x78\x74"
+			  "\x20\x6f\x66\x20\x61\x6e\x20\x49"
+			  "\x45\x54\x46\x20\x61\x63\x74\x69"
+			  "\x76\x69\x74\x79\x20\x69\x73\x20"
+			  "\x63\x6f\x6e\x73\x69\x64\x65\x72"
+			  "\x65\x64\x20\x61\x6e\x20\x22\x49"
+			  "\x45\x54\x46\x20\x43\x6f\x6e\x74"
+			  "\x72\x69\x62\x75\x74\x69\x6f\x6e"
+			  "\x22\x2e\x20\x53\x75\x63\x68\x20"
+			  "\x73\x74\x61\x74\x65\x6d\x65\x6e"
+			  "\x74\x73\x20\x69\x6e\x63\x6c\x75"
+			  "\x64\x65\x20\x6f\x72\x61\x6c\x20"
+			  "\x73\x74\x61\x74\x65\x6d\x65\x6e"
+			  "\x74\x73\x20\x69\x6e\x20\x49\x45"
+			  "\x54\x46\x20\x73\x65\x73\x73\x69"
+			  "\x6f\x6e\x73\x2c\x20\x61\x73\x20"
+			  "\x77\x65\x6c\x6c\x20\x61\x73\x20"
+			  "\x77\x72\x69\x74\x74\x65\x6e\x20"
+			  "\x61\x6e\x64\x20\x65\x6c\x65\x63"
+			  "\x74\x72\x6f\x6e\x69\x63\x20\x63"
+			  "\x6f\x6d\x6d\x75\x6e\x69\x63\x61"
+			  "\x74\x69\x6f\x6e\x73\x20\x6d\x61"
+			  "\x64\x65\x20\x61\x74\x20\x61\x6e"
+			  "\x79\x20\x74\x69\x6d\x65\x20\x6f"
+			  "\x72\x20\x70\x6c\x61\x63\x65\x2c"
+			  "\x20\x77\x68\x69\x63\x68\x20\x61"
+			  "\x72\x65\x20\x61\x64\x64\x72\x65"
+			  "\x73\x73\x65\x64\x20\x74\x6f",
+		.result	= "\xe4\xa6\xc8\x30\xc4\x23\x13\xd6"
+			  "\x08\x4d\xc9\xb7\xa5\x64\x7c\xb9"
+			  "\x71\xe2\xab\x3e\xa8\x30\x8a\x1c"
+			  "\x4a\x94\x6d\x9b\xe0\xb3\x6f\xf1"
+			  "\xdc\xe3\x1b\xb3\xa9\x6d\x0d\xd6"
+			  "\xd0\xca\x12\xef\xe7\x5f\xd8\x61"
+			  "\x3c\x82\xd3\x99\x86\x3c\x6f\x66"
+			  "\x02\x06\xdc\x55\xf9\xed\xdf\x38"
+			  "\xb4\xa6\x17\x00\x7f\xef\xbf\x4f"
+			  "\xf8\x36\xf1\x60\x7e\x47\xaf\xdb"
+			  "\x55\x9b\x12\xcb\x56\x44\xa7\x1f"
+			  "\xd3\x1a\x07\x3b\x00\xec\xe6\x4c"
+			  "\xa2\x43\x27\xdf\x86\x19\x4f\x16"
+			  "\xed\xf9\x4a\xf3\x63\x6f\xfa\x7f"
+			  "\x78\x11\xf6\x7d\x97\x6f\xec\x6f"
+			  "\x85\x0f\x5c\x36\x13\x8d\x87\xe0"
+			  "\x80\xb1\x69\x0b\x98\x89\x9c\x4e"
+			  "\xf8\xdd\xee\x5c\x0a\x85\xce\xd4"
+			  "\xea\x1b\x48\xbe\x08\xf8\xe2\xa8"
+			  "\xa5\xb0\x3c\x79\xb1\x15\xb4\xb9"
+			  "\x75\x10\x95\x35\x81\x7e\x26\xe6"
+			  "\x78\xa4\x88\xcf\xdb\x91\x34\x18"
+			  "\xad\xd7\x8e\x07\x7d\xab\x39\xf9"
+			  "\xa3\x9e\xa5\x1d\xbb\xed\x61\xfd"
+			  "\xdc\xb7\x5a\x27\xfc\xb5\xc9\x10"
+			  "\xa8\xcc\x52\x7f\x14\x76\x90\xe7"
+			  "\x1b\x29\x60\x74\xc0\x98\x77\xbb"
+			  "\xe0\x54\xbb\x27\x49\x59\x1e\x62"
+			  "\x3d\xaf\x74\x06\xa4\x42\x6f\xc6"
+			  "\x52\x97\xc4\x1d\xc4\x9f\xe2\xe5"
+			  "\x38\x57\x91\xd1\xa2\x28\xcc\x40"
+			  "\xcc\x70\x59\x37\xfc\x9f\x4b\xda"
+			  "\xa0\xeb\x97\x9a\x7d\xed\x14\x5c"
+			  "\x9c\xb7\x93\x26\x41\xa8\x66\xdd"
+			  "\x87\x6a\xc0\xd3\xc2\xa9\x3e\xae"
+			  "\xe9\x72\xfe\xd1\xb3\xac\x38\xea"
+			  "\x4d\x15\xa9\xd5\x36\x61\xe9\x96"
+			  "\x6c\x23\xf8\x43\xe4\x92\x29\xd9"
+			  "\x8b\x78\xf7\x0a\x52\xe0\x19\x5b"
+			  "\x59\x69\x5b\x5d\xa1\x53\xc4\x68"
+			  "\xe1\xbb\xac\x89\x14\xe2\xe2\x85"
+			  "\x41\x18\xf5\xb3\xd1\xfa\x68\x19"
+			  "\x44\x78\xdc\xcf\xe7\x88\x2d\x52"
+			  "\x5f\x40\xb5\x7e\xf8\x88\xa2\xae"
+			  "\x4a\xb2\x07\x35\x9d\x9b\x07\x88"
+			  "\xb7\x00\xd0\x0c\xb6\xa0\x47\x59"
+			  "\xda\x4e\xc9\xab\x9b\x8a\x7b",
+
+		.ilen	= 375,
+		.rlen	= 375,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 375 - 20, 4, 16 },
+
+	}, {
+		.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
+			  "\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
+			  "\x47\x39\x17\xc1\x40\x2b\x80\x09"
+			  "\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x02\x76\x5a\x2e\x63"
+			  "\x33\x9f\xc9\x9a\x66\x32\x0d\xb7"
+			  "\x2a\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x27\x54\x77\x61\x73\x20\x62\x72"
+			  "\x69\x6c\x6c\x69\x67\x2c\x20\x61"
+			  "\x6e\x64\x20\x74\x68\x65\x20\x73"
+			  "\x6c\x69\x74\x68\x79\x20\x74\x6f"
+			  "\x76\x65\x73\x0a\x44\x69\x64\x20"
+			  "\x67\x79\x72\x65\x20\x61\x6e\x64"
+			  "\x20\x67\x69\x6d\x62\x6c\x65\x20"
+			  "\x69\x6e\x20\x74\x68\x65\x20\x77"
+			  "\x61\x62\x65\x3a\x0a\x41\x6c\x6c"
+			  "\x20\x6d\x69\x6d\x73\x79\x20\x77"
+			  "\x65\x72\x65\x20\x74\x68\x65\x20"
+			  "\x62\x6f\x72\x6f\x67\x6f\x76\x65"
+			  "\x73\x2c\x0a\x41\x6e\x64\x20\x74"
+			  "\x68\x65\x20\x6d\x6f\x6d\x65\x20"
+			  "\x72\x61\x74\x68\x73\x20\x6f\x75"
+			  "\x74\x67\x72\x61\x62\x65\x2e",
+		.result	= "\xb9\x68\xbc\x6a\x24\xbc\xcc\xd8"
+			  "\x9b\x2a\x8d\x5b\x96\xaf\x56\xe3"
+			  "\x11\x61\xe7\xa7\x9b\xce\x4e\x7d"
+			  "\x60\x02\x48\xac\xeb\xd5\x3a\x26"
+			  "\x9d\x77\x3b\xb5\x32\x13\x86\x8e"
+			  "\x20\x82\x26\x72\xae\x64\x1b\x7e"
+			  "\x2e\x01\x68\xb4\x87\x45\xa1\x24"
+			  "\xe4\x48\x40\xf0\xaa\xac\xee\xa9"
+			  "\xfc\x31\xad\x9d\x89\xa3\xbb\xd2"
+			  "\xe4\x25\x13\xad\x0f\x5e\xdf\x3c"
+			  "\x27\xab\xb8\x62\x46\x22\x30\x48"
+			  "\x55\x2c\x4e\x84\x78\x1d\x0d\x34"
+			  "\x8d\x3c\x91\x0a\x7f\x5b\x19\x9f"
+			  "\x97\x05\x4c\xa7\x62\x47\x8b\xc5"
+			  "\x44\x2e\x20\x33\xdd\xa0\x82\xa9"
+			  "\x25\x76\x37\xe6\x3c\x67\x5b",
+		.ilen	= 127,
+		.rlen	= 127,
+	}, {
+		.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
+			  "\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
+			  "\x47\x39\x17\xc1\x40\x2b\x80\x09"
+			  "\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x01\x31\x58\xa3\x5a"
+			  "\x25\x5d\x05\x17\x58\xe9\x5e\xd4"
+			  "\x1c\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x49\xee\xe0\xdc\x24\x90\x40\xcd"
+			  "\xc5\x40\x8f\x47\x05\xbc\xdd\x81"
+			  "\x47\xc6\x8d\xe6\xb1\x8f\xd7\xcb"
+			  "\x09\x0e\x6e\x22\x48\x1f\xbf\xb8"
+			  "\x5c\xf7\x1e\x8a\xc1\x23\xf2\xd4"
+			  "\x19\x4b\x01\x0f\x4e\xa4\x43\xce"
+			  "\x01\xc6\x67\xda\x03\x91\x18\x90"
+			  "\xa5\xa4\x8e\x45\x03\xb3\x2d\xac"
+			  "\x74\x92\xd3\x53\x47\xc8\xdd\x25"
+			  "\x53\x6c\x02\x03\x87\x0d\x11\x0c"
+			  "\x58\xe3\x12\x18\xfd\x2a\x5b\x40"
+			  "\x0c\x30\xf0\xb8\x3f\x43\xce\xae"
+			  "\x65\x3a\x7d\x7c\xf4\x54\xaa\xcc"
+			  "\x33\x97\xc3\x77\xba\xc5\x70\xde"
+			  "\xd7\xd5\x13\xa5\x65\xc4\x5f\x0f"
+			  "\x46\x1a\x0d\x97\xb5\xf3\xbb\x3c"
+			  "\x84\x0f\x2b\xc5\xaa\xea\xf2\x6c"
+			  "\xc9\xb5\x0c\xee\x15\xf3\x7d\xbe"
+			  "\x9f\x7b\x5a\xa6\xae\x4f\x83\xb6"
+			  "\x79\x49\x41\xf4\x58\x18\xcb\x86"
+			  "\x7f\x30\x0e\xf8\x7d\x44\x36\xea"
+			  "\x75\xeb\x88\x84\x40\x3c\xad\x4f"
+			  "\x6f\x31\x6b\xaa\x5d\xe5\xa5\xc5"
+			  "\x21\x66\xe9\xa7\xe3\xb2\x15\x88"
+			  "\x78\xf6\x79\xa1\x59\x47\x12\x4e"
+			  "\x9f\x9f\x64\x1a\xa0\x22\x5b\x08"
+			  "\xbe\x7c\x36\xc2\x2b\x66\x33\x1b"
+			  "\xdd\x60\x71\xf7\x47\x8c\x61\xc3"
+			  "\xda\x8a\x78\x1e\x16\xfa\x1e\x86"
+			  "\x81\xa6\x17\x2a\xa7\xb5\xc2\xe7"
+			  "\xa4\xc7\x42\xf1\xcf\x6a\xca\xb4"
+			  "\x45\xcf\xf3\x93\xf0\xe7\xea\xf6"
+			  "\xf4\xe6\x33\x43\x84\x93\xa5\x67"
+			  "\x9b\x16\x58\x58\x80\x0f\x2b\x5c"
+			  "\x24\x74\x75\x7f\x95\x81\xb7\x30"
+			  "\x7a\x33\xa7\xf7\x94\x87\x32\x27"
+			  "\x10\x5d\x14\x4c\x43\x29\xdd\x26"
+			  "\xbd\x3e\x3c\x0e\xfe\x0e\xa5\x10"
+			  "\xea\x6b\x64\xfd\x73\xc6\xed\xec"
+			  "\xa8\xc9\xbf\xb3\xba\x0b\x4d\x07"
+			  "\x70\xfc\x16\xfd\x79\x1e\xd7\xc5"
+			  "\x49\x4e\x1c\x8b\x8d\x79\x1b\xb1"
+			  "\xec\xca\x60\x09\x4c\x6a\xd5\x09"
+			  "\x49\x46\x00\x88\x22\x8d\xce\xea"
+			  "\xb1\x17\x11\xde\x42\xd2\x23\xc1"
+			  "\x72\x11\xf5\x50\x73\x04\x40\x47"
+			  "\xf9\x5d\xe7\xa7\x26\xb1\x7e\xb0"
+			  "\x3f\x58\xc1\x52\xab\x12\x67\x9d"
+			  "\x3f\x43\x4b\x68\xd4\x9c\x68\x38"
+			  "\x07\x8a\x2d\x3e\xf3\xaf\x6a\x4b"
+			  "\xf9\xe5\x31\x69\x22\xf9\xa6\x69"
+			  "\xc6\x9c\x96\x9a\x12\x35\x95\x1d"
+			  "\x95\xd5\xdd\xbe\xbf\x93\x53\x24"
+			  "\xfd\xeb\xc2\x0a\x64\xb0\x77\x00"
+			  "\x6f\x88\xc4\x37\x18\x69\x7c\xd7"
+			  "\x41\x92\x55\x4c\x03\xa1\x9a\x4b"
+			  "\x15\xe5\xdf\x7f\x37\x33\x72\xc1"
+			  "\x8b\x10\x67\xa3\x01\x57\x94\x25"
+			  "\x7b\x38\x71\x7e\xdd\x1e\xcc\x73"
+			  "\x55\xd2\x8e\xeb\x07\xdd\xf1\xda"
+			  "\x58\xb1\x47\x90\xfe\x42\x21\x72"
+			  "\xa3\x54\x7a\xa0\x40\xec\x9f\xdd"
+			  "\xc6\x84\x6e\xca\xae\xe3\x68\xb4"
+			  "\x9d\xe4\x78\xff\x57\xf2\xf8\x1b"
+			  "\x03\xa1\x31\xd9\xde\x8d\xf5\x22"
+			  "\x9c\xdd\x20\xa4\x1e\x27\xb1\x76"
+			  "\x4f\x44\x55\xe2\x9b\xa1\x9c\xfe"
+			  "\x54\xf7\x27\x1b\xf4\xde\x02\xf5"
+			  "\x1b\x55\x48\x5c\xdc\x21\x4b\x9e"
+			  "\x4b\x6e\xed\x46\x23\xdc\x65\xb2"
+			  "\xcf\x79\x5f\x28\xe0\x9e\x8b\xe7"
+			  "\x4c\x9d\x8a\xff\xc1\xa6\x28\xb8"
+			  "\x65\x69\x8a\x45\x29\xef\x74\x85"
+			  "\xde\x79\xc7\x08\xae\x30\xb0\xf4"
+			  "\xa3\x1d\x51\x41\xab\xce\xcb\xf6"
+			  "\xb5\xd8\x6d\xe0\x85\xe1\x98\xb3"
+			  "\x43\xbb\x86\x83\x0a\xa0\xf5\xb7"
+			  "\x04\x0b\xfa\x71\x1f\xb0\xf6\xd9"
+			  "\x13\x00\x15\xf0\xc7\xeb\x0d\x5a"
+			  "\x9f\xd7\xb9\x6c\x65\x14\x22\x45"
+			  "\x6e\x45\x32\x3e\x7e\x60\x1a\x12"
+			  "\x97\x82\x14\xfb\xaa\x04\x22\xfa"
+			  "\xa0\xe5\x7e\x8c\x78\x02\x48\x5d"
+			  "\x78\x33\x5a\x7c\xad\xdb\x29\xce"
+			  "\xbb\x8b\x61\xa4\xb7\x42\xe2\xac"
+			  "\x8b\x1a\xd9\x2f\x0b\x8b\x62\x21"
+			  "\x83\x35\x7e\xad\x73\xc2\xb5\x6c"
+			  "\x10\x26\x38\x07\xe5\xc7\x36\x80"
+			  "\xe2\x23\x12\x61\xf5\x48\x4b\x2b"
+			  "\xc5\xdf\x15\xd9\x87\x01\xaa\xac"
+			  "\x1e\x7c\xad\x73\x78\x18\x63\xe0"
+			  "\x8b\x9f\x81\xd8\x12\x6a\x28\x10"
+			  "\xbe\x04\x68\x8a\x09\x7c\x1b\x1c"
+			  "\x83\x66\x80\x47\x80\xe8\xfd\x35"
+			  "\x1c\x97\x6f\xae\x49\x10\x66\xcc"
+			  "\xc6\xd8\xcc\x3a\x84\x91\x20\x77"
+			  "\x72\xe4\x24\xd2\x37\x9f\xc5\xc9"
+			  "\x25\x94\x10\x5f\x40\x00\x64\x99"
+			  "\xdc\xae\xd7\x21\x09\x78\x50\x15"
+			  "\xac\x5f\xc6\x2c\xa2\x0b\xa9\x39"
+			  "\x87\x6e\x6d\xab\xde\x08\x51\x16"
+			  "\xc7\x13\xe9\xea\xed\x06\x8e\x2c"
+			  "\xf8\x37\x8c\xf0\xa6\x96\x8d\x43"
+			  "\xb6\x98\x37\xb2\x43\xed\xde\xdf"
+			  "\x89\x1a\xe7\xeb\x9d\xa1\x7b\x0b"
+			  "\x77\xb0\xe2\x75\xc0\xf1\x98\xd9"
+			  "\x80\x55\xc9\x34\x91\xd1\x59\xe8"
+			  "\x4b\x0f\xc1\xa9\x4b\x7a\x84\x06"
+			  "\x20\xa8\x5d\xfa\xd1\xde\x70\x56"
+			  "\x2f\x9e\x91\x9c\x20\xb3\x24\xd8"
+			  "\x84\x3d\xe1\x8c\x7e\x62\x52\xe5"
+			  "\x44\x4b\x9f\xc2\x93\x03\xea\x2b"
+			  "\x59\xc5\xfa\x3f\x91\x2b\xbb\x23"
+			  "\xf5\xb2\x7b\xf5\x38\xaf\xb3\xee"
+			  "\x63\xdc\x7b\xd1\xff\xaa\x8b\xab"
+			  "\x82\x6b\x37\x04\xeb\x74\xbe\x79"
+			  "\xb9\x83\x90\xef\x20\x59\x46\xff"
+			  "\xe9\x97\x3e\x2f\xee\xb6\x64\x18"
+			  "\x38\x4c\x7a\x4a\xf9\x61\xe8\x9a"
+			  "\xa1\xb5\x01\xa6\x47\xd3\x11\xd4"
+			  "\xce\xd3\x91\x49\x88\xc7\xb8\x4d"
+			  "\xb1\xb9\x07\x6d\x16\x72\xae\x46"
+			  "\x5e\x03\xa1\x4b\xb6\x02\x30\xa8"
+			  "\x3d\xa9\x07\x2a\x7c\x19\xe7\x62"
+			  "\x87\xe3\x82\x2f\x6f\xe1\x09\xd9"
+			  "\x94\x97\xea\xdd\x58\x9e\xae\x76"
+			  "\x7e\x35\xe5\xb4\xda\x7e\xf4\xde"
+			  "\xf7\x32\x87\xcd\x93\xbf\x11\x56"
+			  "\x11\xbe\x08\x74\xe1\x69\xad\xe2"
+			  "\xd7\xf8\x86\x75\x8a\x3c\xa4\xbe"
+			  "\x70\xa7\x1b\xfc\x0b\x44\x2a\x76"
+			  "\x35\xea\x5d\x85\x81\xaf\x85\xeb"
+			  "\xa0\x1c\x61\xc2\xf7\x4f\xa5\xdc"
+			  "\x02\x7f\xf6\x95\x40\x6e\x8a\x9a"
+			  "\xf3\x5d\x25\x6e\x14\x3a\x22\xc9"
+			  "\x37\x1c\xeb\x46\x54\x3f\xa5\x91"
+			  "\xc2\xb5\x8c\xfe\x53\x08\x97\x32"
+			  "\x1b\xb2\x30\x27\xfe\x25\x5d\xdc"
+			  "\x08\x87\xd0\xe5\x94\x1a\xd4\xf1"
+			  "\xfe\xd6\xb4\xa3\xe6\x74\x81\x3c"
+			  "\x1b\xb7\x31\xa7\x22\xfd\xd4\xdd"
+			  "\x20\x4e\x7c\x51\xb0\x60\x73\xb8"
+			  "\x9c\xac\x91\x90\x7e\x01\xb0\xe1"
+			  "\x8a\x2f\x75\x1c\x53\x2a\x98\x2a"
+			  "\x06\x52\x95\x52\xb2\xe9\x25\x2e"
+			  "\x4c\xe2\x5a\x00\xb2\x13\x81\x03"
+			  "\x77\x66\x0d\xa5\x99\xda\x4e\x8c"
+			  "\xac\xf3\x13\x53\x27\x45\xaf\x64"
+			  "\x46\xdc\xea\x23\xda\x97\xd1\xab"
+			  "\x7d\x6c\x30\x96\x1f\xbc\x06\x34"
+			  "\x18\x0b\x5e\x21\x35\x11\x8d\x4c"
+			  "\xe0\x2d\xe9\x50\x16\x74\x81\xa8"
+			  "\xb4\x34\xb9\x72\x42\xa6\xcc\xbc"
+			  "\xca\x34\x83\x27\x10\x5b\x68\x45"
+			  "\x8f\x52\x22\x0c\x55\x3d\x29\x7c"
+			  "\xe3\xc0\x66\x05\x42\x91\x5f\x58"
+			  "\xfe\x4a\x62\xd9\x8c\xa9\x04\x19"
+			  "\x04\xa9\x08\x4b\x57\xfc\x67\x53"
+			  "\x08\x7c\xbc\x66\x8a\xb0\xb6\x9f"
+			  "\x92\xd6\x41\x7c\x5b\x2a\x00\x79"
+			  "\x72",
+		.result	= "\xe1\xb6\x8b\x5c\x80\xb8\xcc\x08"
+			  "\x1b\x84\xb2\xd1\xad\xa4\x70\xac"
+			  "\x67\xa9\x39\x27\xac\xb4\x5b\xb7"
+			  "\x4c\x26\x77\x23\x1d\xce\x0a\xbe"
+			  "\x18\x9e\x42\x8b\xbd\x7f\xd6\xf1"
+			  "\xf1\x6b\xe2\x6d\x7f\x92\x0e\xcb"
+			  "\xb8\x79\xba\xb4\xac\x7e\x2d\xc0"
+			  "\x9e\x83\x81\x91\xd5\xea\xc3\x12"
+			  "\x8d\xa4\x26\x70\xa4\xf9\x71\x0b"
+			  "\xbd\x2e\xe1\xb3\x80\x42\x25\xb3"
+			  "\x0b\x31\x99\xe1\x0d\xde\xa6\x90"
+			  "\xf2\xa3\x10\xf7\xe5\xf3\x83\x1e"
+			  "\x2c\xfb\x4d\xf0\x45\x3d\x28\x3c"
+			  "\xb8\xf1\xcb\xbf\x67\xd8\x43\x5a"
+			  "\x9d\x7b\x73\x29\x88\x0f\x13\x06"
+			  "\x37\x50\x0d\x7c\xe6\x9b\x07\xdd"
+			  "\x7e\x01\x1f\x81\x90\x10\x69\xdb"
+			  "\xa4\xad\x8a\x5e\xac\x30\x72\xf2"
+			  "\x36\xcd\xe3\x23\x49\x02\x93\xfa"
+			  "\x3d\xbb\xe2\x98\x83\xeb\xe9\x8d"
+			  "\xb3\x8f\x11\xaa\x53\xdb\xaf\x2e"
+			  "\x95\x13\x99\x3d\x71\xbd\x32\x92"
+			  "\xdd\xfc\x9d\x5e\x6f\x63\x2c\xee"
+			  "\x91\x1f\x4c\x64\x3d\x87\x55\x0f"
+			  "\xcc\x3d\x89\x61\x53\x02\x57\x8f"
+			  "\xe4\x77\x29\x32\xaf\xa6\x2f\x0a"
+			  "\xae\x3c\x3f\x3f\xf4\xfb\x65\x52"
+			  "\xc5\xc1\x78\x78\x53\x28\xad\xed"
+			  "\xd1\x67\x37\xc7\x59\x70\xcd\x0a"
+			  "\xb8\x0f\x80\x51\x9f\xc0\x12\x5e"
+			  "\x06\x0a\x7e\xec\x24\x5f\x73\x00"
+			  "\xb1\x0b\x31\x47\x4f\x73\x8d\xb4"
+			  "\xce\xf3\x55\x45\x6c\x84\x27\xba"
+			  "\xb9\x6f\x03\x4a\xeb\x98\x88\x6e"
+			  "\x53\xed\x25\x19\x0d\x8f\xfe\xca"
+			  "\x60\xe5\x00\x93\x6e\x3c\xff\x19"
+			  "\xae\x08\x3b\x8a\xa6\x84\x05\xfe"
+			  "\x9b\x59\xa0\x8c\xc8\x05\x45\xf5"
+			  "\x05\x37\xdc\x45\x6f\x8b\x95\x8c"
+			  "\x4e\x11\x45\x7a\xce\x21\xa5\xf7"
+			  "\x71\x67\xb9\xce\xd7\xf9\xe9\x5e"
+			  "\x60\xf5\x53\x7a\xa8\x85\x14\x03"
+			  "\xa0\x92\xec\xf3\x51\x80\x84\xc4"
+			  "\xdc\x11\x9e\x57\xce\x4b\x45\xcf"
+			  "\x90\x95\x85\x0b\x96\xe9\xee\x35"
+			  "\x10\xb8\x9b\xf2\x59\x4a\xc6\x7e"
+			  "\x85\xe5\x6f\x38\x51\x93\x40\x0c"
+			  "\x99\xd7\x7f\x32\xa8\x06\x27\xd1"
+			  "\x2b\xd5\xb5\x3a\x1a\xe1\x5e\xda"
+			  "\xcd\x5a\x50\x30\x3c\xc7\xe7\x65"
+			  "\xa6\x07\x0b\x98\x91\xc6\x20\x27"
+			  "\x2a\x03\x63\x1b\x1e\x3d\xaf\xc8"
+			  "\x71\x48\x46\x6a\x64\x28\xf9\x3d"
+			  "\xd1\x1d\xab\xc8\x40\x76\xc2\x39"
+			  "\x4e\x00\x75\xd2\x0e\x82\x58\x8c"
+			  "\xd3\x73\x5a\xea\x46\x89\xbe\xfd"
+			  "\x4e\x2c\x0d\x94\xaa\x9b\x68\xac"
+			  "\x86\x87\x30\x7e\xa9\x16\xcd\x59"
+			  "\xd2\xa6\xbe\x0a\xd8\xf5\xfd\x2d"
+			  "\x49\x69\xd2\x1a\x90\xd2\x1b\xed"
+			  "\xff\x71\x04\x87\x87\x21\xc4\xb8"
+			  "\x1f\x5b\x51\x33\xd0\xd6\x59\x9a"
+			  "\x03\x0e\xd3\x8b\xfb\x57\x73\xfd"
+			  "\x5a\x52\x63\x82\xc8\x85\x2f\xcb"
+			  "\x74\x6d\x4e\xd9\x68\x37\x85\x6a"
+			  "\xd4\xfb\x94\xed\x8d\xd1\x1a\xaf"
+			  "\x76\xa7\xb7\x88\xd0\x2b\x4e\xda"
+			  "\xec\x99\x94\x27\x6f\x87\x8c\xdf"
+			  "\x4b\x5e\xa6\x66\xdd\xcb\x33\x7b"
+			  "\x64\x94\x31\xa8\x37\xa6\x1d\xdb"
+			  "\x0d\x5c\x93\xa4\x40\xf9\x30\x53"
+			  "\x4b\x74\x8d\xdd\xf6\xde\x3c\xac"
+			  "\x5c\x80\x01\x3a\xef\xb1\x9a\x02"
+			  "\x0c\x22\x8e\xe7\x44\x09\x74\x4c"
+			  "\xf2\x9a\x27\x69\x7f\x12\x32\x36"
+			  "\xde\x92\xdf\xde\x8f\x5b\x31\xab"
+			  "\x4a\x01\x26\xe0\xb1\xda\xe8\x37"
+			  "\x21\x64\xe8\xff\x69\xfc\x9e\x41"
+			  "\xd2\x96\x2d\x18\x64\x98\x33\x78"
+			  "\x24\x61\x73\x9b\x47\x29\xf1\xa7"
+			  "\xcb\x27\x0f\xf0\x85\x6d\x8c\x9d"
+			  "\x2c\x95\x9e\xe5\xb2\x8e\x30\x29"
+			  "\x78\x8a\x9d\x65\xb4\x8e\xde\x7b"
+			  "\xd9\x00\x50\xf5\x7f\x81\xc3\x1b"
+			  "\x25\x85\xeb\xc2\x8c\x33\x22\x1e"
+			  "\x68\x38\x22\x30\xd8\x2e\x00\x98"
+			  "\x85\x16\x06\x56\xb4\x81\x74\x20"
+			  "\x95\xdb\x1c\x05\x19\xe8\x23\x4d"
+			  "\x65\x5d\xcc\xd8\x7f\xc4\x2d\x0f"
+			  "\x57\x26\x71\x07\xad\xaa\x71\x9f"
+			  "\x19\x76\x2f\x25\x51\x88\xe4\xc0"
+			  "\x82\x6e\x08\x05\x37\x04\xee\x25"
+			  "\x23\x90\xe9\x4e\xce\x9b\x16\xc1"
+			  "\x31\xe7\x6e\x2c\x1b\xe1\x85\x9a"
+			  "\x0c\x8c\xbb\x12\x1e\x68\x7b\x93"
+			  "\xa9\x3c\x39\x56\x23\x3e\x6e\xc7"
+			  "\x77\x84\xd3\xe0\x86\x59\xaa\xb9"
+			  "\xd5\x53\x58\xc9\x0a\x83\x5f\x85"
+			  "\xd8\x47\x14\x67\x8a\x3c\x17\xe0"
+			  "\xab\x02\x51\xea\xf1\xf0\x4f\x30"
+			  "\x7d\xe0\x92\xc2\x5f\xfb\x19\x5a"
+			  "\x3f\xbd\xf4\x39\xa4\x31\x0c\x39"
+			  "\xd1\xae\x4e\xf7\x65\x7f\x1f\xce"
+			  "\xc2\x39\xd1\x84\xd4\xe5\x02\xe0"
+			  "\x58\xaa\xf1\x5e\x81\xaf\x7f\x72"
+			  "\x0f\x08\x99\x43\xb9\xd8\xac\x41"
+			  "\x35\x55\xf2\xb2\xd4\x98\xb8\x3b"
+			  "\x2b\x3c\x3e\x16\x06\x31\xfc\x79"
+			  "\x47\x38\x63\x51\xc5\xd0\x26\xd7"
+			  "\x43\xb4\x2b\xd9\xc5\x05\xf2\x9d"
+			  "\x18\xc9\x26\x82\x56\xd2\x11\x05"
+			  "\xb6\x89\xb4\x43\x9c\xb5\x9d\x11"
+			  "\x6c\x83\x37\x71\x27\x1c\xae\xbf"
+			  "\xcd\x57\xd2\xee\x0d\x5a\x15\x26"
+			  "\x67\x88\x80\x80\x1b\xdc\xc1\x62"
+			  "\xdd\x4c\xff\x92\x5c\x6c\xe1\xa0"
+			  "\xe3\x79\xa9\x65\x8c\x8c\x14\x42"
+			  "\xe5\x11\xd2\x1a\xad\xa9\x56\x6f"
+			  "\x98\xfc\x8a\x7b\x56\x1f\xc6\xc1"
+			  "\x52\x12\x92\x9b\x41\x0f\x4b\xae"
+			  "\x1b\x4a\xbc\xfe\x23\xb6\x94\x70"
+			  "\x04\x30\x9e\x69\x47\xbe\xb8\x8f"
+			  "\xca\x45\xd7\x8a\xf4\x78\x3e\xaa"
+			  "\x71\x17\xd8\x1e\xb8\x11\x8f\xbc"
+			  "\xc8\x1a\x65\x7b\x41\x89\x72\xc7"
+			  "\x5f\xbe\xc5\x2a\xdb\x5c\x54\xf9"
+			  "\x25\xa3\x7a\x80\x56\x9c\x8c\xab"
+			  "\x26\x19\x10\x36\xa6\xf3\x14\x79"
+			  "\x40\x98\x70\x68\xb7\x35\xd9\xb9"
+			  "\x27\xd4\xe7\x74\x5b\x3d\x97\xb4"
+			  "\xd9\xaa\xd9\xf2\xb5\x14\x84\x1f"
+			  "\xa9\xde\x12\x44\x5b\x00\xc0\xbc"
+			  "\xc8\x11\x25\x1b\x67\x7a\x15\x72"
+			  "\xa6\x31\x6f\xf4\x68\x7a\x86\x9d"
+			  "\x43\x1c\x5f\x16\xd3\xad\x2e\x52"
+			  "\xf3\xb4\xc3\xfa\x27\x2e\x68\x6c"
+			  "\x06\xe7\x4c\x4f\xa2\xe0\xe4\x21"
+			  "\x5d\x9e\x33\x58\x8d\xbf\xd5\x70"
+			  "\xf8\x80\xa5\xdd\xe7\x18\x79\xfa"
+			  "\x7b\xfd\x09\x69\x2c\x37\x32\xa8"
+			  "\x65\xfa\x8d\x8b\x5c\xcc\xe8\xf3"
+			  "\x37\xf6\xa6\xc6\x5c\xa2\x66\x79"
+			  "\xfa\x8a\xa7\xd1\x0b\x2e\x1b\x5e"
+			  "\x95\x35\x00\x76\xae\x42\xf7\x50"
+			  "\x51\x78\xfb\xb4\x28\x24\xde\x1a"
+			  "\x70\x8b\xed\xca\x3c\x5e\xe4\xbd"
+			  "\x28\xb5\xf3\x76\x4f\x67\x5d\x81"
+			  "\xb2\x60\x87\xd9\x7b\x19\x1a\xa7"
+			  "\x79\xa2\xfa\x3f\x9e\xa9\xd7\x25"
+			  "\x61\xe1\x74\x31\xa2\x77\xa0\x1b"
+			  "\xf6\xf7\xcb\xc5\xaa\x9e\xce\xf9"
+			  "\x9b\x96\xef\x51\xc3\x1a\x44\x96"
+			  "\xae\x17\x50\xab\x29\x08\xda\xcc"
+			  "\x1a\xb3\x12\xd0\x24\xe4\xe2\xe0"
+			  "\xc6\xe3\xcc\x82\xd0\xba\x47\x4c"
+			  "\x3f\x49\xd7\xe8\xb6\x61\xaa\x65"
+			  "\x25\x18\x40\x2d\x62\x25\x02\x71"
+			  "\x61\xa2\xc1\xb2\x13\xd2\x71\x3f"
+			  "\x43\x1a\xc9\x09\x92\xff\xd5\x57"
+			  "\xf0\xfc\x5e\x1c\xf1\xf5\xf9\xf3"
+			  "\x5b",
+		.ilen	= 1281,
+		.rlen	= 1281,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 1200, 1, 80 },
+	},
+};
+
+/* Adiantum test vectors from https://github.com/google/adiantum */
+static struct cipher_testvec adiantum_xchacha12_aes_enc_tv_template[] = {
+	{
+		.key	= "\x9e\xeb\xb2\x49\x3c\x1c\xf5\xf4"
+			  "\x6a\x99\xc2\xc4\xdf\xb1\xf4\xdd"
+			  "\x75\x20\x57\xea\x2c\x4f\xcd\xb2"
+			  "\xa5\x3d\x7b\x49\x1e\xab\xfd\x0f",
+		.klen	= 32,
+		.iv	= "\xdf\x63\xd4\xab\xd2\x49\xf3\xd8"
+			  "\x33\x81\x37\x60\x7d\xfa\x73\x08"
+			  "\xd8\x49\x6d\x80\xe8\x2f\x62\x54"
+			  "\xeb\x0e\xa9\x39\x5b\x45\x7f\x8a",
+		.input	= "\x67\xc9\xf2\x30\x84\x41\x8e\x43"
+			  "\xfb\xf3\xb3\x3e\x79\x36\x7f\xe8",
+		.result	= "\x6d\x32\x86\x18\x67\x86\x0f\x3f"
+			  "\x96\x7c\x9d\x28\x0d\x53\xec\x9f",
+		.ilen	= 16,
+		.rlen	= 16,
+		.also_non_np = 1,
+		.np	= 2,
+		.tap	= { 14, 2 },
+	}, {
+		.key	= "\x36\x2b\x57\x97\xf8\x5d\xcd\x99"
+			  "\x5f\x1a\x5a\x44\x1d\x92\x0f\x27"
+			  "\xcc\x16\xd7\x2b\x85\x63\x99\xd3"
+			  "\xba\x96\xa1\xdb\xd2\x60\x68\xda",
+		.klen	= 32,
+		.iv	= "\xef\x58\x69\xb1\x2c\x5e\x9a\x47"
+			  "\x24\xc1\xb1\x69\xe1\x12\x93\x8f"
+			  "\x43\x3d\x6d\x00\xdb\x5e\xd8\xd9"
+			  "\x12\x9a\xfe\xd9\xff\x2d\xaa\xc4",
+		.input	= "\x5e\xa8\x68\x19\x85\x98\x12\x23"
+			  "\x26\x0a\xcc\xdb\x0a\x04\xb9\xdf"
+			  "\x4d\xb3\x48\x7b\xb0\xe3\xc8\x19"
+			  "\x43\x5a\x46\x06\x94\x2d\xf2",
+		.result	= "\xc7\xc6\xf1\x73\x8f\xc4\xff\x4a"
+			  "\x39\xbe\x78\xbe\x8d\x28\xc8\x89"
+			  "\x46\x63\xe7\x0c\x7d\x87\xe8\x4e"
+			  "\xc9\x18\x7b\xbe\x18\x60\x50",
+		.ilen	= 31,
+		.rlen	= 31,
+	}, {
+		.key	= "\xa5\x28\x24\x34\x1a\x3c\xd8\xf7"
+			  "\x05\x91\x8f\xee\x85\x1f\x35\x7f"
+			  "\x80\x3d\xfc\x9b\x94\xf6\xfc\x9e"
+			  "\x19\x09\x00\xa9\x04\x31\x4f\x11",
+		.klen	= 32,
+		.iv	= "\xa1\xba\x49\x95\xff\x34\x6d\xb8"
+			  "\xcd\x87\x5d\x5e\xfd\xea\x85\xdb"
+			  "\x8a\x7b\x5e\xb2\x5d\x57\xdd\x62"
+			  "\xac\xa9\x8c\x41\x42\x94\x75\xb7",
+		.input	= "\x69\xb4\xe8\x8c\x37\xe8\x67\x82"
+			  "\xf1\xec\x5d\x04\xe5\x14\x91\x13"
+			  "\xdf\xf2\x87\x1b\x69\x81\x1d\x71"
+			  "\x70\x9e\x9c\x3b\xde\x49\x70\x11"
+			  "\xa0\xa3\xdb\x0d\x54\x4f\x66\x69"
+			  "\xd7\xdb\x80\xa7\x70\x92\x68\xce"
+			  "\x81\x04\x2c\xc6\xab\xae\xe5\x60"
+			  "\x15\xe9\x6f\xef\xaa\x8f\xa7\xa7"
+			  "\x63\x8f\xf2\xf0\x77\xf1\xa8\xea"
+			  "\xe1\xb7\x1f\x9e\xab\x9e\x4b\x3f"
+			  "\x07\x87\x5b\x6f\xcd\xa8\xaf\xb9"
+			  "\xfa\x70\x0b\x52\xb8\xa8\xa7\x9e"
+			  "\x07\x5f\xa6\x0e\xb3\x9b\x79\x13"
+			  "\x79\xc3\x3e\x8d\x1c\x2c\x68\xc8"
+			  "\x51\x1d\x3c\x7b\x7d\x79\x77\x2a"
+			  "\x56\x65\xc5\x54\x23\x28\xb0\x03",
+		.result	= "\x9e\x16\xab\xed\x4b\xa7\x42\x5a"
+			  "\xc6\xfb\x4e\x76\xff\xbe\x03\xa0"
+			  "\x0f\xe3\xad\xba\xe4\x98\x2b\x0e"
+			  "\x21\x48\xa0\xb8\x65\x48\x27\x48"
+			  "\x84\x54\x54\xb2\x9a\x94\x7b\xe6"
+			  "\x4b\x29\xe9\xcf\x05\x91\x80\x1a"
+			  "\x3a\xf3\x41\x96\x85\x1d\x9f\x74"
+			  "\x51\x56\x63\xfa\x7c\x28\x85\x49"
+			  "\xf7\x2f\xf9\xf2\x18\x46\xf5\x33"
+			  "\x80\xa3\x3c\xce\xb2\x57\x93\xf5"
+			  "\xae\xbd\xa9\xf5\x7b\x30\xc4\x93"
+			  "\x66\xe0\x30\x77\x16\xe4\xa0\x31"
+			  "\xba\x70\xbc\x68\x13\xf5\xb0\x9a"
+			  "\xc1\xfc\x7e\xfe\x55\x80\x5c\x48"
+			  "\x74\xa6\xaa\xa3\xac\xdc\xc2\xf5"
+			  "\x8d\xde\x34\x86\x78\x60\x75\x8d",
+		.ilen	= 128,
+		.rlen	= 128,
+		.also_non_np = 1,
+		.np	= 4,
+		.tap	= { 104, 16, 4, 4 },
+	}, {
+		.key	= "\xd3\x81\x72\x18\x23\xff\x6f\x4a"
+			  "\x25\x74\x29\x0d\x51\x8a\x0e\x13"
+			  "\xc1\x53\x5d\x30\x8d\xee\x75\x0d"
+			  "\x14\xd6\x69\xc9\x15\xa9\x0c\x60",
+		.klen	= 32,
+		.iv	= "\x65\x9b\xd4\xa8\x7d\x29\x1d\xf4"
+			  "\xc4\xd6\x9b\x6a\x28\xab\x64\xe2"
+			  "\x62\x81\x97\xc5\x81\xaa\xf9\x44"
+			  "\xc1\x72\x59\x82\xaf\x16\xc8\x2c",
+		.input	= "\xc7\x6b\x52\x6a\x10\xf0\xcc\x09"
+			  "\xc1\x12\x1d\x6d\x21\xa6\x78\xf5"
+			  "\x05\xa3\x69\x60\x91\x36\x98\x57"
+			  "\xba\x0c\x14\xcc\xf3\x2d\x73\x03"
+			  "\xc6\xb2\x5f\xc8\x16\x27\x37\x5d"
+			  "\xd0\x0b\x87\xb2\x50\x94\x7b\x58"
+			  "\x04\xf4\xe0\x7f\x6e\x57\x8e\xc9"
+			  "\x41\x84\xc1\xb1\x7e\x4b\x91\x12"
+			  "\x3a\x8b\x5d\x50\x82\x7b\xcb\xd9"
+			  "\x9a\xd9\x4e\x18\x06\x23\x9e\xd4"
+			  "\xa5\x20\x98\xef\xb5\xda\xe5\xc0"
+			  "\x8a\x6a\x83\x77\x15\x84\x1e\xae"
+			  "\x78\x94\x9d\xdf\xb7\xd1\xea\x67"
+			  "\xaa\xb0\x14\x15\xfa\x67\x21\x84"
+			  "\xd3\x41\x2a\xce\xba\x4b\x4a\xe8"
+			  "\x95\x62\xa9\x55\xf0\x80\xad\xbd"
+			  "\xab\xaf\xdd\x4f\xa5\x7c\x13\x36"
+			  "\xed\x5e\x4f\x72\xad\x4b\xf1\xd0"
+			  "\x88\x4e\xec\x2c\x88\x10\x5e\xea"
+			  "\x12\xc0\x16\x01\x29\xa3\xa0\x55"
+			  "\xaa\x68\xf3\xe9\x9d\x3b\x0d\x3b"
+			  "\x6d\xec\xf8\xa0\x2d\xf0\x90\x8d"
+			  "\x1c\xe2\x88\xd4\x24\x71\xf9\xb3"
+			  "\xc1\x9f\xc5\xd6\x76\x70\xc5\x2e"
+			  "\x9c\xac\xdb\x90\xbd\x83\x72\xba"
+			  "\x6e\xb5\xa5\x53\x83\xa9\xa5\xbf"
+			  "\x7d\x06\x0e\x3c\x2a\xd2\x04\xb5"
+			  "\x1e\x19\x38\x09\x16\xd2\x82\x1f"
+			  "\x75\x18\x56\xb8\x96\x0b\xa6\xf9"
+			  "\xcf\x62\xd9\x32\x5d\xa9\xd7\x1d"
+			  "\xec\xe4\xdf\x1b\xbe\xf1\x36\xee"
+			  "\xe3\x7b\xb5\x2f\xee\xf8\x53\x3d"
+			  "\x6a\xb7\x70\xa9\xfc\x9c\x57\x25"
+			  "\xf2\x89\x10\xd3\xb8\xa8\x8c\x30"
+			  "\xae\x23\x4f\x0e\x13\x66\x4f\xe1"
+			  "\xb6\xc0\xe4\xf8\xef\x93\xbd\x6e"
+			  "\x15\x85\x6b\xe3\x60\x81\x1d\x68"
+			  "\xd7\x31\x87\x89\x09\xab\xd5\x96"
+			  "\x1d\xf3\x6d\x67\x80\xca\x07\x31"
+			  "\x5d\xa7\xe4\xfb\x3e\xf2\x9b\x33"
+			  "\x52\x18\xc8\x30\xfe\x2d\xca\x1e"
+			  "\x79\x92\x7a\x60\x5c\xb6\x58\x87"
+			  "\xa4\x36\xa2\x67\x92\x8b\xa4\xb7"
+			  "\xf1\x86\xdf\xdc\xc0\x7e\x8f\x63"
+			  "\xd2\xa2\xdc\x78\xeb\x4f\xd8\x96"
+			  "\x47\xca\xb8\x91\xf9\xf7\x94\x21"
+			  "\x5f\x9a\x9f\x5b\xb8\x40\x41\x4b"
+			  "\x66\x69\x6a\x72\xd0\xcb\x70\xb7"
+			  "\x93\xb5\x37\x96\x05\x37\x4f\xe5"
+			  "\x8c\xa7\x5a\x4e\x8b\xb7\x84\xea"
+			  "\xc7\xfc\x19\x6e\x1f\x5a\xa1\xac"
+			  "\x18\x7d\x52\x3b\xb3\x34\x62\x99"
+			  "\xe4\x9e\x31\x04\x3f\xc0\x8d\x84"
+			  "\x17\x7c\x25\x48\x52\x67\x11\x27"
+			  "\x67\xbb\x5a\x85\xca\x56\xb2\x5c"
+			  "\xe6\xec\xd5\x96\x3d\x15\xfc\xfb"
+			  "\x22\x25\xf4\x13\xe5\x93\x4b\x9a"
+			  "\x77\xf1\x52\x18\xfa\x16\x5e\x49"
+			  "\x03\x45\xa8\x08\xfa\xb3\x41\x92"
+			  "\x79\x50\x33\xca\xd0\xd7\x42\x55"
+			  "\xc3\x9a\x0c\x4e\xd9\xa4\x3c\x86"
+			  "\x80\x9f\x53\xd1\xa4\x2e\xd1\xbc"
+			  "\xf1\x54\x6e\x93\xa4\x65\x99\x8e"
+			  "\xdf\x29\xc0\x64\x63\x07\xbb\xea",
+		.result	= "\x15\x97\xd0\x86\x18\x03\x9c\x51"
+			  "\xc5\x11\x36\x62\x13\x92\xe6\x73"
+			  "\x29\x79\xde\xa1\x00\x3e\x08\x64"
+			  "\x17\x1a\xbc\xd5\xfe\x33\x0e\x0c"
+			  "\x7c\x94\xa7\xc6\x3c\xbe\xac\xa2"
+			  "\x89\xe6\xbc\xdf\x0c\x33\x27\x42"
+			  "\x46\x73\x2f\xba\x4e\xa6\x46\x8f"
+			  "\xe4\xee\x39\x63\x42\x65\xa3\x88"
+			  "\x7a\xad\x33\x23\xa9\xa7\x20\x7f"
+			  "\x0b\xe6\x6a\xc3\x60\xda\x9e\xb4"
+			  "\xd6\x07\x8a\x77\x26\xd1\xab\x44"
+			  "\x99\x55\x03\x5e\xed\x8d\x7b\xbd"
+			  "\xc8\x21\xb7\x21\x30\x3f\xc0\xb5"
+			  "\xc8\xec\x6c\x23\xa6\xa3\x6d\xf1"
+			  "\x30\x0a\xd0\xa6\xa9\x28\x69\xae"
+			  "\x2a\xe6\x54\xac\x82\x9d\x6a\x95"
+			  "\x6f\x06\x44\xc5\x5a\x77\x6e\xec"
+			  "\xf8\xf8\x63\xb2\xe6\xaa\xbd\x8e"
+			  "\x0e\x8a\x62\x00\x03\xc8\x84\xdd"
+			  "\x47\x4a\xc3\x55\xba\xb7\xe7\xdf"
+			  "\x08\xbf\x62\xf5\xe8\xbc\xb6\x11"
+			  "\xe4\xcb\xd0\x66\x74\x32\xcf\xd4"
+			  "\xf8\x51\x80\x39\x14\x05\x12\xdb"
+			  "\x87\x93\xe2\x26\x30\x9c\x3a\x21"
+			  "\xe5\xd0\x38\x57\x80\x15\xe4\x08"
+			  "\x58\x05\x49\x7d\xe6\x92\x77\x70"
+			  "\xfb\x1e\x2d\x6a\x84\x00\xc8\x68"
+			  "\xf7\x1a\xdd\xf0\x7b\x38\x1e\xd8"
+			  "\x2c\x78\x78\x61\xcf\xe3\xde\x69"
+			  "\x1f\xd5\x03\xd5\x1a\xb4\xcf\x03"
+			  "\xc8\x7a\x70\x68\x35\xb4\xf6\xbe"
+			  "\x90\x62\xb2\x28\x99\x86\xf5\x44"
+			  "\x99\xeb\x31\xcf\xca\xdf\xd0\x21"
+			  "\xd6\x60\xf7\x0f\x40\xb4\x80\xb7"
+			  "\xab\xe1\x9b\x45\xba\x66\xda\xee"
+			  "\xdd\x04\x12\x40\x98\xe1\x69\xe5"
+			  "\x2b\x9c\x59\x80\xe7\x7b\xcc\x63"
+			  "\xa6\xc0\x3a\xa9\xfe\x8a\xf9\x62"
+			  "\x11\x34\x61\x94\x35\xfe\xf2\x99"
+			  "\xfd\xee\x19\xea\x95\xb6\x12\xbf"
+			  "\x1b\xdf\x02\x1a\xcc\x3e\x7e\x65"
+			  "\x78\x74\x10\x50\x29\x63\x28\xea"
+			  "\x6b\xab\xd4\x06\x4d\x15\x24\x31"
+			  "\xc7\x0a\xc9\x16\xb6\x48\xf0\xbf"
+			  "\x49\xdb\x68\x71\x31\x8f\x87\xe2"
+			  "\x13\x05\x64\xd6\x22\x0c\xf8\x36"
+			  "\x84\x24\x3e\x69\x5e\xb8\x9e\x16"
+			  "\x73\x6c\x83\x1e\xe0\x9f\x9e\xba"
+			  "\xe5\x59\x21\x33\x1b\xa9\x26\xc2"
+			  "\xc7\xd9\x30\x73\xb6\xa6\x73\x82"
+			  "\x19\xfa\x44\x4d\x40\x8b\x69\x04"
+			  "\x94\x74\xea\x6e\xb3\x09\x47\x01"
+			  "\x2a\xb9\x78\x34\x43\x11\xed\xd6"
+			  "\x8c\x95\x65\x1b\x85\x67\xa5\x40"
+			  "\xac\x9c\x05\x4b\x57\x4a\xa9\x96"
+			  "\x0f\xdd\x4f\xa1\xe0\xcf\x6e\xc7"
+			  "\x1b\xed\xa2\xb4\x56\x8c\x09\x6e"
+			  "\xa6\x65\xd7\x55\x81\xb7\xed\x11"
+			  "\x9b\x40\x75\xa8\x6b\x56\xaf\x16"
+			  "\x8b\x3d\xf4\xcb\xfe\xd5\x1d\x3d"
+			  "\x85\xc2\xc0\xde\x43\x39\x4a\x96"
+			  "\xba\x88\x97\xc0\xd6\x00\x0e\x27"
+			  "\x21\xb0\x21\x52\xba\xa7\x37\xaa"
+			  "\xcc\xbf\x95\xa8\xf4\xd0\x91\xf6",
+		.ilen	= 512,
+		.rlen	= 512,
+		.also_non_np = 1,
+		.np	= 2,
+		.tap	= { 144, 368 },
+	}
+};
+
+static struct cipher_testvec adiantum_xchacha12_aes_dec_tv_template[] = {
+	{
+		.key	= "\x9e\xeb\xb2\x49\x3c\x1c\xf5\xf4"
+			  "\x6a\x99\xc2\xc4\xdf\xb1\xf4\xdd"
+			  "\x75\x20\x57\xea\x2c\x4f\xcd\xb2"
+			  "\xa5\x3d\x7b\x49\x1e\xab\xfd\x0f",
+		.klen	= 32,
+		.iv	= "\xdf\x63\xd4\xab\xd2\x49\xf3\xd8"
+			  "\x33\x81\x37\x60\x7d\xfa\x73\x08"
+			  "\xd8\x49\x6d\x80\xe8\x2f\x62\x54"
+			  "\xeb\x0e\xa9\x39\x5b\x45\x7f\x8a",
+		.result	= "\x67\xc9\xf2\x30\x84\x41\x8e\x43"
+			  "\xfb\xf3\xb3\x3e\x79\x36\x7f\xe8",
+		.input	= "\x6d\x32\x86\x18\x67\x86\x0f\x3f"
+			  "\x96\x7c\x9d\x28\x0d\x53\xec\x9f",
+		.ilen	= 16,
+		.rlen	= 16,
+		.also_non_np = 1,
+		.np	= 2,
+		.tap	= { 14, 2 },
+	}, {
+		.key	= "\x36\x2b\x57\x97\xf8\x5d\xcd\x99"
+			  "\x5f\x1a\x5a\x44\x1d\x92\x0f\x27"
+			  "\xcc\x16\xd7\x2b\x85\x63\x99\xd3"
+			  "\xba\x96\xa1\xdb\xd2\x60\x68\xda",
+		.klen	= 32,
+		.iv	= "\xef\x58\x69\xb1\x2c\x5e\x9a\x47"
+			  "\x24\xc1\xb1\x69\xe1\x12\x93\x8f"
+			  "\x43\x3d\x6d\x00\xdb\x5e\xd8\xd9"
+			  "\x12\x9a\xfe\xd9\xff\x2d\xaa\xc4",
+		.result	= "\x5e\xa8\x68\x19\x85\x98\x12\x23"
+			  "\x26\x0a\xcc\xdb\x0a\x04\xb9\xdf"
+			  "\x4d\xb3\x48\x7b\xb0\xe3\xc8\x19"
+			  "\x43\x5a\x46\x06\x94\x2d\xf2",
+		.input	= "\xc7\xc6\xf1\x73\x8f\xc4\xff\x4a"
+			  "\x39\xbe\x78\xbe\x8d\x28\xc8\x89"
+			  "\x46\x63\xe7\x0c\x7d\x87\xe8\x4e"
+			  "\xc9\x18\x7b\xbe\x18\x60\x50",
+		.ilen	= 31,
+		.rlen	= 31,
+	}, {
+		.key	= "\xa5\x28\x24\x34\x1a\x3c\xd8\xf7"
+			  "\x05\x91\x8f\xee\x85\x1f\x35\x7f"
+			  "\x80\x3d\xfc\x9b\x94\xf6\xfc\x9e"
+			  "\x19\x09\x00\xa9\x04\x31\x4f\x11",
+		.klen	= 32,
+		.iv	= "\xa1\xba\x49\x95\xff\x34\x6d\xb8"
+			  "\xcd\x87\x5d\x5e\xfd\xea\x85\xdb"
+			  "\x8a\x7b\x5e\xb2\x5d\x57\xdd\x62"
+			  "\xac\xa9\x8c\x41\x42\x94\x75\xb7",
+		.result	= "\x69\xb4\xe8\x8c\x37\xe8\x67\x82"
+			  "\xf1\xec\x5d\x04\xe5\x14\x91\x13"
+			  "\xdf\xf2\x87\x1b\x69\x81\x1d\x71"
+			  "\x70\x9e\x9c\x3b\xde\x49\x70\x11"
+			  "\xa0\xa3\xdb\x0d\x54\x4f\x66\x69"
+			  "\xd7\xdb\x80\xa7\x70\x92\x68\xce"
+			  "\x81\x04\x2c\xc6\xab\xae\xe5\x60"
+			  "\x15\xe9\x6f\xef\xaa\x8f\xa7\xa7"
+			  "\x63\x8f\xf2\xf0\x77\xf1\xa8\xea"
+			  "\xe1\xb7\x1f\x9e\xab\x9e\x4b\x3f"
+			  "\x07\x87\x5b\x6f\xcd\xa8\xaf\xb9"
+			  "\xfa\x70\x0b\x52\xb8\xa8\xa7\x9e"
+			  "\x07\x5f\xa6\x0e\xb3\x9b\x79\x13"
+			  "\x79\xc3\x3e\x8d\x1c\x2c\x68\xc8"
+			  "\x51\x1d\x3c\x7b\x7d\x79\x77\x2a"
+			  "\x56\x65\xc5\x54\x23\x28\xb0\x03",
+		.input	= "\x9e\x16\xab\xed\x4b\xa7\x42\x5a"
+			  "\xc6\xfb\x4e\x76\xff\xbe\x03\xa0"
+			  "\x0f\xe3\xad\xba\xe4\x98\x2b\x0e"
+			  "\x21\x48\xa0\xb8\x65\x48\x27\x48"
+			  "\x84\x54\x54\xb2\x9a\x94\x7b\xe6"
+			  "\x4b\x29\xe9\xcf\x05\x91\x80\x1a"
+			  "\x3a\xf3\x41\x96\x85\x1d\x9f\x74"
+			  "\x51\x56\x63\xfa\x7c\x28\x85\x49"
+			  "\xf7\x2f\xf9\xf2\x18\x46\xf5\x33"
+			  "\x80\xa3\x3c\xce\xb2\x57\x93\xf5"
+			  "\xae\xbd\xa9\xf5\x7b\x30\xc4\x93"
+			  "\x66\xe0\x30\x77\x16\xe4\xa0\x31"
+			  "\xba\x70\xbc\x68\x13\xf5\xb0\x9a"
+			  "\xc1\xfc\x7e\xfe\x55\x80\x5c\x48"
+			  "\x74\xa6\xaa\xa3\xac\xdc\xc2\xf5"
+			  "\x8d\xde\x34\x86\x78\x60\x75\x8d",
+		.ilen	= 128,
+		.rlen	= 128,
+		.also_non_np = 1,
+		.np	= 4,
+		.tap	= { 104, 16, 4, 4 },
+	}, {
+		.key	= "\xd3\x81\x72\x18\x23\xff\x6f\x4a"
+			  "\x25\x74\x29\x0d\x51\x8a\x0e\x13"
+			  "\xc1\x53\x5d\x30\x8d\xee\x75\x0d"
+			  "\x14\xd6\x69\xc9\x15\xa9\x0c\x60",
+		.klen	= 32,
+		.iv	= "\x65\x9b\xd4\xa8\x7d\x29\x1d\xf4"
+			  "\xc4\xd6\x9b\x6a\x28\xab\x64\xe2"
+			  "\x62\x81\x97\xc5\x81\xaa\xf9\x44"
+			  "\xc1\x72\x59\x82\xaf\x16\xc8\x2c",
+		.result	= "\xc7\x6b\x52\x6a\x10\xf0\xcc\x09"
+			  "\xc1\x12\x1d\x6d\x21\xa6\x78\xf5"
+			  "\x05\xa3\x69\x60\x91\x36\x98\x57"
+			  "\xba\x0c\x14\xcc\xf3\x2d\x73\x03"
+			  "\xc6\xb2\x5f\xc8\x16\x27\x37\x5d"
+			  "\xd0\x0b\x87\xb2\x50\x94\x7b\x58"
+			  "\x04\xf4\xe0\x7f\x6e\x57\x8e\xc9"
+			  "\x41\x84\xc1\xb1\x7e\x4b\x91\x12"
+			  "\x3a\x8b\x5d\x50\x82\x7b\xcb\xd9"
+			  "\x9a\xd9\x4e\x18\x06\x23\x9e\xd4"
+			  "\xa5\x20\x98\xef\xb5\xda\xe5\xc0"
+			  "\x8a\x6a\x83\x77\x15\x84\x1e\xae"
+			  "\x78\x94\x9d\xdf\xb7\xd1\xea\x67"
+			  "\xaa\xb0\x14\x15\xfa\x67\x21\x84"
+			  "\xd3\x41\x2a\xce\xba\x4b\x4a\xe8"
+			  "\x95\x62\xa9\x55\xf0\x80\xad\xbd"
+			  "\xab\xaf\xdd\x4f\xa5\x7c\x13\x36"
+			  "\xed\x5e\x4f\x72\xad\x4b\xf1\xd0"
+			  "\x88\x4e\xec\x2c\x88\x10\x5e\xea"
+			  "\x12\xc0\x16\x01\x29\xa3\xa0\x55"
+			  "\xaa\x68\xf3\xe9\x9d\x3b\x0d\x3b"
+			  "\x6d\xec\xf8\xa0\x2d\xf0\x90\x8d"
+			  "\x1c\xe2\x88\xd4\x24\x71\xf9\xb3"
+			  "\xc1\x9f\xc5\xd6\x76\x70\xc5\x2e"
+			  "\x9c\xac\xdb\x90\xbd\x83\x72\xba"
+			  "\x6e\xb5\xa5\x53\x83\xa9\xa5\xbf"
+			  "\x7d\x06\x0e\x3c\x2a\xd2\x04\xb5"
+			  "\x1e\x19\x38\x09\x16\xd2\x82\x1f"
+			  "\x75\x18\x56\xb8\x96\x0b\xa6\xf9"
+			  "\xcf\x62\xd9\x32\x5d\xa9\xd7\x1d"
+			  "\xec\xe4\xdf\x1b\xbe\xf1\x36\xee"
+			  "\xe3\x7b\xb5\x2f\xee\xf8\x53\x3d"
+			  "\x6a\xb7\x70\xa9\xfc\x9c\x57\x25"
+			  "\xf2\x89\x10\xd3\xb8\xa8\x8c\x30"
+			  "\xae\x23\x4f\x0e\x13\x66\x4f\xe1"
+			  "\xb6\xc0\xe4\xf8\xef\x93\xbd\x6e"
+			  "\x15\x85\x6b\xe3\x60\x81\x1d\x68"
+			  "\xd7\x31\x87\x89\x09\xab\xd5\x96"
+			  "\x1d\xf3\x6d\x67\x80\xca\x07\x31"
+			  "\x5d\xa7\xe4\xfb\x3e\xf2\x9b\x33"
+			  "\x52\x18\xc8\x30\xfe\x2d\xca\x1e"
+			  "\x79\x92\x7a\x60\x5c\xb6\x58\x87"
+			  "\xa4\x36\xa2\x67\x92\x8b\xa4\xb7"
+			  "\xf1\x86\xdf\xdc\xc0\x7e\x8f\x63"
+			  "\xd2\xa2\xdc\x78\xeb\x4f\xd8\x96"
+			  "\x47\xca\xb8\x91\xf9\xf7\x94\x21"
+			  "\x5f\x9a\x9f\x5b\xb8\x40\x41\x4b"
+			  "\x66\x69\x6a\x72\xd0\xcb\x70\xb7"
+			  "\x93\xb5\x37\x96\x05\x37\x4f\xe5"
+			  "\x8c\xa7\x5a\x4e\x8b\xb7\x84\xea"
+			  "\xc7\xfc\x19\x6e\x1f\x5a\xa1\xac"
+			  "\x18\x7d\x52\x3b\xb3\x34\x62\x99"
+			  "\xe4\x9e\x31\x04\x3f\xc0\x8d\x84"
+			  "\x17\x7c\x25\x48\x52\x67\x11\x27"
+			  "\x67\xbb\x5a\x85\xca\x56\xb2\x5c"
+			  "\xe6\xec\xd5\x96\x3d\x15\xfc\xfb"
+			  "\x22\x25\xf4\x13\xe5\x93\x4b\x9a"
+			  "\x77\xf1\x52\x18\xfa\x16\x5e\x49"
+			  "\x03\x45\xa8\x08\xfa\xb3\x41\x92"
+			  "\x79\x50\x33\xca\xd0\xd7\x42\x55"
+			  "\xc3\x9a\x0c\x4e\xd9\xa4\x3c\x86"
+			  "\x80\x9f\x53\xd1\xa4\x2e\xd1\xbc"
+			  "\xf1\x54\x6e\x93\xa4\x65\x99\x8e"
+			  "\xdf\x29\xc0\x64\x63\x07\xbb\xea",
+		.input	= "\x15\x97\xd0\x86\x18\x03\x9c\x51"
+			  "\xc5\x11\x36\x62\x13\x92\xe6\x73"
+			  "\x29\x79\xde\xa1\x00\x3e\x08\x64"
+			  "\x17\x1a\xbc\xd5\xfe\x33\x0e\x0c"
+			  "\x7c\x94\xa7\xc6\x3c\xbe\xac\xa2"
+			  "\x89\xe6\xbc\xdf\x0c\x33\x27\x42"
+			  "\x46\x73\x2f\xba\x4e\xa6\x46\x8f"
+			  "\xe4\xee\x39\x63\x42\x65\xa3\x88"
+			  "\x7a\xad\x33\x23\xa9\xa7\x20\x7f"
+			  "\x0b\xe6\x6a\xc3\x60\xda\x9e\xb4"
+			  "\xd6\x07\x8a\x77\x26\xd1\xab\x44"
+			  "\x99\x55\x03\x5e\xed\x8d\x7b\xbd"
+			  "\xc8\x21\xb7\x21\x30\x3f\xc0\xb5"
+			  "\xc8\xec\x6c\x23\xa6\xa3\x6d\xf1"
+			  "\x30\x0a\xd0\xa6\xa9\x28\x69\xae"
+			  "\x2a\xe6\x54\xac\x82\x9d\x6a\x95"
+			  "\x6f\x06\x44\xc5\x5a\x77\x6e\xec"
+			  "\xf8\xf8\x63\xb2\xe6\xaa\xbd\x8e"
+			  "\x0e\x8a\x62\x00\x03\xc8\x84\xdd"
+			  "\x47\x4a\xc3\x55\xba\xb7\xe7\xdf"
+			  "\x08\xbf\x62\xf5\xe8\xbc\xb6\x11"
+			  "\xe4\xcb\xd0\x66\x74\x32\xcf\xd4"
+			  "\xf8\x51\x80\x39\x14\x05\x12\xdb"
+			  "\x87\x93\xe2\x26\x30\x9c\x3a\x21"
+			  "\xe5\xd0\x38\x57\x80\x15\xe4\x08"
+			  "\x58\x05\x49\x7d\xe6\x92\x77\x70"
+			  "\xfb\x1e\x2d\x6a\x84\x00\xc8\x68"
+			  "\xf7\x1a\xdd\xf0\x7b\x38\x1e\xd8"
+			  "\x2c\x78\x78\x61\xcf\xe3\xde\x69"
+			  "\x1f\xd5\x03\xd5\x1a\xb4\xcf\x03"
+			  "\xc8\x7a\x70\x68\x35\xb4\xf6\xbe"
+			  "\x90\x62\xb2\x28\x99\x86\xf5\x44"
+			  "\x99\xeb\x31\xcf\xca\xdf\xd0\x21"
+			  "\xd6\x60\xf7\x0f\x40\xb4\x80\xb7"
+			  "\xab\xe1\x9b\x45\xba\x66\xda\xee"
+			  "\xdd\x04\x12\x40\x98\xe1\x69\xe5"
+			  "\x2b\x9c\x59\x80\xe7\x7b\xcc\x63"
+			  "\xa6\xc0\x3a\xa9\xfe\x8a\xf9\x62"
+			  "\x11\x34\x61\x94\x35\xfe\xf2\x99"
+			  "\xfd\xee\x19\xea\x95\xb6\x12\xbf"
+			  "\x1b\xdf\x02\x1a\xcc\x3e\x7e\x65"
+			  "\x78\x74\x10\x50\x29\x63\x28\xea"
+			  "\x6b\xab\xd4\x06\x4d\x15\x24\x31"
+			  "\xc7\x0a\xc9\x16\xb6\x48\xf0\xbf"
+			  "\x49\xdb\x68\x71\x31\x8f\x87\xe2"
+			  "\x13\x05\x64\xd6\x22\x0c\xf8\x36"
+			  "\x84\x24\x3e\x69\x5e\xb8\x9e\x16"
+			  "\x73\x6c\x83\x1e\xe0\x9f\x9e\xba"
+			  "\xe5\x59\x21\x33\x1b\xa9\x26\xc2"
+			  "\xc7\xd9\x30\x73\xb6\xa6\x73\x82"
+			  "\x19\xfa\x44\x4d\x40\x8b\x69\x04"
+			  "\x94\x74\xea\x6e\xb3\x09\x47\x01"
+			  "\x2a\xb9\x78\x34\x43\x11\xed\xd6"
+			  "\x8c\x95\x65\x1b\x85\x67\xa5\x40"
+			  "\xac\x9c\x05\x4b\x57\x4a\xa9\x96"
+			  "\x0f\xdd\x4f\xa1\xe0\xcf\x6e\xc7"
+			  "\x1b\xed\xa2\xb4\x56\x8c\x09\x6e"
+			  "\xa6\x65\xd7\x55\x81\xb7\xed\x11"
+			  "\x9b\x40\x75\xa8\x6b\x56\xaf\x16"
+			  "\x8b\x3d\xf4\xcb\xfe\xd5\x1d\x3d"
+			  "\x85\xc2\xc0\xde\x43\x39\x4a\x96"
+			  "\xba\x88\x97\xc0\xd6\x00\x0e\x27"
+			  "\x21\xb0\x21\x52\xba\xa7\x37\xaa"
+			  "\xcc\xbf\x95\xa8\xf4\xd0\x91\xf6",
+		.ilen	= 512,
+		.rlen	= 512,
+		.also_non_np = 1,
+		.np	= 2,
+		.tap	= { 144, 368 },
+	}
+};
+
+/* Adiantum with XChaCha20 instead of XChaCha12 */
+/* Test vectors from https://github.com/google/adiantum */
+static struct cipher_testvec adiantum_xchacha20_aes_enc_tv_template[] = {
+	{
+		.key	= "\x9e\xeb\xb2\x49\x3c\x1c\xf5\xf4"
+			  "\x6a\x99\xc2\xc4\xdf\xb1\xf4\xdd"
+			  "\x75\x20\x57\xea\x2c\x4f\xcd\xb2"
+			  "\xa5\x3d\x7b\x49\x1e\xab\xfd\x0f",
+		.klen	= 32,
+		.iv	= "\xdf\x63\xd4\xab\xd2\x49\xf3\xd8"
+			  "\x33\x81\x37\x60\x7d\xfa\x73\x08"
+			  "\xd8\x49\x6d\x80\xe8\x2f\x62\x54"
+			  "\xeb\x0e\xa9\x39\x5b\x45\x7f\x8a",
+		.input	= "\x67\xc9\xf2\x30\x84\x41\x8e\x43"
+			  "\xfb\xf3\xb3\x3e\x79\x36\x7f\xe8",
+		.result	= "\xf6\x78\x97\xd6\xaa\x94\x01\x27"
+			  "\x2e\x4d\x83\xe0\x6e\x64\x9a\xdf",
+		.ilen	= 16,
+		.rlen	= 16,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 5, 2, 9 },
+	}, {
+		.key	= "\x36\x2b\x57\x97\xf8\x5d\xcd\x99"
+			  "\x5f\x1a\x5a\x44\x1d\x92\x0f\x27"
+			  "\xcc\x16\xd7\x2b\x85\x63\x99\xd3"
+			  "\xba\x96\xa1\xdb\xd2\x60\x68\xda",
+		.klen	= 32,
+		.iv	= "\xef\x58\x69\xb1\x2c\x5e\x9a\x47"
+			  "\x24\xc1\xb1\x69\xe1\x12\x93\x8f"
+			  "\x43\x3d\x6d\x00\xdb\x5e\xd8\xd9"
+			  "\x12\x9a\xfe\xd9\xff\x2d\xaa\xc4",
+		.input	= "\x5e\xa8\x68\x19\x85\x98\x12\x23"
+			  "\x26\x0a\xcc\xdb\x0a\x04\xb9\xdf"
+			  "\x4d\xb3\x48\x7b\xb0\xe3\xc8\x19"
+			  "\x43\x5a\x46\x06\x94\x2d\xf2",
+		.result	= "\x4b\xb8\x90\x10\xdf\x7f\x64\x08"
+			  "\x0e\x14\x42\x5f\x00\x74\x09\x36"
+			  "\x57\x72\xb5\xfd\xb5\x5d\xb8\x28"
+			  "\x0c\x04\x91\x14\x91\xe9\x37",
+		.ilen	= 31,
+		.rlen	= 31,
+		.also_non_np = 1,
+		.np	= 2,
+		.tap	= { 16, 15 },
+	}, {
+		.key	= "\xa5\x28\x24\x34\x1a\x3c\xd8\xf7"
+			  "\x05\x91\x8f\xee\x85\x1f\x35\x7f"
+			  "\x80\x3d\xfc\x9b\x94\xf6\xfc\x9e"
+			  "\x19\x09\x00\xa9\x04\x31\x4f\x11",
+		.klen	= 32,
+		.iv	= "\xa1\xba\x49\x95\xff\x34\x6d\xb8"
+			  "\xcd\x87\x5d\x5e\xfd\xea\x85\xdb"
+			  "\x8a\x7b\x5e\xb2\x5d\x57\xdd\x62"
+			  "\xac\xa9\x8c\x41\x42\x94\x75\xb7",
+		.input	= "\x69\xb4\xe8\x8c\x37\xe8\x67\x82"
+			  "\xf1\xec\x5d\x04\xe5\x14\x91\x13"
+			  "\xdf\xf2\x87\x1b\x69\x81\x1d\x71"
+			  "\x70\x9e\x9c\x3b\xde\x49\x70\x11"
+			  "\xa0\xa3\xdb\x0d\x54\x4f\x66\x69"
+			  "\xd7\xdb\x80\xa7\x70\x92\x68\xce"
+			  "\x81\x04\x2c\xc6\xab\xae\xe5\x60"
+			  "\x15\xe9\x6f\xef\xaa\x8f\xa7\xa7"
+			  "\x63\x8f\xf2\xf0\x77\xf1\xa8\xea"
+			  "\xe1\xb7\x1f\x9e\xab\x9e\x4b\x3f"
+			  "\x07\x87\x5b\x6f\xcd\xa8\xaf\xb9"
+			  "\xfa\x70\x0b\x52\xb8\xa8\xa7\x9e"
+			  "\x07\x5f\xa6\x0e\xb3\x9b\x79\x13"
+			  "\x79\xc3\x3e\x8d\x1c\x2c\x68\xc8"
+			  "\x51\x1d\x3c\x7b\x7d\x79\x77\x2a"
+			  "\x56\x65\xc5\x54\x23\x28\xb0\x03",
+		.result	= "\xb1\x8b\xa0\x05\x77\xa8\x4d\x59"
+			  "\x1b\x8e\x21\xfc\x3a\x49\xfa\xd4"
+			  "\xeb\x36\xf3\xc4\xdf\xdc\xae\x67"
+			  "\x07\x3f\x70\x0e\xe9\x66\xf5\x0c"
+			  "\x30\x4d\x66\xc9\xa4\x2f\x73\x9c"
+			  "\x13\xc8\x49\x44\xcc\x0a\x90\x9d"
+			  "\x7c\xdd\x19\x3f\xea\x72\x8d\x58"
+			  "\xab\xe7\x09\x2c\xec\xb5\x44\xd2"
+			  "\xca\xa6\x2d\x7a\x5c\x9c\x2b\x15"
+			  "\xec\x2a\xa6\x69\x91\xf9\xf3\x13"
+			  "\xf7\x72\xc1\xc1\x40\xd5\xe1\x94"
+			  "\xf4\x29\xa1\x3e\x25\x02\xa8\x3e"
+			  "\x94\xc1\x91\x14\xa1\x14\xcb\xbe"
+			  "\x67\x4c\xb9\x38\xfe\xa7\xaa\x32"
+			  "\x29\x62\x0d\xb2\xf6\x3c\x58\x57"
+			  "\xc1\xd5\x5a\xbb\xd6\xa6\x2a\xe5",
+		.ilen	= 128,
+		.rlen	= 128,
+		.also_non_np = 1,
+		.np	= 4,
+		.tap	= { 112, 7, 8, 1 },
+	}, {
+		.key	= "\xd3\x81\x72\x18\x23\xff\x6f\x4a"
+			  "\x25\x74\x29\x0d\x51\x8a\x0e\x13"
+			  "\xc1\x53\x5d\x30\x8d\xee\x75\x0d"
+			  "\x14\xd6\x69\xc9\x15\xa9\x0c\x60",
+		.klen	= 32,
+		.iv	= "\x65\x9b\xd4\xa8\x7d\x29\x1d\xf4"
+			  "\xc4\xd6\x9b\x6a\x28\xab\x64\xe2"
+			  "\x62\x81\x97\xc5\x81\xaa\xf9\x44"
+			  "\xc1\x72\x59\x82\xaf\x16\xc8\x2c",
+		.input	= "\xc7\x6b\x52\x6a\x10\xf0\xcc\x09"
+			  "\xc1\x12\x1d\x6d\x21\xa6\x78\xf5"
+			  "\x05\xa3\x69\x60\x91\x36\x98\x57"
+			  "\xba\x0c\x14\xcc\xf3\x2d\x73\x03"
+			  "\xc6\xb2\x5f\xc8\x16\x27\x37\x5d"
+			  "\xd0\x0b\x87\xb2\x50\x94\x7b\x58"
+			  "\x04\xf4\xe0\x7f\x6e\x57\x8e\xc9"
+			  "\x41\x84\xc1\xb1\x7e\x4b\x91\x12"
+			  "\x3a\x8b\x5d\x50\x82\x7b\xcb\xd9"
+			  "\x9a\xd9\x4e\x18\x06\x23\x9e\xd4"
+			  "\xa5\x20\x98\xef\xb5\xda\xe5\xc0"
+			  "\x8a\x6a\x83\x77\x15\x84\x1e\xae"
+			  "\x78\x94\x9d\xdf\xb7\xd1\xea\x67"
+			  "\xaa\xb0\x14\x15\xfa\x67\x21\x84"
+			  "\xd3\x41\x2a\xce\xba\x4b\x4a\xe8"
+			  "\x95\x62\xa9\x55\xf0\x80\xad\xbd"
+			  "\xab\xaf\xdd\x4f\xa5\x7c\x13\x36"
+			  "\xed\x5e\x4f\x72\xad\x4b\xf1\xd0"
+			  "\x88\x4e\xec\x2c\x88\x10\x5e\xea"
+			  "\x12\xc0\x16\x01\x29\xa3\xa0\x55"
+			  "\xaa\x68\xf3\xe9\x9d\x3b\x0d\x3b"
+			  "\x6d\xec\xf8\xa0\x2d\xf0\x90\x8d"
+			  "\x1c\xe2\x88\xd4\x24\x71\xf9\xb3"
+			  "\xc1\x9f\xc5\xd6\x76\x70\xc5\x2e"
+			  "\x9c\xac\xdb\x90\xbd\x83\x72\xba"
+			  "\x6e\xb5\xa5\x53\x83\xa9\xa5\xbf"
+			  "\x7d\x06\x0e\x3c\x2a\xd2\x04\xb5"
+			  "\x1e\x19\x38\x09\x16\xd2\x82\x1f"
+			  "\x75\x18\x56\xb8\x96\x0b\xa6\xf9"
+			  "\xcf\x62\xd9\x32\x5d\xa9\xd7\x1d"
+			  "\xec\xe4\xdf\x1b\xbe\xf1\x36\xee"
+			  "\xe3\x7b\xb5\x2f\xee\xf8\x53\x3d"
+			  "\x6a\xb7\x70\xa9\xfc\x9c\x57\x25"
+			  "\xf2\x89\x10\xd3\xb8\xa8\x8c\x30"
+			  "\xae\x23\x4f\x0e\x13\x66\x4f\xe1"
+			  "\xb6\xc0\xe4\xf8\xef\x93\xbd\x6e"
+			  "\x15\x85\x6b\xe3\x60\x81\x1d\x68"
+			  "\xd7\x31\x87\x89\x09\xab\xd5\x96"
+			  "\x1d\xf3\x6d\x67\x80\xca\x07\x31"
+			  "\x5d\xa7\xe4\xfb\x3e\xf2\x9b\x33"
+			  "\x52\x18\xc8\x30\xfe\x2d\xca\x1e"
+			  "\x79\x92\x7a\x60\x5c\xb6\x58\x87"
+			  "\xa4\x36\xa2\x67\x92\x8b\xa4\xb7"
+			  "\xf1\x86\xdf\xdc\xc0\x7e\x8f\x63"
+			  "\xd2\xa2\xdc\x78\xeb\x4f\xd8\x96"
+			  "\x47\xca\xb8\x91\xf9\xf7\x94\x21"
+			  "\x5f\x9a\x9f\x5b\xb8\x40\x41\x4b"
+			  "\x66\x69\x6a\x72\xd0\xcb\x70\xb7"
+			  "\x93\xb5\x37\x96\x05\x37\x4f\xe5"
+			  "\x8c\xa7\x5a\x4e\x8b\xb7\x84\xea"
+			  "\xc7\xfc\x19\x6e\x1f\x5a\xa1\xac"
+			  "\x18\x7d\x52\x3b\xb3\x34\x62\x99"
+			  "\xe4\x9e\x31\x04\x3f\xc0\x8d\x84"
+			  "\x17\x7c\x25\x48\x52\x67\x11\x27"
+			  "\x67\xbb\x5a\x85\xca\x56\xb2\x5c"
+			  "\xe6\xec\xd5\x96\x3d\x15\xfc\xfb"
+			  "\x22\x25\xf4\x13\xe5\x93\x4b\x9a"
+			  "\x77\xf1\x52\x18\xfa\x16\x5e\x49"
+			  "\x03\x45\xa8\x08\xfa\xb3\x41\x92"
+			  "\x79\x50\x33\xca\xd0\xd7\x42\x55"
+			  "\xc3\x9a\x0c\x4e\xd9\xa4\x3c\x86"
+			  "\x80\x9f\x53\xd1\xa4\x2e\xd1\xbc"
+			  "\xf1\x54\x6e\x93\xa4\x65\x99\x8e"
+			  "\xdf\x29\xc0\x64\x63\x07\xbb\xea",
+		.result	= "\xe0\x33\xf6\xe0\xb4\xa5\xdd\x2b"
+			  "\xdd\xce\xfc\x12\x1e\xfc\x2d\xf2"
+			  "\x8b\xc7\xeb\xc1\xc4\x2a\xe8\x44"
+			  "\x0f\x3d\x97\x19\x2e\x6d\xa2\x38"
+			  "\x9d\xa6\xaa\xe1\x96\xb9\x08\xe8"
+			  "\x0b\x70\x48\x5c\xed\xb5\x9b\xcb"
+			  "\x8b\x40\x88\x7e\x69\x73\xf7\x16"
+			  "\x71\xbb\x5b\xfc\xa3\x47\x5d\xa6"
+			  "\xae\x3a\x64\xc4\xe7\xb8\xa8\xe7"
+			  "\xb1\x32\x19\xdb\xe3\x01\xb8\xf0"
+			  "\xa4\x86\xb4\x4c\xc2\xde\x5c\xd2"
+			  "\x6c\x77\xd2\xe8\x18\xb7\x0a\xc9"
+			  "\x3d\x53\xb5\xc4\x5c\xf0\x8c\x06"
+			  "\xdc\x90\xe0\x74\x47\x1b\x0b\xf6"
+			  "\xd2\x71\x6b\xc4\xf1\x97\x00\x2d"
+			  "\x63\x57\x44\x1f\x8c\xf4\xe6\x9b"
+			  "\xe0\x7a\xdd\xec\x32\x73\x42\x32"
+			  "\x7f\x35\x67\x60\x0d\xcf\x10\x52"
+			  "\x61\x22\x53\x8d\x8e\xbb\x33\x76"
+			  "\x59\xd9\x10\xce\xdf\xef\xc0\x41"
+			  "\xd5\x33\x29\x6a\xda\x46\xa4\x51"
+			  "\xf0\x99\x3d\x96\x31\xdd\xb5\xcb"
+			  "\x3e\x2a\x1f\xc7\x5c\x79\xd3\xc5"
+			  "\x20\xa1\xb1\x39\x1b\xc6\x0a\x70"
+			  "\x26\x39\x95\x07\xad\x7a\xc9\x69"
+			  "\xfe\x81\xc7\x88\x08\x38\xaf\xad"
+			  "\x9e\x8d\xfb\xe8\x24\x0d\x22\xb8"
+			  "\x0e\xed\xbe\x37\x53\x7c\xa6\xc6"
+			  "\x78\x62\xec\xa3\x59\xd9\xc6\x9d"
+			  "\xb8\x0e\x69\x77\x84\x2d\x6a\x4c"
+			  "\xc5\xd9\xb2\xa0\x2b\xa8\x80\xcc"
+			  "\xe9\x1e\x9c\x5a\xc4\xa1\xb2\x37"
+			  "\x06\x9b\x30\x32\x67\xf7\xe7\xd2"
+			  "\x42\xc7\xdf\x4e\xd4\xcb\xa0\x12"
+			  "\x94\xa1\x34\x85\x93\x50\x4b\x0a"
+			  "\x3c\x7d\x49\x25\x01\x41\x6b\x96"
+			  "\xa9\x12\xbb\x0b\xc0\xd7\xd0\x93"
+			  "\x1f\x70\x38\xb8\x21\xee\xf6\xa7"
+			  "\xee\xeb\xe7\x81\xa4\x13\xb4\x87"
+			  "\xfa\xc1\xb0\xb5\x37\x8b\x74\xa2"
+			  "\x4e\xc7\xc2\xad\x3d\x62\x3f\xf8"
+			  "\x34\x42\xe5\xae\x45\x13\x63\xfe"
+			  "\xfc\x2a\x17\x46\x61\xa9\xd3\x1c"
+			  "\x4c\xaf\xf0\x09\x62\x26\x66\x1e"
+			  "\x74\xcf\xd6\x68\x3d\x7d\xd8\xb7"
+			  "\xe7\xe6\xf8\xf0\x08\x20\xf7\x47"
+			  "\x1c\x52\xaa\x0f\x3e\x21\xa3\xf2"
+			  "\xbf\x2f\x95\x16\xa8\xc8\xc8\x8c"
+			  "\x99\x0f\x5d\xfb\xfa\x2b\x58\x8a"
+			  "\x7e\xd6\x74\x02\x60\xf0\xd0\x5b"
+			  "\x65\xa8\xac\xea\x8d\x68\x46\x34"
+			  "\x26\x9d\x4f\xb1\x9a\x8e\xc0\x1a"
+			  "\xf1\xed\xc6\x7a\x83\xfd\x8a\x57"
+			  "\xf2\xe6\xe4\xba\xfc\xc6\x3c\xad"
+			  "\x5b\x19\x50\x2f\x3a\xcc\x06\x46"
+			  "\x04\x51\x3f\x91\x97\xf0\xd2\x07"
+			  "\xe7\x93\x89\x7e\xb5\x32\x0f\x03"
+			  "\xe5\x58\x9e\x74\x72\xeb\xc2\x38"
+			  "\x00\x0c\x91\x72\x69\xed\x7d\x6d"
+			  "\xc8\x71\xf0\xec\xff\x80\xd9\x1c"
+			  "\x9e\xd2\xfa\x15\xfc\x6c\x4e\xbc"
+			  "\xb1\xa6\xbd\xbd\x70\x40\xca\x20"
+			  "\xb8\x78\xd2\xa3\xc6\xf3\x79\x9c"
+			  "\xc7\x27\xe1\x6a\x29\xad\xa4\x03",
+		.ilen	= 512,
+		.rlen	= 512,
+	}
+};
+
+static struct cipher_testvec adiantum_xchacha20_aes_dec_tv_template[] = {
+	{
+		.key	= "\x9e\xeb\xb2\x49\x3c\x1c\xf5\xf4"
+			  "\x6a\x99\xc2\xc4\xdf\xb1\xf4\xdd"
+			  "\x75\x20\x57\xea\x2c\x4f\xcd\xb2"
+			  "\xa5\x3d\x7b\x49\x1e\xab\xfd\x0f",
+		.klen	= 32,
+		.iv	= "\xdf\x63\xd4\xab\xd2\x49\xf3\xd8"
+			  "\x33\x81\x37\x60\x7d\xfa\x73\x08"
+			  "\xd8\x49\x6d\x80\xe8\x2f\x62\x54"
+			  "\xeb\x0e\xa9\x39\x5b\x45\x7f\x8a",
+		.result	= "\x67\xc9\xf2\x30\x84\x41\x8e\x43"
+			  "\xfb\xf3\xb3\x3e\x79\x36\x7f\xe8",
+		.input	= "\xf6\x78\x97\xd6\xaa\x94\x01\x27"
+			  "\x2e\x4d\x83\xe0\x6e\x64\x9a\xdf",
+		.ilen	= 16,
+		.rlen	= 16,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 5, 2, 9 },
+	}, {
+		.key	= "\x36\x2b\x57\x97\xf8\x5d\xcd\x99"
+			  "\x5f\x1a\x5a\x44\x1d\x92\x0f\x27"
+			  "\xcc\x16\xd7\x2b\x85\x63\x99\xd3"
+			  "\xba\x96\xa1\xdb\xd2\x60\x68\xda",
+		.klen	= 32,
+		.iv	= "\xef\x58\x69\xb1\x2c\x5e\x9a\x47"
+			  "\x24\xc1\xb1\x69\xe1\x12\x93\x8f"
+			  "\x43\x3d\x6d\x00\xdb\x5e\xd8\xd9"
+			  "\x12\x9a\xfe\xd9\xff\x2d\xaa\xc4",
+		.result	= "\x5e\xa8\x68\x19\x85\x98\x12\x23"
+			  "\x26\x0a\xcc\xdb\x0a\x04\xb9\xdf"
+			  "\x4d\xb3\x48\x7b\xb0\xe3\xc8\x19"
+			  "\x43\x5a\x46\x06\x94\x2d\xf2",
+		.input	= "\x4b\xb8\x90\x10\xdf\x7f\x64\x08"
+			  "\x0e\x14\x42\x5f\x00\x74\x09\x36"
+			  "\x57\x72\xb5\xfd\xb5\x5d\xb8\x28"
+			  "\x0c\x04\x91\x14\x91\xe9\x37",
+		.ilen	= 31,
+		.rlen	= 31,
+		.also_non_np = 1,
+		.np	= 2,
+		.tap	= { 16, 15 },
+	}, {
+		.key	= "\xa5\x28\x24\x34\x1a\x3c\xd8\xf7"
+			  "\x05\x91\x8f\xee\x85\x1f\x35\x7f"
+			  "\x80\x3d\xfc\x9b\x94\xf6\xfc\x9e"
+			  "\x19\x09\x00\xa9\x04\x31\x4f\x11",
+		.klen	= 32,
+		.iv	= "\xa1\xba\x49\x95\xff\x34\x6d\xb8"
+			  "\xcd\x87\x5d\x5e\xfd\xea\x85\xdb"
+			  "\x8a\x7b\x5e\xb2\x5d\x57\xdd\x62"
+			  "\xac\xa9\x8c\x41\x42\x94\x75\xb7",
+		.result	= "\x69\xb4\xe8\x8c\x37\xe8\x67\x82"
+			  "\xf1\xec\x5d\x04\xe5\x14\x91\x13"
+			  "\xdf\xf2\x87\x1b\x69\x81\x1d\x71"
+			  "\x70\x9e\x9c\x3b\xde\x49\x70\x11"
+			  "\xa0\xa3\xdb\x0d\x54\x4f\x66\x69"
+			  "\xd7\xdb\x80\xa7\x70\x92\x68\xce"
+			  "\x81\x04\x2c\xc6\xab\xae\xe5\x60"
+			  "\x15\xe9\x6f\xef\xaa\x8f\xa7\xa7"
+			  "\x63\x8f\xf2\xf0\x77\xf1\xa8\xea"
+			  "\xe1\xb7\x1f\x9e\xab\x9e\x4b\x3f"
+			  "\x07\x87\x5b\x6f\xcd\xa8\xaf\xb9"
+			  "\xfa\x70\x0b\x52\xb8\xa8\xa7\x9e"
+			  "\x07\x5f\xa6\x0e\xb3\x9b\x79\x13"
+			  "\x79\xc3\x3e\x8d\x1c\x2c\x68\xc8"
+			  "\x51\x1d\x3c\x7b\x7d\x79\x77\x2a"
+			  "\x56\x65\xc5\x54\x23\x28\xb0\x03",
+		.input	= "\xb1\x8b\xa0\x05\x77\xa8\x4d\x59"
+			  "\x1b\x8e\x21\xfc\x3a\x49\xfa\xd4"
+			  "\xeb\x36\xf3\xc4\xdf\xdc\xae\x67"
+			  "\x07\x3f\x70\x0e\xe9\x66\xf5\x0c"
+			  "\x30\x4d\x66\xc9\xa4\x2f\x73\x9c"
+			  "\x13\xc8\x49\x44\xcc\x0a\x90\x9d"
+			  "\x7c\xdd\x19\x3f\xea\x72\x8d\x58"
+			  "\xab\xe7\x09\x2c\xec\xb5\x44\xd2"
+			  "\xca\xa6\x2d\x7a\x5c\x9c\x2b\x15"
+			  "\xec\x2a\xa6\x69\x91\xf9\xf3\x13"
+			  "\xf7\x72\xc1\xc1\x40\xd5\xe1\x94"
+			  "\xf4\x29\xa1\x3e\x25\x02\xa8\x3e"
+			  "\x94\xc1\x91\x14\xa1\x14\xcb\xbe"
+			  "\x67\x4c\xb9\x38\xfe\xa7\xaa\x32"
+			  "\x29\x62\x0d\xb2\xf6\x3c\x58\x57"
+			  "\xc1\xd5\x5a\xbb\xd6\xa6\x2a\xe5",
+		.ilen	= 128,
+		.rlen	= 128,
+		.also_non_np = 1,
+		.np	= 4,
+		.tap	= { 112, 7, 8, 1 },
+	}, {
+		.key	= "\xd3\x81\x72\x18\x23\xff\x6f\x4a"
+			  "\x25\x74\x29\x0d\x51\x8a\x0e\x13"
+			  "\xc1\x53\x5d\x30\x8d\xee\x75\x0d"
+			  "\x14\xd6\x69\xc9\x15\xa9\x0c\x60",
+		.klen	= 32,
+		.iv	= "\x65\x9b\xd4\xa8\x7d\x29\x1d\xf4"
+			  "\xc4\xd6\x9b\x6a\x28\xab\x64\xe2"
+			  "\x62\x81\x97\xc5\x81\xaa\xf9\x44"
+			  "\xc1\x72\x59\x82\xaf\x16\xc8\x2c",
+		.result	= "\xc7\x6b\x52\x6a\x10\xf0\xcc\x09"
+			  "\xc1\x12\x1d\x6d\x21\xa6\x78\xf5"
+			  "\x05\xa3\x69\x60\x91\x36\x98\x57"
+			  "\xba\x0c\x14\xcc\xf3\x2d\x73\x03"
+			  "\xc6\xb2\x5f\xc8\x16\x27\x37\x5d"
+			  "\xd0\x0b\x87\xb2\x50\x94\x7b\x58"
+			  "\x04\xf4\xe0\x7f\x6e\x57\x8e\xc9"
+			  "\x41\x84\xc1\xb1\x7e\x4b\x91\x12"
+			  "\x3a\x8b\x5d\x50\x82\x7b\xcb\xd9"
+			  "\x9a\xd9\x4e\x18\x06\x23\x9e\xd4"
+			  "\xa5\x20\x98\xef\xb5\xda\xe5\xc0"
+			  "\x8a\x6a\x83\x77\x15\x84\x1e\xae"
+			  "\x78\x94\x9d\xdf\xb7\xd1\xea\x67"
+			  "\xaa\xb0\x14\x15\xfa\x67\x21\x84"
+			  "\xd3\x41\x2a\xce\xba\x4b\x4a\xe8"
+			  "\x95\x62\xa9\x55\xf0\x80\xad\xbd"
+			  "\xab\xaf\xdd\x4f\xa5\x7c\x13\x36"
+			  "\xed\x5e\x4f\x72\xad\x4b\xf1\xd0"
+			  "\x88\x4e\xec\x2c\x88\x10\x5e\xea"
+			  "\x12\xc0\x16\x01\x29\xa3\xa0\x55"
+			  "\xaa\x68\xf3\xe9\x9d\x3b\x0d\x3b"
+			  "\x6d\xec\xf8\xa0\x2d\xf0\x90\x8d"
+			  "\x1c\xe2\x88\xd4\x24\x71\xf9\xb3"
+			  "\xc1\x9f\xc5\xd6\x76\x70\xc5\x2e"
+			  "\x9c\xac\xdb\x90\xbd\x83\x72\xba"
+			  "\x6e\xb5\xa5\x53\x83\xa9\xa5\xbf"
+			  "\x7d\x06\x0e\x3c\x2a\xd2\x04\xb5"
+			  "\x1e\x19\x38\x09\x16\xd2\x82\x1f"
+			  "\x75\x18\x56\xb8\x96\x0b\xa6\xf9"
+			  "\xcf\x62\xd9\x32\x5d\xa9\xd7\x1d"
+			  "\xec\xe4\xdf\x1b\xbe\xf1\x36\xee"
+			  "\xe3\x7b\xb5\x2f\xee\xf8\x53\x3d"
+			  "\x6a\xb7\x70\xa9\xfc\x9c\x57\x25"
+			  "\xf2\x89\x10\xd3\xb8\xa8\x8c\x30"
+			  "\xae\x23\x4f\x0e\x13\x66\x4f\xe1"
+			  "\xb6\xc0\xe4\xf8\xef\x93\xbd\x6e"
+			  "\x15\x85\x6b\xe3\x60\x81\x1d\x68"
+			  "\xd7\x31\x87\x89\x09\xab\xd5\x96"
+			  "\x1d\xf3\x6d\x67\x80\xca\x07\x31"
+			  "\x5d\xa7\xe4\xfb\x3e\xf2\x9b\x33"
+			  "\x52\x18\xc8\x30\xfe\x2d\xca\x1e"
+			  "\x79\x92\x7a\x60\x5c\xb6\x58\x87"
+			  "\xa4\x36\xa2\x67\x92\x8b\xa4\xb7"
+			  "\xf1\x86\xdf\xdc\xc0\x7e\x8f\x63"
+			  "\xd2\xa2\xdc\x78\xeb\x4f\xd8\x96"
+			  "\x47\xca\xb8\x91\xf9\xf7\x94\x21"
+			  "\x5f\x9a\x9f\x5b\xb8\x40\x41\x4b"
+			  "\x66\x69\x6a\x72\xd0\xcb\x70\xb7"
+			  "\x93\xb5\x37\x96\x05\x37\x4f\xe5"
+			  "\x8c\xa7\x5a\x4e\x8b\xb7\x84\xea"
+			  "\xc7\xfc\x19\x6e\x1f\x5a\xa1\xac"
+			  "\x18\x7d\x52\x3b\xb3\x34\x62\x99"
+			  "\xe4\x9e\x31\x04\x3f\xc0\x8d\x84"
+			  "\x17\x7c\x25\x48\x52\x67\x11\x27"
+			  "\x67\xbb\x5a\x85\xca\x56\xb2\x5c"
+			  "\xe6\xec\xd5\x96\x3d\x15\xfc\xfb"
+			  "\x22\x25\xf4\x13\xe5\x93\x4b\x9a"
+			  "\x77\xf1\x52\x18\xfa\x16\x5e\x49"
+			  "\x03\x45\xa8\x08\xfa\xb3\x41\x92"
+			  "\x79\x50\x33\xca\xd0\xd7\x42\x55"
+			  "\xc3\x9a\x0c\x4e\xd9\xa4\x3c\x86"
+			  "\x80\x9f\x53\xd1\xa4\x2e\xd1\xbc"
+			  "\xf1\x54\x6e\x93\xa4\x65\x99\x8e"
+			  "\xdf\x29\xc0\x64\x63\x07\xbb\xea",
+		.input	= "\xe0\x33\xf6\xe0\xb4\xa5\xdd\x2b"
+			  "\xdd\xce\xfc\x12\x1e\xfc\x2d\xf2"
+			  "\x8b\xc7\xeb\xc1\xc4\x2a\xe8\x44"
+			  "\x0f\x3d\x97\x19\x2e\x6d\xa2\x38"
+			  "\x9d\xa6\xaa\xe1\x96\xb9\x08\xe8"
+			  "\x0b\x70\x48\x5c\xed\xb5\x9b\xcb"
+			  "\x8b\x40\x88\x7e\x69\x73\xf7\x16"
+			  "\x71\xbb\x5b\xfc\xa3\x47\x5d\xa6"
+			  "\xae\x3a\x64\xc4\xe7\xb8\xa8\xe7"
+			  "\xb1\x32\x19\xdb\xe3\x01\xb8\xf0"
+			  "\xa4\x86\xb4\x4c\xc2\xde\x5c\xd2"
+			  "\x6c\x77\xd2\xe8\x18\xb7\x0a\xc9"
+			  "\x3d\x53\xb5\xc4\x5c\xf0\x8c\x06"
+			  "\xdc\x90\xe0\x74\x47\x1b\x0b\xf6"
+			  "\xd2\x71\x6b\xc4\xf1\x97\x00\x2d"
+			  "\x63\x57\x44\x1f\x8c\xf4\xe6\x9b"
+			  "\xe0\x7a\xdd\xec\x32\x73\x42\x32"
+			  "\x7f\x35\x67\x60\x0d\xcf\x10\x52"
+			  "\x61\x22\x53\x8d\x8e\xbb\x33\x76"
+			  "\x59\xd9\x10\xce\xdf\xef\xc0\x41"
+			  "\xd5\x33\x29\x6a\xda\x46\xa4\x51"
+			  "\xf0\x99\x3d\x96\x31\xdd\xb5\xcb"
+			  "\x3e\x2a\x1f\xc7\x5c\x79\xd3\xc5"
+			  "\x20\xa1\xb1\x39\x1b\xc6\x0a\x70"
+			  "\x26\x39\x95\x07\xad\x7a\xc9\x69"
+			  "\xfe\x81\xc7\x88\x08\x38\xaf\xad"
+			  "\x9e\x8d\xfb\xe8\x24\x0d\x22\xb8"
+			  "\x0e\xed\xbe\x37\x53\x7c\xa6\xc6"
+			  "\x78\x62\xec\xa3\x59\xd9\xc6\x9d"
+			  "\xb8\x0e\x69\x77\x84\x2d\x6a\x4c"
+			  "\xc5\xd9\xb2\xa0\x2b\xa8\x80\xcc"
+			  "\xe9\x1e\x9c\x5a\xc4\xa1\xb2\x37"
+			  "\x06\x9b\x30\x32\x67\xf7\xe7\xd2"
+			  "\x42\xc7\xdf\x4e\xd4\xcb\xa0\x12"
+			  "\x94\xa1\x34\x85\x93\x50\x4b\x0a"
+			  "\x3c\x7d\x49\x25\x01\x41\x6b\x96"
+			  "\xa9\x12\xbb\x0b\xc0\xd7\xd0\x93"
+			  "\x1f\x70\x38\xb8\x21\xee\xf6\xa7"
+			  "\xee\xeb\xe7\x81\xa4\x13\xb4\x87"
+			  "\xfa\xc1\xb0\xb5\x37\x8b\x74\xa2"
+			  "\x4e\xc7\xc2\xad\x3d\x62\x3f\xf8"
+			  "\x34\x42\xe5\xae\x45\x13\x63\xfe"
+			  "\xfc\x2a\x17\x46\x61\xa9\xd3\x1c"
+			  "\x4c\xaf\xf0\x09\x62\x26\x66\x1e"
+			  "\x74\xcf\xd6\x68\x3d\x7d\xd8\xb7"
+			  "\xe7\xe6\xf8\xf0\x08\x20\xf7\x47"
+			  "\x1c\x52\xaa\x0f\x3e\x21\xa3\xf2"
+			  "\xbf\x2f\x95\x16\xa8\xc8\xc8\x8c"
+			  "\x99\x0f\x5d\xfb\xfa\x2b\x58\x8a"
+			  "\x7e\xd6\x74\x02\x60\xf0\xd0\x5b"
+			  "\x65\xa8\xac\xea\x8d\x68\x46\x34"
+			  "\x26\x9d\x4f\xb1\x9a\x8e\xc0\x1a"
+			  "\xf1\xed\xc6\x7a\x83\xfd\x8a\x57"
+			  "\xf2\xe6\xe4\xba\xfc\xc6\x3c\xad"
+			  "\x5b\x19\x50\x2f\x3a\xcc\x06\x46"
+			  "\x04\x51\x3f\x91\x97\xf0\xd2\x07"
+			  "\xe7\x93\x89\x7e\xb5\x32\x0f\x03"
+			  "\xe5\x58\x9e\x74\x72\xeb\xc2\x38"
+			  "\x00\x0c\x91\x72\x69\xed\x7d\x6d"
+			  "\xc8\x71\xf0\xec\xff\x80\xd9\x1c"
+			  "\x9e\xd2\xfa\x15\xfc\x6c\x4e\xbc"
+			  "\xb1\xa6\xbd\xbd\x70\x40\xca\x20"
+			  "\xb8\x78\xd2\xa3\xc6\xf3\x79\x9c"
+			  "\xc7\x27\xe1\x6a\x29\xad\xa4\x03",
+		.ilen	= 512,
+		.rlen	= 512,
+	}
+};
+
 /*
  * CTS (Cipher Text Stealing) mode tests
  */
@@ -36031,4 +37878,78 @@ static struct comp_testvec lz4hc_decomp_tv_template[] = {
 	},
 };
 
+#define ZSTD_COMP_TEST_VECTORS 2
+#define ZSTD_DECOMP_TEST_VECTORS 2
+
+static struct comp_testvec zstd_comp_tv_template[] = {
+	{
+		.inlen	= 68,
+		.outlen	= 39,
+		.input	= "The algorithm is zstd. "
+			  "The algorithm is zstd. "
+			  "The algorithm is zstd.",
+		.output	= "\x28\xb5\x2f\xfd\x00\x50\xf5\x00\x00\xb8\x54\x68\x65"
+			  "\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d\x20\x69\x73"
+			  "\x20\x7a\x73\x74\x64\x2e\x20\x01\x00\x55\x73\x36\x01"
+			  ,
+	},
+	{
+		.inlen	= 244,
+		.outlen	= 151,
+		.input	= "zstd, short for Zstandard, is a fast lossless "
+			  "compression algorithm, targeting real-time "
+			  "compression scenarios at zlib-level and better "
+			  "compression ratios. The zstd compression library "
+			  "provides in-memory compression and decompression "
+			  "functions.",
+		.output	= "\x28\xb5\x2f\xfd\x00\x50\x75\x04\x00\x42\x4b\x1e\x17"
+			  "\x90\x81\x31\x00\xf2\x2f\xe4\x36\xc9\xef\x92\x88\x32"
+			  "\xc9\xf2\x24\x94\xd8\x68\x9a\x0f\x00\x0c\xc4\x31\x6f"
+			  "\x0d\x0c\x38\xac\x5c\x48\x03\xcd\x63\x67\xc0\xf3\xad"
+			  "\x4e\x90\xaa\x78\xa0\xa4\xc5\x99\xda\x2f\xb6\x24\x60"
+			  "\xe2\x79\x4b\xaa\xb6\x6b\x85\x0b\xc9\xc6\x04\x66\x86"
+			  "\xe2\xcc\xe2\x25\x3f\x4f\x09\xcd\xb8\x9d\xdb\xc1\x90"
+			  "\xa9\x11\xbc\x35\x44\x69\x2d\x9c\x64\x4f\x13\x31\x64"
+			  "\xcc\xfb\x4d\x95\x93\x86\x7f\x33\x7f\x1a\xef\xe9\x30"
+			  "\xf9\x67\xa1\x94\x0a\x69\x0f\x60\xcd\xc3\xab\x99\xdc"
+			  "\x42\xed\x97\x05\x00\x33\xc3\x15\x95\x3a\x06\xa0\x0e"
+			  "\x20\xa9\x0e\x82\xb9\x43\x45\x01",
+	},
+};
+
+static struct comp_testvec zstd_decomp_tv_template[] = {
+	{
+		.inlen	= 43,
+		.outlen	= 68,
+		.input	= "\x28\xb5\x2f\xfd\x04\x50\xf5\x00\x00\xb8\x54\x68\x65"
+			  "\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d\x20\x69\x73"
+			  "\x20\x7a\x73\x74\x64\x2e\x20\x01\x00\x55\x73\x36\x01"
+			  "\x6b\xf4\x13\x35",
+		.output	= "The algorithm is zstd. "
+			  "The algorithm is zstd. "
+			  "The algorithm is zstd.",
+	},
+	{
+		.inlen	= 155,
+		.outlen	= 244,
+		.input	= "\x28\xb5\x2f\xfd\x04\x50\x75\x04\x00\x42\x4b\x1e\x17"
+			  "\x90\x81\x31\x00\xf2\x2f\xe4\x36\xc9\xef\x92\x88\x32"
+			  "\xc9\xf2\x24\x94\xd8\x68\x9a\x0f\x00\x0c\xc4\x31\x6f"
+			  "\x0d\x0c\x38\xac\x5c\x48\x03\xcd\x63\x67\xc0\xf3\xad"
+			  "\x4e\x90\xaa\x78\xa0\xa4\xc5\x99\xda\x2f\xb6\x24\x60"
+			  "\xe2\x79\x4b\xaa\xb6\x6b\x85\x0b\xc9\xc6\x04\x66\x86"
+			  "\xe2\xcc\xe2\x25\x3f\x4f\x09\xcd\xb8\x9d\xdb\xc1\x90"
+			  "\xa9\x11\xbc\x35\x44\x69\x2d\x9c\x64\x4f\x13\x31\x64"
+			  "\xcc\xfb\x4d\x95\x93\x86\x7f\x33\x7f\x1a\xef\xe9\x30"
+			  "\xf9\x67\xa1\x94\x0a\x69\x0f\x60\xcd\xc3\xab\x99\xdc"
+			  "\x42\xed\x97\x05\x00\x33\xc3\x15\x95\x3a\x06\xa0\x0e"
+			  "\x20\xa9\x0e\x82\xb9\x43\x45\x01\xaa\x6d\xda\x0d",
+		.output	= "zstd, short for Zstandard, is a fast lossless "
+			  "compression algorithm, targeting real-time "
+			  "compression scenarios at zlib-level and better "
+			  "compression ratios. The zstd compression library "
+			  "provides in-memory compression and decompression "
+			  "functions.",
+	},
+};
 #endif	/* _CRYPTO_TESTMGR_H */

diff --git a/crypto/zstd.c b/crypto/zstd.c
new file mode 100644
index 0000000..9bfd28f
--- /dev/null
+++ b/crypto/zstd.c

@@ -0,0 +1,209 @@
+/*
+ * Cryptographic API.
+ *
+ * Copyright (c) 2017-present, Facebook, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/vmalloc.h>
+#include <linux/zstd.h>
+
+
+#define ZSTD_DEF_LEVEL	3
+
+struct zstd_ctx {
+	ZSTD_CCtx *cctx;
+	ZSTD_DCtx *dctx;
+	void *cwksp;
+	void *dwksp;
+};
+
+static ZSTD_parameters zstd_params(void)
+{
+	return ZSTD_getParams(ZSTD_DEF_LEVEL, 0, 0);
+}
+
+static int zstd_comp_init(struct zstd_ctx *ctx)
+{
+	int ret = 0;
+	const ZSTD_parameters params = zstd_params();
+	const size_t wksp_size = ZSTD_CCtxWorkspaceBound(params.cParams);
+
+	ctx->cwksp = vzalloc(wksp_size);
+	if (!ctx->cwksp) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ctx->cctx = ZSTD_initCCtx(ctx->cwksp, wksp_size);
+	if (!ctx->cctx) {
+		ret = -EINVAL;
+		goto out_free;
+	}
+out:
+	return ret;
+out_free:
+	vfree(ctx->cwksp);
+	goto out;
+}
+
+static int zstd_decomp_init(struct zstd_ctx *ctx)
+{
+	int ret = 0;
+	const size_t wksp_size = ZSTD_DCtxWorkspaceBound();
+
+	ctx->dwksp = vzalloc(wksp_size);
+	if (!ctx->dwksp) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ctx->dctx = ZSTD_initDCtx(ctx->dwksp, wksp_size);
+	if (!ctx->dctx) {
+		ret = -EINVAL;
+		goto out_free;
+	}
+out:
+	return ret;
+out_free:
+	vfree(ctx->dwksp);
+	goto out;
+}
+
+static void zstd_comp_exit(struct zstd_ctx *ctx)
+{
+	vfree(ctx->cwksp);
+	ctx->cwksp = NULL;
+	ctx->cctx = NULL;
+}
+
+static void zstd_decomp_exit(struct zstd_ctx *ctx)
+{
+	vfree(ctx->dwksp);
+	ctx->dwksp = NULL;
+	ctx->dctx = NULL;
+}
+
+static int __zstd_init(void *ctx)
+{
+	int ret;
+
+	ret = zstd_comp_init(ctx);
+	if (ret)
+		return ret;
+	ret = zstd_decomp_init(ctx);
+	if (ret)
+		zstd_comp_exit(ctx);
+	return ret;
+}
+
+static int zstd_init(struct crypto_tfm *tfm)
+{
+	struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	return __zstd_init(ctx);
+}
+
+static void __zstd_exit(void *ctx)
+{
+	zstd_comp_exit(ctx);
+	zstd_decomp_exit(ctx);
+}
+
+static void zstd_exit(struct crypto_tfm *tfm)
+{
+	struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	__zstd_exit(ctx);
+}
+
+static int __zstd_compress(const u8 *src, unsigned int slen,
+			   u8 *dst, unsigned int *dlen, void *ctx)
+{
+	size_t out_len;
+	struct zstd_ctx *zctx = ctx;
+	const ZSTD_parameters params = zstd_params();
+
+	out_len = ZSTD_compressCCtx(zctx->cctx, dst, *dlen, src, slen, params);
+	if (ZSTD_isError(out_len))
+		return -EINVAL;
+	*dlen = out_len;
+	return 0;
+}
+
+static int zstd_compress(struct crypto_tfm *tfm, const u8 *src,
+			 unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+	struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	return __zstd_compress(src, slen, dst, dlen, ctx);
+}
+
+static int __zstd_decompress(const u8 *src, unsigned int slen,
+			     u8 *dst, unsigned int *dlen, void *ctx)
+{
+	size_t out_len;
+	struct zstd_ctx *zctx = ctx;
+
+	out_len = ZSTD_decompressDCtx(zctx->dctx, dst, *dlen, src, slen);
+	if (ZSTD_isError(out_len))
+		return -EINVAL;
+	*dlen = out_len;
+	return 0;
+}
+
+static int zstd_decompress(struct crypto_tfm *tfm, const u8 *src,
+			   unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+	struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	return __zstd_decompress(src, slen, dst, dlen, ctx);
+}
+
+static struct crypto_alg alg = {
+	.cra_name		= "zstd",
+	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
+	.cra_ctxsize		= sizeof(struct zstd_ctx),
+	.cra_module		= THIS_MODULE,
+	.cra_init		= zstd_init,
+	.cra_exit		= zstd_exit,
+	.cra_u			= { .compress = {
+	.coa_compress		= zstd_compress,
+	.coa_decompress		= zstd_decompress } }
+};
+
+static int __init zstd_mod_init(void)
+{
+	int ret;
+
+	ret = crypto_register_alg(&alg);
+	if (ret)
+		return ret;
+
+	return ret;
+}
+
+static void __exit zstd_mod_fini(void)
+{
+	crypto_unregister_alg(&alg);
+}
+
+module_init(zstd_mod_init);
+module_exit(zstd_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Zstd Compression Algorithm");
+MODULE_ALIAS_CRYPTO("zstd");

diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
index 01de42c..63ed9ce 100644
--- a/drivers/android/Kconfig
+++ b/drivers/android/Kconfig

@@ -9,7 +9,7 @@
 
 config ANDROID_BINDER_IPC
 	bool "Android Binder IPC Driver"
-	depends on MMU
+	depends on MMU && !M68K
 	default n
 	---help---
 	  Binder is used in Android for both communication between processes,
@@ -31,19 +31,6 @@
 	  created. Each binder device has its own context manager, and is
 	  therefore logically separated from the other devices.
 
-config ANDROID_BINDER_IPC_32BIT
-	bool
-	depends on !64BIT && ANDROID_BINDER_IPC
-	default y
-	---help---
-	  The Binder API has been changed to support both 32 and 64bit
-	  applications in a mixed environment.
-
-	  Enable this to support an old 32-bit Android user-space (v4.4 and
-	  earlier).
-
-	  Note that enabling this will break newer Android user-space.
-
 config ANDROID_BINDER_IPC_SELFTEST
 	bool "Android Binder IPC Driver Selftest"
 	depends on ANDROID_BINDER_IPC

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 4768de0..ff4179f 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c

@@ -71,10 +71,6 @@
 #include <linux/security.h>
 #include <linux/spinlock.h>
 
-#ifdef CONFIG_ANDROID_BINDER_IPC_32BIT
-#define BINDER_IPC_32BIT 1
-#endif
-
 #include <uapi/linux/android/binder.h>
 #include "binder_alloc.h"
 #include "binder_trace.h"
@@ -142,7 +138,7 @@ enum {
 };
 static uint32_t binder_debug_mask = BINDER_DEBUG_USER_ERROR |
 	BINDER_DEBUG_FAILED_TRANSACTION | BINDER_DEBUG_DEAD_TRANSACTION;
-module_param_named(debug_mask, binder_debug_mask, uint, S_IWUSR | S_IRUGO);
+module_param_named(debug_mask, binder_debug_mask, uint, 0644);
 
 static char *binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES;
 module_param_named(devices, binder_devices_param, charp, S_IRUGO);
@@ -161,7 +157,7 @@ static int binder_set_stop_on_user_error(const char *val,
 	return ret;
 }
 module_param_call(stop_on_user_error, binder_set_stop_on_user_error,
-	param_get_int, &binder_stop_on_user_error, S_IWUSR | S_IRUGO);
+	param_get_int, &binder_stop_on_user_error, 0644);
 
 #define binder_debug(mask, x...) \
 	do { \
@@ -250,7 +246,7 @@ static struct binder_transaction_log_entry *binder_transaction_log_add(
 	unsigned int cur = atomic_inc_return(&log->cur);
 
 	if (cur >= ARRAY_SIZE(log->entry))
-		log->full = 1;
+		log->full = true;
 	e = &log->entry[cur % ARRAY_SIZE(log->entry)];
 	WRITE_ONCE(e->debug_id_done, 0);
 	/*
@@ -2266,8 +2262,8 @@ static size_t binder_validate_object(struct binder_buffer *buffer, u64 offset)
 	struct binder_object_header *hdr;
 	size_t object_size = 0;
 
-	if (offset > buffer->data_size - sizeof(*hdr) ||
-	    buffer->data_size < sizeof(*hdr) ||
+	if (buffer->data_size < sizeof(*hdr) ||
+	    offset > buffer->data_size - sizeof(*hdr) ||
 	    !IS_ALIGNED(offset, sizeof(u32)))
 		return 0;
 
@@ -2407,7 +2403,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
 	int debug_id = buffer->debug_id;
 
 	binder_debug(BINDER_DEBUG_TRANSACTION,
-		     "%d buffer release %d, size %zd-%zd, failed at %p\n",
+		     "%d buffer release %d, size %zd-%zd, failed at %pK\n",
 		     proc->pid, buffer->debug_id,
 		     buffer->data_size, buffer->offsets_size, failed_at);
 
@@ -2856,7 +2852,7 @@ static bool binder_proc_transaction(struct binder_transaction *t,
 		if (node->has_async_transaction) {
 			pending_async = true;
 		} else {
-			node->has_async_transaction = 1;
+			node->has_async_transaction = true;
 		}
 	}
 
@@ -3055,6 +3051,14 @@ static void binder_transaction(struct binder_proc *proc,
 			else
 				return_error = BR_DEAD_REPLY;
 			mutex_unlock(&context->context_mgr_node_lock);
+			if (target_node && target_proc == proc) {
+				binder_user_error("%d:%d got transaction to context manager from process owning it\n",
+						  proc->pid, thread->pid);
+				return_error = BR_FAILED_REPLY;
+				return_error_param = -EINVAL;
+				return_error_line = __LINE__;
+				goto err_invalid_target_handle;
+			}
 		}
 		if (!target_node) {
 			/*
@@ -3748,7 +3752,7 @@ static int binder_thread_write(struct binder_proc *proc,
 				w = binder_dequeue_work_head_ilocked(
 						&buf_node->async_todo);
 				if (!w) {
-					buf_node->has_async_transaction = 0;
+					buf_node->has_async_transaction = false;
 				} else {
 					binder_enqueue_work_ilocked(
 							w, &proc->todo);
@@ -3970,7 +3974,7 @@ static int binder_thread_write(struct binder_proc *proc,
 				}
 			}
 			binder_debug(BINDER_DEBUG_DEAD_BINDER,
-				     "%d:%d BC_DEAD_BINDER_DONE %016llx found %p\n",
+				     "%d:%d BC_DEAD_BINDER_DONE %016llx found %pK\n",
 				     proc->pid, thread->pid, (u64)cookie,
 				     death);
 			if (death == NULL) {
@@ -4178,6 +4182,7 @@ static int binder_thread_read(struct binder_proc *proc,
 			binder_inner_proc_unlock(proc);
 			if (put_user(e->cmd, (uint32_t __user *)ptr))
 				return -EFAULT;
+			cmd = e->cmd;
 			e->cmd = BR_OK;
 			ptr += sizeof(uint32_t);
 
@@ -5059,7 +5064,9 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
 		failure_string = "bad vm_flags";
 		goto err_bad_arg;
 	}
-	vma->vm_flags = (vma->vm_flags | VM_DONTCOPY) & ~VM_MAYWRITE;
+	vma->vm_flags |= VM_DONTCOPY | VM_MIXEDMAP;
+	vma->vm_flags &= ~VM_MAYWRITE;
+
 	vma->vm_ops = &binder_vm_ops;
 	vma->vm_private_data = proc;
 
@@ -5072,7 +5079,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
 	return 0;
 
 err_bad_arg:
-	pr_err("binder_mmap: %d %lx-%lx %s failed %d\n",
+	pr_err("%s: %d %lx-%lx %s failed %d\n", __func__,
 	       proc->pid, vma->vm_start, vma->vm_end, failure_string, ret);
 	return ret;
 }
@@ -5082,7 +5089,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
 	struct binder_proc *proc;
 	struct binder_device *binder_dev;
 
-	binder_debug(BINDER_DEBUG_OPEN_CLOSE, "binder_open: %d:%d\n",
+	binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__,
 		     current->group_leader->pid, current->pid);
 
 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
@@ -5128,7 +5135,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
 		 * anyway print all contexts that a given PID has, so this
 		 * is not a problem.
 		 */
-		proc->debugfs_entry = debugfs_create_file(strbuf, S_IRUGO,
+		proc->debugfs_entry = debugfs_create_file(strbuf, 0444,
 			binder_debugfs_dir_entry_proc,
 			(void *)(unsigned long)proc->pid,
 			&binder_proc_fops);
@@ -5395,7 +5402,7 @@ static void print_binder_transaction_ilocked(struct seq_file *m,
 	spin_lock(&t->lock);
 	to_proc = t->to_proc;
 	seq_printf(m,
-		   "%s %d: %p from %d:%d to %d:%d code %x flags %x pri %d:%d r%d",
+		   "%s %d: %pK from %d:%d to %d:%d code %x flags %x pri %d:%d r%d",
 		   prefix, t->debug_id, t,
 		   t->from ? t->from->proc->pid : 0,
 		   t->from ? t->from->pid : 0,
@@ -5420,7 +5427,7 @@ static void print_binder_transaction_ilocked(struct seq_file *m,
 	}
 	if (buffer->target_node)
 		seq_printf(m, " node %d", buffer->target_node->debug_id);
-	seq_printf(m, " size %zd:%zd data %p\n",
+	seq_printf(m, " size %zd:%zd data %pK\n",
 		   buffer->data_size, buffer->offsets_size,
 		   buffer->data);
 }
@@ -5955,11 +5962,13 @@ static int __init init_binder_device(const char *name)
 static int __init binder_init(void)
 {
 	int ret;
-	char *device_name, *device_names;
+	char *device_name, *device_names, *device_tmp;
 	struct binder_device *device;
 	struct hlist_node *tmp;
 
-	binder_alloc_shrinker_init();
+	ret = binder_alloc_shrinker_init();
+	if (ret)
+		return ret;
 
 	atomic_set(&binder_transaction_log.cur, ~0U);
 	atomic_set(&binder_transaction_log_failed.cur, ~0U);
@@ -5971,27 +5980,27 @@ static int __init binder_init(void)
 
 	if (binder_debugfs_dir_entry_root) {
 		debugfs_create_file("state",
-				    S_IRUGO,
+				    0444,
 				    binder_debugfs_dir_entry_root,
 				    NULL,
 				    &binder_state_fops);
 		debugfs_create_file("stats",
-				    S_IRUGO,
+				    0444,
 				    binder_debugfs_dir_entry_root,
 				    NULL,
 				    &binder_stats_fops);
 		debugfs_create_file("transactions",
-				    S_IRUGO,
+				    0444,
 				    binder_debugfs_dir_entry_root,
 				    NULL,
 				    &binder_transactions_fops);
 		debugfs_create_file("transaction_log",
-				    S_IRUGO,
+				    0444,
 				    binder_debugfs_dir_entry_root,
 				    &binder_transaction_log,
 				    &binder_transaction_log_fops);
 		debugfs_create_file("failed_transaction_log",
-				    S_IRUGO,
+				    0444,
 				    binder_debugfs_dir_entry_root,
 				    &binder_transaction_log_failed,
 				    &binder_transaction_log_fops);
@@ -6008,7 +6017,8 @@ static int __init binder_init(void)
 	}
 	strcpy(device_names, binder_devices_param);
 
-	while ((device_name = strsep(&device_names, ","))) {
+	device_tmp = device_names;
+	while ((device_name = strsep(&device_tmp, ","))) {
 		ret = init_binder_device(device_name);
 		if (ret)
 			goto err_init_binder_device_failed;
@@ -6022,6 +6032,9 @@ static int __init binder_init(void)
 		hlist_del(&device->hlist);
 		kfree(device);
 	}
+
+	kfree(device_names);
+
 err_alloc_device_names_failed:
 	debugfs_remove_recursive(binder_debugfs_dir_entry_root);
 

diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index d34d1eb..7cee382 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c

@@ -217,7 +217,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
 		mm = alloc->vma_vm_mm;
 
 	if (mm) {
-		down_write(&mm->mmap_sem);
+		down_read(&mm->mmap_sem);
 		vma = alloc->vma;
 	}
 
@@ -286,7 +286,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
 		/* vm_insert_page does not seem to increment the refcount */
 	}
 	if (mm) {
-		up_write(&mm->mmap_sem);
+		up_read(&mm->mmap_sem);
 		mmput(mm);
 	}
 	return 0;
@@ -319,17 +319,18 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
 	}
 err_no_vma:
 	if (mm) {
-		up_write(&mm->mmap_sem);
+		up_read(&mm->mmap_sem);
 		mmput(mm);
 	}
 	return vma ? -ENOMEM : -ESRCH;
 }
 
-struct binder_buffer *binder_alloc_new_buf_locked(struct binder_alloc *alloc,
-						  size_t data_size,
-						  size_t offsets_size,
-						  size_t extra_buffers_size,
-						  int is_async)
+static struct binder_buffer *binder_alloc_new_buf_locked(
+				struct binder_alloc *alloc,
+				size_t data_size,
+				size_t offsets_size,
+				size_t extra_buffers_size,
+				int is_async)
 {
 	struct rb_node *n = alloc->free_buffers.rb_node;
 	struct binder_buffer *buffer;
@@ -667,7 +668,7 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc,
 		goto err_already_mapped;
 	}
 
-	area = get_vm_area(vma->vm_end - vma->vm_start, VM_IOREMAP);
+	area = get_vm_area(vma->vm_end - vma->vm_start, VM_ALLOC);
 	if (area == NULL) {
 		ret = -ENOMEM;
 		failure_string = "get_vm_area";
@@ -1004,8 +1005,14 @@ void binder_alloc_init(struct binder_alloc *alloc)
 	INIT_LIST_HEAD(&alloc->buffers);
 }
 
-void binder_alloc_shrinker_init(void)
+int binder_alloc_shrinker_init(void)
 {
-	list_lru_init(&binder_alloc_lru);
-	register_shrinker(&binder_shrinker);
+	int ret = list_lru_init(&binder_alloc_lru);
+
+	if (ret == 0) {
+		ret = register_shrinker(&binder_shrinker);
+		if (ret)
+			list_lru_destroy(&binder_alloc_lru);
+	}
+	return ret;
 }

diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
index c4b49e2..fb3238c 100644
--- a/drivers/android/binder_alloc.h
+++ b/drivers/android/binder_alloc.h

@@ -129,7 +129,7 @@ extern struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc,
 						  size_t extra_buffers_size,
 						  int is_async);
 extern void binder_alloc_init(struct binder_alloc *alloc);
-void binder_alloc_shrinker_init(void);
+extern int binder_alloc_shrinker_init(void);
 extern void binder_alloc_vma_close(struct binder_alloc *alloc);
 extern struct binder_buffer *
 binder_alloc_prepare_to_free(struct binder_alloc *alloc,

diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index 4b5cd3a..c084a7f 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c

@@ -32,6 +32,9 @@ static const char * const backends[] = {
 #if IS_ENABLED(CONFIG_CRYPTO_842)
 	"842",
 #endif
+#if IS_ENABLED(CONFIG_CRYPTO_ZSTD)
+	"zstd",
+#endif
 	NULL
 };
 

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 27dc9cc..6f5e348 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c

@@ -50,6 +50,11 @@ static const char *default_compressor = "lzo";
 
 /* Module params (documentation at end) */
 static unsigned int num_devices = 1;
+/*
+ * Pages that compress to sizes equals or greater than this are stored
+ * uncompressed in memory.
+ */
+static size_t huge_class_size;
 
 static void zram_free_page(struct zram *zram, size_t index);
 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
@@ -1188,6 +1193,8 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
 		return false;
 	}
 
+	if (!huge_class_size)
+		huge_class_size = zs_huge_class_size(zram->mem_pool);
 	return true;
 }
 
@@ -1373,7 +1380,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
 		return ret;
 	}
 
-	if (comp_len > max_zpage_size)
+	if (unlikely(comp_len >= huge_class_size))
 		comp_len = PAGE_SIZE;
 	/*
 	 * handle allocation has 2 paths:

diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 6ef2d5d..29af8d0 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h

@@ -21,22 +21,6 @@
 
 #include "zcomp.h"
 
-/*-- Configurable parameters */
-
-/*
- * Pages that compress to size greater than this are stored
- * uncompressed in memory.
- */
-static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
-
-/*
- * NOTE: max_zpage_size must be less than or equal to:
- *   ZS_MAX_ALLOC_SIZE. Otherwise, zs_malloc() would
- * always return failure.
- */
-
-/*-- End of configurable params */
-
 #define SECTOR_SHIFT		9
 #define SECTORS_PER_PAGE_SHIFT	(PAGE_SHIFT - SECTOR_SHIFT)
 #define SECTORS_PER_PAGE	(1 << SECTORS_PER_PAGE_SHIFT)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 464b95a..a9ce2f7 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c

@@ -263,7 +263,7 @@
 #include <linux/syscalls.h>
 #include <linux/completion.h>
 #include <linux/uuid.h>
-#include <crypto/chacha20.h>
+#include <crypto/chacha.h>
 
 #include <asm/processor.h>
 #include <asm/uaccess.h>
@@ -438,11 +438,10 @@ static int crng_init = 0;
 #define crng_ready() (likely(crng_init > 1))
 static int crng_init_cnt = 0;
 static unsigned long crng_global_init_time = 0;
-#define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE)
-static void _extract_crng(struct crng_state *crng,
-			  __u8 out[CHACHA20_BLOCK_SIZE]);
+#define CRNG_INIT_CNT_THRESH (2*CHACHA_KEY_SIZE)
+static void _extract_crng(struct crng_state *crng, __u8 out[CHACHA_BLOCK_SIZE]);
 static void _crng_backtrack_protect(struct crng_state *crng,
-				    __u8 tmp[CHACHA20_BLOCK_SIZE], int used);
+				    __u8 tmp[CHACHA_BLOCK_SIZE], int used);
 static void process_random_ready_list(void);
 
 static struct ratelimit_state unseeded_warning =
@@ -818,7 +817,7 @@ static int crng_fast_load(const char *cp, size_t len)
 	}
 	p = (unsigned char *) &primary_crng.state[4];
 	while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) {
-		p[crng_init_cnt % CHACHA20_KEY_SIZE] ^= *cp;
+		p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp;
 		cp++; crng_init_cnt++; len--;
 	}
 	if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
@@ -868,7 +867,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
 	unsigned long	flags;
 	int		i, num;
 	union {
-		__u8	block[CHACHA20_BLOCK_SIZE];
+		__u8	block[CHACHA_BLOCK_SIZE];
 		__u32	key[8];
 	} buf;
 
@@ -879,7 +878,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
 	} else {
 		_extract_crng(&primary_crng, buf.block);
 		_crng_backtrack_protect(&primary_crng, buf.block,
-					CHACHA20_KEY_SIZE);
+					CHACHA_KEY_SIZE);
 	}
 	spin_lock_irqsave(&crng->lock, flags);
 	for (i = 0; i < 8; i++) {
@@ -926,7 +925,7 @@ static inline void crng_wait_ready(void)
 }
 
 static void _extract_crng(struct crng_state *crng,
-			  __u8 out[CHACHA20_BLOCK_SIZE])
+			  __u8 out[CHACHA_BLOCK_SIZE])
 {
 	unsigned long v, flags;
 
@@ -943,7 +942,7 @@ static void _extract_crng(struct crng_state *crng,
 	spin_unlock_irqrestore(&crng->lock, flags);
 }
 
-static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
+static void extract_crng(__u8 out[CHACHA_BLOCK_SIZE])
 {
 	struct crng_state *crng = NULL;
 
@@ -961,14 +960,14 @@ static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
  * enough) to mutate the CRNG key to provide backtracking protection.
  */
 static void _crng_backtrack_protect(struct crng_state *crng,
-				    __u8 tmp[CHACHA20_BLOCK_SIZE], int used)
+				    __u8 tmp[CHACHA_BLOCK_SIZE], int used)
 {
 	unsigned long	flags;
 	__u32		*s, *d;
 	int		i;
 
 	used = round_up(used, sizeof(__u32));
-	if (used + CHACHA20_KEY_SIZE > CHACHA20_BLOCK_SIZE) {
+	if (used + CHACHA_KEY_SIZE > CHACHA_BLOCK_SIZE) {
 		extract_crng(tmp);
 		used = 0;
 	}
@@ -980,7 +979,7 @@ static void _crng_backtrack_protect(struct crng_state *crng,
 	spin_unlock_irqrestore(&crng->lock, flags);
 }
 
-static void crng_backtrack_protect(__u8 tmp[CHACHA20_BLOCK_SIZE], int used)
+static void crng_backtrack_protect(__u8 tmp[CHACHA_BLOCK_SIZE], int used)
 {
 	struct crng_state *crng = NULL;
 
@@ -995,8 +994,8 @@ static void crng_backtrack_protect(__u8 tmp[CHACHA20_BLOCK_SIZE], int used)
 
 static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
 {
-	ssize_t ret = 0, i = CHACHA20_BLOCK_SIZE;
-	__u8 tmp[CHACHA20_BLOCK_SIZE];
+	ssize_t ret = 0, i = CHACHA_BLOCK_SIZE;
+	__u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4);
 	int large_request = (nbytes > 256);
 
 	while (nbytes) {
@@ -1010,7 +1009,7 @@ static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
 		}
 
 		extract_crng(tmp);
-		i = min_t(int, nbytes, CHACHA20_BLOCK_SIZE);
+		i = min_t(int, nbytes, CHACHA_BLOCK_SIZE);
 		if (copy_to_user(buf, tmp, i)) {
 			ret = -EFAULT;
 			break;
@@ -1564,7 +1563,7 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf,
  */
 void get_random_bytes(void *buf, int nbytes)
 {
-	__u8 tmp[CHACHA20_BLOCK_SIZE];
+	__u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4);
 
 #if DEBUG_RANDOM_BOOT > 0
 	if (!crng_ready())
@@ -1573,10 +1572,10 @@ void get_random_bytes(void *buf, int nbytes)
 #endif
 	trace_get_random_bytes(nbytes, _RET_IP_);
 
-	while (nbytes >= CHACHA20_BLOCK_SIZE) {
+	while (nbytes >= CHACHA_BLOCK_SIZE) {
 		extract_crng(buf);
-		buf += CHACHA20_BLOCK_SIZE;
-		nbytes -= CHACHA20_BLOCK_SIZE;
+		buf += CHACHA_BLOCK_SIZE;
+		nbytes -= CHACHA_BLOCK_SIZE;
 	}
 
 	if (nbytes > 0) {
@@ -1584,7 +1583,7 @@ void get_random_bytes(void *buf, int nbytes)
 		memcpy(buf, tmp, nbytes);
 		crng_backtrack_protect(tmp, nbytes);
 	} else
-		crng_backtrack_protect(tmp, CHACHA20_BLOCK_SIZE);
+		crng_backtrack_protect(tmp, CHACHA_BLOCK_SIZE);
 	memzero_explicit(tmp, sizeof(tmp));
 }
 EXPORT_SYMBOL(get_random_bytes);
@@ -2110,8 +2109,8 @@ struct ctl_table random_table[] = {
 
 struct batched_entropy {
 	union {
-		u64 entropy_u64[CHACHA20_BLOCK_SIZE / sizeof(u64)];
-		u32 entropy_u32[CHACHA20_BLOCK_SIZE / sizeof(u32)];
+		u64 entropy_u64[CHACHA_BLOCK_SIZE / sizeof(u64)];
+		u32 entropy_u32[CHACHA_BLOCK_SIZE / sizeof(u32)];
 	};
 	unsigned int position;
 };

diff --git a/drivers/cpufreq/cpufreq_times.c b/drivers/cpufreq/cpufreq_times.c
index 34979e7..da8e3a2 100644
--- a/drivers/cpufreq/cpufreq_times.c
+++ b/drivers/cpufreq/cpufreq_times.c

@@ -431,11 +431,13 @@ static int concurrent_time_text_seq_show(struct seq_file *m, void *v,
 
 	hlist_for_each_entry_rcu(uid_entry, (struct hlist_head *)v, hash) {
 		atomic64_t *times = get_times(uid_entry->concurrent_times);
+
 		seq_put_decimal_ull(m, "", (u64)uid_entry->uid);
 		seq_putc(m, ':');
 
 		for (i = 0; i < num_possible_cpus; ++i) {
 			u64 time = cputime_to_clock_t(atomic64_read(&times[i]));
+
 			seq_put_decimal_ull(m, " ", time);
 		}
 		seq_putc(m, '\n');
@@ -535,6 +537,7 @@ static ssize_t uid_cpupower_enable_write(struct file *file,
 void cpufreq_task_times_init(struct task_struct *p)
 {
 	unsigned long flags;
+
 	spin_lock_irqsave(&task_time_in_state_lock, flags);
 	p->time_in_state = NULL;
 	spin_unlock_irqrestore(&task_time_in_state_lock, flags);
@@ -722,7 +725,7 @@ void cpufreq_acct_update_power(struct task_struct *p, cputime_t cputime)
 	uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
 	int cpu = 0;
 
-	if (!freqs || p->flags & PF_EXITING)
+	if (!freqs || is_idle_task(p) || p->flags & PF_EXITING)
 		return;
 
 	state = freqs->offset + READ_ONCE(freqs->last_index);

diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm
index 21340e0..f521448 100644
--- a/drivers/cpuidle/Kconfig.arm
+++ b/drivers/cpuidle/Kconfig.arm

@@ -4,6 +4,7 @@
 config ARM_CPUIDLE
         bool "Generic ARM/ARM64 CPU idle Driver"
         select DT_IDLE_STATES
+	select CPU_IDLE_MULTIPLE_DRIVERS
         help
           Select this to enable generic cpuidle driver for ARM.
           It provides a generic idle driver whose idle states are configured

diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c
index f440d38..f47c545 100644
--- a/drivers/cpuidle/cpuidle-arm.c
+++ b/drivers/cpuidle/cpuidle-arm.c

@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/slab.h>
+#include <linux/topology.h>
 
 #include <asm/cpuidle.h>
 
@@ -44,7 +45,7 @@ static int arm_enter_idle_state(struct cpuidle_device *dev,
 	return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, idx);
 }
 
-static struct cpuidle_driver arm_idle_driver = {
+static struct cpuidle_driver arm_idle_driver __initdata = {
 	.name = "arm_idle",
 	.owner = THIS_MODULE,
 	/*
@@ -80,30 +81,42 @@ static const struct of_device_id arm_idle_state_match[] __initconst = {
 static int __init arm_idle_init(void)
 {
 	int cpu, ret;
-	struct cpuidle_driver *drv = &arm_idle_driver;
+	struct cpuidle_driver *drv;
 	struct cpuidle_device *dev;
 
-	/*
-	 * Initialize idle states data, starting at index 1.
-	 * This driver is DT only, if no DT idle states are detected (ret == 0)
-	 * let the driver initialization fail accordingly since there is no
-	 * reason to initialize the idle driver if only wfi is supported.
-	 */
-	ret = dt_init_idle_driver(drv, arm_idle_state_match, 1);
-	if (ret <= 0)
-		return ret ? : -ENODEV;
-
-	ret = cpuidle_register_driver(drv);
-	if (ret) {
-		pr_err("Failed to register cpuidle driver\n");
-		return ret;
-	}
-
-	/*
-	 * Call arch CPU operations in order to initialize
-	 * idle states suspend back-end specific data
-	 */
 	for_each_possible_cpu(cpu) {
+
+		drv = kmemdup(&arm_idle_driver, sizeof(*drv), GFP_KERNEL);
+		if (!drv) {
+			ret = -ENOMEM;
+			goto out_fail;
+		}
+
+		drv->cpumask = (struct cpumask *)cpumask_of(cpu);
+
+		/*
+		 * Initialize idle states data, starting at index 1.  This
+		 * driver is DT only, if no DT idle states are detected (ret
+		 * == 0) let the driver initialization fail accordingly since
+		 * there is no reason to initialize the idle driver if only
+		 * wfi is supported.
+		 */
+		ret = dt_init_idle_driver(drv, arm_idle_state_match, 1);
+		if (ret <= 0) {
+			ret = ret ? : -ENODEV;
+			goto out_kfree_drv;
+		}
+
+		ret = cpuidle_register_driver(drv);
+		if (ret) {
+			pr_err("Failed to register cpuidle driver\n");
+			goto out_kfree_drv;
+		}
+
+		/*
+		 * Call arch CPU operations in order to initialize
+		 * idle states suspend back-end specific data
+		 */
 		ret = arm_cpuidle_init(cpu);
 
 		/*
@@ -115,14 +128,14 @@ static int __init arm_idle_init(void)
 
 		if (ret) {
 			pr_err("CPU %d failed to init idle CPU ops\n", cpu);
-			goto out_fail;
+			goto out_unregister_drv;
 		}
 
 		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
 		if (!dev) {
 			pr_err("Failed to allocate cpuidle device\n");
 			ret = -ENOMEM;
-			goto out_fail;
+			goto out_unregister_drv;
 		}
 		dev->cpu = cpu;
 
@@ -130,21 +143,28 @@ static int __init arm_idle_init(void)
 		if (ret) {
 			pr_err("Failed to register cpuidle device for CPU %d\n",
 			       cpu);
-			kfree(dev);
-			goto out_fail;
+			goto out_kfree_dev;
 		}
 	}
 
 	return 0;
+
+out_kfree_dev:
+	kfree(dev);
+out_unregister_drv:
+	cpuidle_unregister_driver(drv);
+out_kfree_drv:
+	kfree(drv);
 out_fail:
 	while (--cpu >= 0) {
 		dev = per_cpu(cpuidle_devices, cpu);
+		drv = cpuidle_get_cpu_driver(dev);
 		cpuidle_unregister_device(dev);
+		cpuidle_unregister_driver(drv);
 		kfree(dev);
+		kfree(drv);
 	}
 
-	cpuidle_unregister_driver(drv);
-
 	return ret;
 }
 device_initcall(arm_idle_init);

diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 7053bb4..1936383 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c

@@ -305,7 +305,8 @@ static unsigned int sync_file_poll(struct file *file, poll_table *wait)
 
 	poll_wait(file, &sync_file->wq, wait);
 
-	if (!test_and_set_bit(POLL_ENABLED, &sync_file->flags)) {
+	if (list_empty(&sync_file->cb.node) &&
+	    !test_and_set_bit(POLL_ENABLED, &sync_file->flags)) {
 		if (fence_add_callback(sync_file->fence, &sync_file->cb,
 					   fence_check_cb_func) < 0)
 			wake_up_all(&sync_file->wq);

diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 6ed482c..fcbd5d5 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig

@@ -536,16 +536,27 @@
 
 	  If unsure, say N.
 
+config DM_VERITY_AVB
+	tristate "Support AVB specific verity error behavior"
+	depends on DM_VERITY
+	---help---
+	  Enables Android Verified Boot platform-specific error
+	  behavior. In particular, it will modify the vbmeta partition
+	  specified on the kernel command-line when non-transient error
+	  occurs (followed by a panic).
+
+	  If unsure, say N.
+
 config DM_ANDROID_VERITY
 	bool "Android verity target support"
+	depends on BLK_DEV_DM=y
 	depends on DM_VERITY=y
 	depends on X509_CERTIFICATE_PARSER
 	depends on SYSTEM_TRUSTED_KEYRING
-	depends on PUBLIC_KEY_ALGO_RSA
+	depends on CRYPTO_RSA
 	depends on KEYS
 	depends on ASYMMETRIC_KEY_TYPE
 	depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE
-	depends on MD_LINEAR=y
 	---help---
 	  This device-mapper target is virtually a VERITY target. This
 	  target is setup by reading the metadata contents piggybacked
@@ -553,4 +564,24 @@
 	  of the metadata contents are verified against the key included
 	  in the system keyring. Upon success, the underlying verity
 	  target is setup.
+
+config DM_ANDROID_VERITY_AT_MOST_ONCE_DEFAULT_ENABLED
+	bool "Verity will validate blocks at most once"
+	depends on DM_VERITY
+	---help---
+	  Default enables at_most_once option for dm-verity
+
+	  Verify data blocks only the first time they are read from the
+	  data device, rather than every time.  This reduces the overhead
+	  of dm-verity so that it can be used on systems that are memory
+	  and/or CPU constrained.  However, it provides a reduced level
+	  of security because only offline tampering of the data device's
+	  content will be detected, not online tampering.
+
+	  Hash blocks are still verified each time they are read from the
+	  hash device, since verification of hash blocks is less performance
+	  critical than data blocks, and a hash block will not be verified
+	  any more after all the data blocks it covers have been verified anyway.
+
+	  If unsure, say N.
 endif # MD

diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index c3bf33b..c8dec9c 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile

@@ -70,3 +70,7 @@
 ifeq ($(CONFIG_DM_VERITY_FEC),y)
 dm-verity-objs			+= dm-verity-fec.o
 endif
+
+ifeq ($(CONFIG_DM_VERITY_AVB),y)
+dm-verity-objs			+= dm-verity-avb.o
+endif

diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c
index eb4bdf6..f9491de 100644
--- a/drivers/md/dm-android-verity.c
+++ b/drivers/md/dm-android-verity.c

@@ -33,6 +33,7 @@
 
 #include <asm/setup.h>
 #include <crypto/hash.h>
+#include <crypto/hash_info.h>
 #include <crypto/public_key.h>
 #include <crypto/sha.h>
 #include <keys/asymmetric-type.h>
@@ -122,75 +123,6 @@ static inline bool is_unlocked(void)
 	return !strncmp(verifiedbootstate, unlocked, sizeof(unlocked));
 }
 
-static int table_extract_mpi_array(struct public_key_signature *pks,
-				const void *data, size_t len)
-{
-	MPI mpi = mpi_read_raw_data(data, len);
-
-	if (!mpi) {
-		DMERR("Error while allocating mpi array");
-		return -ENOMEM;
-	}
-
-	pks->mpi[0] = mpi;
-	pks->nr_mpi = 1;
-	return 0;
-}
-
-static struct public_key_signature *table_make_digest(
-						enum hash_algo hash,
-						const void *table,
-						unsigned long table_len)
-{
-	struct public_key_signature *pks = NULL;
-	struct crypto_shash *tfm;
-	struct shash_desc *desc;
-	size_t digest_size, desc_size;
-	int ret;
-
-	/* Allocate the hashing algorithm we're going to need and find out how
-	 * big the hash operational data will be.
-	 */
-	tfm = crypto_alloc_shash(hash_algo_name[hash], 0, 0);
-	if (IS_ERR(tfm))
-		return ERR_CAST(tfm);
-
-	desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
-	digest_size = crypto_shash_digestsize(tfm);
-
-	/* We allocate the hash operational data storage on the end of out
-	 * context data and the digest output buffer on the end of that.
-	 */
-	ret = -ENOMEM;
-	pks = kzalloc(digest_size + sizeof(*pks) + desc_size, GFP_KERNEL);
-	if (!pks)
-		goto error;
-
-	pks->pkey_hash_algo = hash;
-	pks->digest = (u8 *)pks + sizeof(*pks) + desc_size;
-	pks->digest_size = digest_size;
-
-	desc = (struct shash_desc *)(pks + 1);
-	desc->tfm = tfm;
-	desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
-
-	ret = crypto_shash_init(desc);
-	if (ret < 0)
-		goto error;
-
-	ret = crypto_shash_finup(desc, table, table_len, pks->digest);
-	if (ret < 0)
-		goto error;
-
-	crypto_free_shash(tfm);
-	return pks;
-
-error:
-	kfree(pks);
-	crypto_free_shash(tfm);
-	return ERR_PTR(ret);
-}
-
 static int read_block_dev(struct bio_read *payload, struct block_device *bdev,
 		sector_t offset, int length)
 {
@@ -207,6 +139,7 @@ static int read_block_dev(struct bio_read *payload, struct block_device *bdev,
 
 	bio->bi_bdev = bdev;
 	bio->bi_iter.bi_sector = offset;
+	bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC);
 
 	payload->page_io = kzalloc(sizeof(struct page *) *
 		payload->number_of_pages, GFP_KERNEL);
@@ -230,7 +163,7 @@ static int read_block_dev(struct bio_read *payload, struct block_device *bdev,
 		}
 	}
 
-	if (!submit_bio_wait(READ, bio))
+	if (!submit_bio_wait(bio))
 		/* success */
 		goto free_bio;
 	DMERR("bio read failed");
@@ -567,51 +500,6 @@ static int verity_mode(void)
 	return DM_VERITY_MODE_EIO;
 }
 
-static int verify_verity_signature(char *key_id,
-		struct android_metadata *metadata)
-{
-	key_ref_t key_ref;
-	struct key *key;
-	struct public_key_signature *pks = NULL;
-	int retval = -EINVAL;
-
-	key_ref = keyring_search(make_key_ref(system_trusted_keyring, 1),
-		&key_type_asymmetric, key_id);
-
-	if (IS_ERR(key_ref)) {
-		DMERR("keyring: key not found");
-		return -ENOKEY;
-	}
-
-	key = key_ref_to_ptr(key_ref);
-
-	pks = table_make_digest(HASH_ALGO_SHA256,
-			(const void *)metadata->verity_table,
-			le32_to_cpu(metadata->header->table_length));
-
-	if (IS_ERR(pks)) {
-		DMERR("hashing failed");
-		retval = PTR_ERR(pks);
-		pks = NULL;
-		goto error;
-	}
-
-	retval = table_extract_mpi_array(pks, &metadata->header->signature[0],
-				RSANUMBYTES);
-	if (retval < 0) {
-		DMERR("Error extracting mpi %d", retval);
-		goto error;
-	}
-
-	retval = verify_signature(key, pks);
-	mpi_free(pks->rsa.s);
-error:
-	kfree(pks);
-	key_put(key);
-
-	return retval;
-}
-
 static void handle_error(void)
 {
 	int mode = verity_mode();
@@ -623,6 +511,95 @@ static void handle_error(void)
 	}
 }
 
+static struct public_key_signature *table_make_digest(
+						enum hash_algo hash,
+						const void *table,
+						unsigned long table_len)
+{
+	struct public_key_signature *pks = NULL;
+	struct crypto_shash *tfm;
+	struct shash_desc *desc;
+	size_t digest_size, desc_size;
+	int ret;
+
+	/* Allocate the hashing algorithm we're going to need and find out how
+	 * big the hash operational data will be.
+	 */
+	tfm = crypto_alloc_shash(hash_algo_name[hash], 0, 0);
+	if (IS_ERR(tfm))
+		return ERR_CAST(tfm);
+
+	desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
+	digest_size = crypto_shash_digestsize(tfm);
+
+	/* We allocate the hash operational data storage on the end of out
+	 * context data and the digest output buffer on the end of that.
+	 */
+	ret = -ENOMEM;
+	pks = kzalloc(digest_size + sizeof(*pks) + desc_size, GFP_KERNEL);
+	if (!pks)
+		goto error;
+
+	pks->pkey_algo = "rsa";
+	pks->hash_algo = hash_algo_name[hash];
+	pks->digest = (u8 *)pks + sizeof(*pks) + desc_size;
+	pks->digest_size = digest_size;
+
+	desc = (struct shash_desc *)(pks + 1);
+	desc->tfm = tfm;
+	desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	ret = crypto_shash_init(desc);
+	if (ret < 0)
+		goto error;
+
+	ret = crypto_shash_finup(desc, table, table_len, pks->digest);
+	if (ret < 0)
+		goto error;
+
+	crypto_free_shash(tfm);
+	return pks;
+
+error:
+	kfree(pks);
+	crypto_free_shash(tfm);
+	return ERR_PTR(ret);
+}
+
+
+static int verify_verity_signature(char *key_id,
+		struct android_metadata *metadata)
+{
+	struct public_key_signature *pks = NULL;
+	int retval = -EINVAL;
+
+	if (!key_id)
+		goto error;
+
+	pks = table_make_digest(HASH_ALGO_SHA256,
+			(const void *)metadata->verity_table,
+			le32_to_cpu(metadata->header->table_length));
+	if (IS_ERR(pks)) {
+		DMERR("hashing failed");
+		retval = PTR_ERR(pks);
+		pks = NULL;
+		goto error;
+	}
+
+	pks->s = kmemdup(&metadata->header->signature[0], RSANUMBYTES, GFP_KERNEL);
+	if (!pks->s) {
+		DMERR("Error allocating memory for signature");
+		goto error;
+	}
+	pks->s_size = RSANUMBYTES;
+
+	retval = verify_signature_one(pks, NULL, key_id);
+	kfree(pks->s);
+error:
+	kfree(pks);
+	return retval;
+}
+
 static inline bool test_mult_overflow(sector_t a, u32 b)
 {
 	sector_t r = (sector_t)~0ULL;
@@ -694,8 +671,8 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	dev_t uninitialized_var(dev);
 	struct android_metadata *metadata = NULL;
 	int err = 0, i, mode;
-	char *key_id, *table_ptr, dummy, *target_device,
-	*verity_table_args[VERITY_TABLE_ARGS + 2 + VERITY_TABLE_OPT_FEC_ARGS];
+	char *key_id = NULL, *table_ptr, dummy, *target_device;
+	char *verity_table_args[VERITY_TABLE_ARGS + 2 + VERITY_TABLE_OPT_FEC_ARGS];
 	/* One for specifying number of opt args and one for mode */
 	sector_t data_sectors;
 	u32 data_block_size;
@@ -714,16 +691,16 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 			handle_error();
 			return -EINVAL;
 		}
-	} else if (argc == 2)
-		key_id = argv[1];
-	else {
+		target_device = argv[0];
+	} else if (argc == 2) {
+		key_id = argv[0];
+		target_device = argv[1];
+	} else {
 		DMERR("Incorrect number of arguments");
 		handle_error();
 		return -EINVAL;
 	}
 
-	target_device = argv[0];
-
 	dev = name_to_dev_t(target_device);
 	if (!dev) {
 		DMERR("no dev found for %s", target_device);
@@ -877,12 +854,11 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	}
 
 	err = verity_ctr(ti, no_of_args, verity_table_args);
-
-	if (err)
-		DMERR("android-verity failed to mount as verity target");
-	else {
+	if (err) {
+		DMERR("android-verity failed to create a verity target");
+	} else {
 		target_added = true;
-		DMINFO("android-verity mounted as verity target");
+		DMINFO("android-verity created as verity target");
 	}
 
 free_metadata:

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 7a5b75f..45a1f92 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c

@@ -115,6 +115,10 @@ struct iv_tcw_private {
 enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID,
 	     DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD };
 
+enum cipher_flags {
+	CRYPT_IV_LARGE_SECTORS,		/* Calculate IV from sector_size, not 512B sectors */
+};
+
 /*
  * The fields in here must be read only after initialization.
  */
@@ -150,11 +154,14 @@ struct crypt_config {
 	} iv_gen_private;
 	sector_t iv_offset;
 	unsigned int iv_size;
+	unsigned short int sector_size;
+	unsigned char sector_shift;
 
 	/* ESSIV: struct crypto_cipher *essiv_tfm */
 	void *iv_private;
 	struct crypto_skcipher **tfms;
 	unsigned tfms_count;
+	unsigned long cipher_flags;
 
 	/*
 	 * Layout of each crypto request:
@@ -484,6 +491,11 @@ static int crypt_iv_lmk_ctr(struct crypt_config *cc, struct dm_target *ti,
 {
 	struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk;
 
+	if (cc->sector_size != (1 << SECTOR_SHIFT)) {
+		ti->error = "Unsupported sector size for LMK";
+		return -EINVAL;
+	}
+
 	lmk->hash_tfm = crypto_alloc_shash("md5", 0, 0);
 	if (IS_ERR(lmk->hash_tfm)) {
 		ti->error = "Error initializing LMK hash";
@@ -633,6 +645,11 @@ static int crypt_iv_tcw_ctr(struct crypt_config *cc, struct dm_target *ti,
 {
 	struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw;
 
+	if (cc->sector_size != (1 << SECTOR_SHIFT)) {
+		ti->error = "Unsupported sector size for TCW";
+		return -EINVAL;
+	}
+
 	if (cc->key_size <= (cc->iv_size + TCW_WHITENING_SIZE)) {
 		ti->error = "Wrong key size for TCW";
 		return -EINVAL;
@@ -846,21 +863,27 @@ static int crypt_convert_block(struct crypt_config *cc,
 	u8 *iv;
 	int r;
 
+	/* Reject unexpected unaligned bio. */
+	if (unlikely(bv_in.bv_len & (cc->sector_size - 1)))
+		return -EIO;
+
 	dmreq = dmreq_of_req(cc, req);
 	iv = iv_of_dmreq(cc, dmreq);
 
 	dmreq->iv_sector = ctx->cc_sector;
+	if (test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags))
+		dmreq->iv_sector >>= cc->sector_shift;
 	dmreq->ctx = ctx;
 	sg_init_table(&dmreq->sg_in, 1);
-	sg_set_page(&dmreq->sg_in, bv_in.bv_page, 1 << SECTOR_SHIFT,
+	sg_set_page(&dmreq->sg_in, bv_in.bv_page, cc->sector_size,
 		    bv_in.bv_offset);
 
 	sg_init_table(&dmreq->sg_out, 1);
-	sg_set_page(&dmreq->sg_out, bv_out.bv_page, 1 << SECTOR_SHIFT,
+	sg_set_page(&dmreq->sg_out, bv_out.bv_page, cc->sector_size,
 		    bv_out.bv_offset);
 
-	bio_advance_iter(ctx->bio_in, &ctx->iter_in, 1 << SECTOR_SHIFT);
-	bio_advance_iter(ctx->bio_out, &ctx->iter_out, 1 << SECTOR_SHIFT);
+	bio_advance_iter(ctx->bio_in, &ctx->iter_in, cc->sector_size);
+	bio_advance_iter(ctx->bio_out, &ctx->iter_out, cc->sector_size);
 
 	if (cc->iv_gen_ops) {
 		r = cc->iv_gen_ops->generator(cc, iv, dmreq);
@@ -869,7 +892,7 @@ static int crypt_convert_block(struct crypt_config *cc,
 	}
 
 	skcipher_request_set_crypt(req, &dmreq->sg_in, &dmreq->sg_out,
-				   1 << SECTOR_SHIFT, iv);
+				   cc->sector_size, iv);
 
 	if (bio_data_dir(ctx->bio_in) == WRITE)
 		r = crypto_skcipher_encrypt(req);
@@ -919,6 +942,7 @@ static void crypt_free_req(struct crypt_config *cc,
 static int crypt_convert(struct crypt_config *cc,
 			 struct convert_context *ctx)
 {
+	unsigned int sector_step = cc->sector_size >> SECTOR_SHIFT;
 	int r;
 
 	atomic_set(&ctx->cc_pending, 1);
@@ -926,7 +950,6 @@ static int crypt_convert(struct crypt_config *cc,
 	while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) {
 
 		crypt_alloc_req(cc, ctx);
-
 		atomic_inc(&ctx->cc_pending);
 
 		r = crypt_convert_block(cc, ctx, ctx->req);
@@ -946,14 +969,14 @@ static int crypt_convert(struct crypt_config *cc,
 		 */
 		case -EINPROGRESS:
 			ctx->req = NULL;
-			ctx->cc_sector++;
+			ctx->cc_sector += sector_step;
 			continue;
 		/*
 		 * The request was already processed (synchronously).
 		 */
 		case 0:
 			atomic_dec(&ctx->cc_pending);
-			ctx->cc_sector++;
+			ctx->cc_sector += sector_step;
 			cond_resched();
 			continue;
 
@@ -1468,6 +1491,13 @@ static int crypt_alloc_tfms(struct crypt_config *cc, char *ciphermode)
 		}
 	}
 
+	/*
+	 * dm-crypt performance can vary greatly depending on which crypto
+	 * algorithm implementation is used.  Help people debug performance
+	 * problems by logging the ->cra_driver_name.
+	 */
+	DMINFO("%s using implementation \"%s\"", ciphermode,
+	       crypto_skcipher_alg(any_tfm(cc))->base.cra_driver_name);
 	return 0;
 }
 
@@ -1743,7 +1773,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	char dummy;
 
 	static struct dm_arg _args[] = {
-		{0, 3, "Invalid number of feature args"},
+		{0, 5, "Invalid number of feature args"},
 	};
 
 	if (argc < 5) {
@@ -1759,6 +1789,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		return -ENOMEM;
 	}
 	cc->key_size = key_size;
+	cc->sector_size = (1 << SECTOR_SHIFT);
+	cc->sector_shift = 0;
 
 	ti->private = cc;
 	ret = crypt_ctr_cipher(ti, argv[0], argv[1]);
@@ -1810,7 +1842,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	mutex_init(&cc->bio_alloc_lock);
 
 	ret = -EINVAL;
-	if (sscanf(argv[2], "%llu%c", &tmpll, &dummy) != 1) {
+	if ((sscanf(argv[2], "%llu%c", &tmpll, &dummy) != 1) ||
+	    (tmpll & ((cc->sector_size >> SECTOR_SHIFT) - 1))) {
 		ti->error = "Invalid iv_offset sector";
 		goto bad;
 	}
@@ -1858,6 +1891,21 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 			else if (!strcasecmp(opt_string, "submit_from_crypt_cpus"))
 				set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
 
+			else if (sscanf(opt_string, "sector_size:%hu%c",
+					&cc->sector_size, &dummy) == 1) {
+				if (cc->sector_size < (1 << SECTOR_SHIFT) ||
+				    cc->sector_size > 4096 ||
+				    (cc->sector_size & (cc->sector_size - 1))) {
+					ti->error = "Invalid feature value for sector_size";
+					goto bad;
+				}
+				if (ti->len & ((cc->sector_size >> SECTOR_SHIFT) - 1)) {
+					ti->error = "Device size is not multiple of sector_size feature";
+					goto bad;
+				}
+				cc->sector_shift = __ffs(cc->sector_size) - SECTOR_SHIFT;
+			} else if (!strcasecmp(opt_string, "iv_large_sectors"))
+				set_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags);
 			else {
 				ti->error = "Invalid feature arguments";
 				goto bad;
@@ -1938,6 +1986,16 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
 	    bio_data_dir(bio) == WRITE)
 		dm_accept_partial_bio(bio, ((BIO_MAX_PAGES << PAGE_SHIFT) >> SECTOR_SHIFT));
 
+	/*
+	 * Ensure that bio is a multiple of internal sector encryption size
+	 * and is aligned to this size as defined in IO hints.
+	 */
+	if (unlikely((bio->bi_iter.bi_sector & ((cc->sector_size >> SECTOR_SHIFT) - 1)) != 0))
+		return -EIO;
+
+	if (unlikely(bio->bi_iter.bi_size & (cc->sector_size - 1)))
+		return -EIO;
+
 	io = dm_per_bio_data(bio, cc->per_bio_data_size);
 	crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
 	io->ctx.req = (struct skcipher_request *)(io + 1);
@@ -1978,6 +2036,8 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
 		num_feature_args += !!ti->num_discard_bios;
 		num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags);
 		num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
+		num_feature_args += cc->sector_size != (1 << SECTOR_SHIFT);
+		num_feature_args += test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags);
 		if (num_feature_args) {
 			DMEMIT(" %d", num_feature_args);
 			if (ti->num_discard_bios)
@@ -1986,6 +2046,10 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
 				DMEMIT(" same_cpu_crypt");
 			if (test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags))
 				DMEMIT(" submit_from_crypt_cpus");
+			if (cc->sector_size != (1 << SECTOR_SHIFT))
+				DMEMIT(" sector_size:%d", cc->sector_size);
+			if (test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags))
+				DMEMIT(" iv_large_sectors");
 		}
 
 		break;
@@ -2068,6 +2132,8 @@ static int crypt_iterate_devices(struct dm_target *ti,
 
 static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
 {
+	struct crypt_config *cc = ti->private;
+
 	/*
 	 * Unfortunate constraint that is required to avoid the potential
 	 * for exceeding underlying device's max_segments limits -- due to
@@ -2075,11 +2141,17 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
 	 * bio that are not as physically contiguous as the original bio.
 	 */
 	limits->max_segment_size = PAGE_SIZE;
+
+	limits->logical_block_size =
+		max_t(unsigned short, limits->logical_block_size, cc->sector_size);
+	limits->physical_block_size =
+		max_t(unsigned, limits->physical_block_size, cc->sector_size);
+	limits->io_min = max_t(unsigned, limits->io_min, cc->sector_size);
 }
 
 static struct target_type crypt_target = {
 	.name   = "crypt",
-	.version = {1, 14, 1},
+	.version = {1, 17, 0},
 	.module = THIS_MODULE,
 	.ctr    = crypt_ctr,
 	.dtr    = crypt_dtr,

diff --git a/drivers/md/dm-verity-avb.c b/drivers/md/dm-verity-avb.c
new file mode 100644
index 0000000..89f95e4
--- /dev/null
+++ b/drivers/md/dm-verity-avb.c

@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2017 Google.
+ *
+ * This file is released under the GPLv2.
+ *
+ * Based on drivers/md/dm-verity-chromeos.c
+ */
+
+#include <linux/device-mapper.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+
+#define DM_MSG_PREFIX "verity-avb"
+
+/* Set via module parameters. */
+static char avb_vbmeta_device[64];
+static char avb_invalidate_on_error[4];
+
+static void invalidate_vbmeta_endio(struct bio *bio)
+{
+	if (bio->bi_error)
+		DMERR("invalidate_vbmeta_endio: error %d", bio->bi_error);
+	complete(bio->bi_private);
+}
+
+static int invalidate_vbmeta_submit(struct bio *bio,
+				    struct block_device *bdev,
+				    int op, int access_last_sector,
+				    struct page *page)
+{
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	bio->bi_private = &wait;
+	bio->bi_end_io = invalidate_vbmeta_endio;
+	bio->bi_bdev = bdev;
+	bio_set_op_attrs(bio, op, REQ_SYNC | REQ_NOIDLE);
+
+	bio->bi_iter.bi_sector = 0;
+	if (access_last_sector) {
+		sector_t last_sector;
+
+		last_sector = (i_size_read(bdev->bd_inode)>>SECTOR_SHIFT) - 1;
+		bio->bi_iter.bi_sector = last_sector;
+	}
+	if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
+		DMERR("invalidate_vbmeta_submit: bio_add_page error");
+		return -EIO;
+	}
+
+	submit_bio(bio);
+	/* Wait up to 2 seconds for completion or fail. */
+	if (!wait_for_completion_timeout(&wait, msecs_to_jiffies(2000)))
+		return -EIO;
+	return 0;
+}
+
+static int invalidate_vbmeta(dev_t vbmeta_devt)
+{
+	int ret = 0;
+	struct block_device *bdev;
+	struct bio *bio;
+	struct page *page;
+	fmode_t dev_mode;
+	/* Ensure we do synchronous unblocked I/O. We may also need
+	 * sync_bdev() on completion, but it really shouldn't.
+	 */
+	int access_last_sector = 0;
+
+	DMINFO("invalidate_vbmeta: acting on device %d:%d",
+	       MAJOR(vbmeta_devt), MINOR(vbmeta_devt));
+
+	/* First we open the device for reading. */
+	dev_mode = FMODE_READ | FMODE_EXCL;
+	bdev = blkdev_get_by_dev(vbmeta_devt, dev_mode,
+				 invalidate_vbmeta);
+	if (IS_ERR(bdev)) {
+		DMERR("invalidate_kernel: could not open device for reading");
+		dev_mode = 0;
+		ret = -ENOENT;
+		goto failed_to_read;
+	}
+
+	bio = bio_alloc(GFP_NOIO, 1);
+	if (!bio) {
+		ret = -ENOMEM;
+		goto failed_bio_alloc;
+	}
+
+	page = alloc_page(GFP_NOIO);
+	if (!page) {
+		ret = -ENOMEM;
+		goto failed_to_alloc_page;
+	}
+
+	access_last_sector = 0;
+	ret = invalidate_vbmeta_submit(bio, bdev, REQ_OP_READ,
+				       access_last_sector, page);
+	if (ret) {
+		DMERR("invalidate_vbmeta: error reading");
+		goto failed_to_submit_read;
+	}
+
+	/* We have a page. Let's make sure it looks right. */
+	if (memcmp("AVB0", page_address(page), 4) == 0) {
+		/* Stamp it. */
+		memcpy(page_address(page), "AVE0", 4);
+		DMINFO("invalidate_vbmeta: found vbmeta partition");
+	} else {
+		/* Could be this is on a AVB footer, check. Also, since the
+		 * AVB footer is in the last 64 bytes, adjust for the fact that
+		 * we're dealing with 512-byte sectors.
+		 */
+		size_t offset = (1<<SECTOR_SHIFT) - 64;
+
+		access_last_sector = 1;
+		ret = invalidate_vbmeta_submit(bio, bdev, REQ_OP_READ,
+					       access_last_sector, page);
+		if (ret) {
+			DMERR("invalidate_vbmeta: error reading");
+			goto failed_to_submit_read;
+		}
+		if (memcmp("AVBf", page_address(page) + offset, 4) != 0) {
+			DMERR("invalidate_vbmeta on non-vbmeta partition");
+			ret = -EINVAL;
+			goto invalid_header;
+		}
+		/* Stamp it. */
+		memcpy(page_address(page) + offset, "AVE0", 4);
+		DMINFO("invalidate_vbmeta: found vbmeta footer partition");
+	}
+
+	/* Now rewrite the changed page - the block dev was being
+	 * changed on read. Let's reopen here.
+	 */
+	blkdev_put(bdev, dev_mode);
+	dev_mode = FMODE_WRITE | FMODE_EXCL;
+	bdev = blkdev_get_by_dev(vbmeta_devt, dev_mode,
+				 invalidate_vbmeta);
+	if (IS_ERR(bdev)) {
+		DMERR("invalidate_vbmeta: could not open device for writing");
+		dev_mode = 0;
+		ret = -ENOENT;
+		goto failed_to_write;
+	}
+
+	/* We re-use the same bio to do the write after the read. Need to reset
+	 * it to initialize bio->bi_remaining.
+	 */
+	bio_reset(bio);
+
+	ret = invalidate_vbmeta_submit(bio, bdev, REQ_OP_WRITE,
+				       access_last_sector, page);
+	if (ret) {
+		DMERR("invalidate_vbmeta: error writing");
+		goto failed_to_submit_write;
+	}
+
+	DMERR("invalidate_vbmeta: completed.");
+	ret = 0;
+failed_to_submit_write:
+failed_to_write:
+invalid_header:
+	__free_page(page);
+failed_to_submit_read:
+	/* Technically, we'll leak a page with the pending bio, but
+	 * we're about to reboot anyway.
+	 */
+failed_to_alloc_page:
+	bio_put(bio);
+failed_bio_alloc:
+	if (dev_mode)
+		blkdev_put(bdev, dev_mode);
+failed_to_read:
+	return ret;
+}
+
+void dm_verity_avb_error_handler(void)
+{
+	dev_t dev;
+
+	DMINFO("AVB error handler called for %s", avb_vbmeta_device);
+
+	if (strcmp(avb_invalidate_on_error, "yes") != 0) {
+		DMINFO("Not configured to invalidate");
+		return;
+	}
+
+	if (avb_vbmeta_device[0] == '\0') {
+		DMERR("avb_vbmeta_device parameter not set");
+		goto fail_no_dev;
+	}
+
+	dev = name_to_dev_t(avb_vbmeta_device);
+	if (!dev) {
+		DMERR("No matching partition for device: %s",
+		      avb_vbmeta_device);
+		goto fail_no_dev;
+	}
+
+	invalidate_vbmeta(dev);
+
+fail_no_dev:
+	;
+}
+
+static int __init dm_verity_avb_init(void)
+{
+	DMINFO("AVB error handler initialized with vbmeta device: %s",
+	       avb_vbmeta_device);
+	return 0;
+}
+
+static void __exit dm_verity_avb_exit(void)
+{
+}
+
+module_init(dm_verity_avb_init);
+module_exit(dm_verity_avb_exit);
+
+MODULE_AUTHOR("David Zeuthen <zeuthen@google.com>");
+MODULE_DESCRIPTION("AVB-specific error handler for dm-verity");
+MODULE_LICENSE("GPL");
+
+/* Declare parameter with no module prefix */
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX	"androidboot.vbmeta."
+module_param_string(device, avb_vbmeta_device, sizeof(avb_vbmeta_device), 0);
+module_param_string(invalidate_on_error, avb_invalidate_on_error,
+		    sizeof(avb_invalidate_on_error), 0);

diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index ac23441..1a1fac4 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c

@@ -235,8 +235,12 @@ static int verity_handle_err(struct dm_verity *v, enum verity_block_type type,
 	if (v->mode == DM_VERITY_MODE_LOGGING)
 		return 0;
 
-	if (v->mode == DM_VERITY_MODE_RESTART)
+	if (v->mode == DM_VERITY_MODE_RESTART) {
+#ifdef CONFIG_DM_VERITY_AVB
+		dm_verity_avb_error_handler();
+#endif
 		kernel_restart("dm-verity device corrupted");
+	}
 
 	return 1;
 }
@@ -991,6 +995,15 @@ int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		v->tfm = NULL;
 		goto bad;
 	}
+
+	/*
+	 * dm-verity performance can vary greatly depending on which hash
+	 * algorithm implementation is used.  Help people debug performance
+	 * problems by logging the ->cra_driver_name.
+	 */
+	DMINFO("%s using implementation \"%s\"", v->alg_name,
+	       crypto_shash_alg(v->tfm)->base.cra_driver_name);
+
 	v->digest_size = crypto_shash_digestsize(v->tfm);
 	if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) {
 		ti->error = "Digest size too big";
@@ -1042,6 +1055,14 @@ int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 			goto bad;
 	}
 
+#ifdef CONFIG_DM_ANDROID_VERITY_AT_MOST_ONCE_DEFAULT_ENABLED
+	if (!v->validated_blocks) {
+		r = verity_alloc_most_once(v);
+		if (r)
+			goto bad;
+	}
+#endif
+
 	v->hash_per_block_bits =
 		__fls((1 << v->hash_dev_block_bits) / v->digest_size);
 

diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index 6d6d8df..d216fc76 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h

@@ -137,4 +137,5 @@ extern void verity_io_hints(struct dm_target *ti, struct queue_limits *limits);
 extern void verity_dtr(struct dm_target *ti);
 extern int verity_ctr(struct dm_target *ti, unsigned argc, char **argv);
 extern int verity_map(struct dm_target *ti, struct bio *bio);
+extern void dm_verity_avb_error_handler(void);
 #endif /* DM_VERITY_H */

diff --git a/drivers/misc/lkdtm.h b/drivers/misc/lkdtm.h
index cfa1039..9966891 100644
--- a/drivers/misc/lkdtm.h
+++ b/drivers/misc/lkdtm.h

@@ -23,6 +23,7 @@ void lkdtm_ATOMIC_UNDERFLOW(void);
 void lkdtm_ATOMIC_OVERFLOW(void);
 void lkdtm_CORRUPT_LIST_ADD(void);
 void lkdtm_CORRUPT_LIST_DEL(void);
+void lkdtm_CORRUPT_USER_DS(void);
 
 /* lkdtm_heap.c */
 void lkdtm_OVERWRITE_ALLOCATION(void);

diff --git a/drivers/misc/lkdtm_bugs.c b/drivers/misc/lkdtm_bugs.c
index f36674e..a88a114 100644
--- a/drivers/misc/lkdtm_bugs.c
+++ b/drivers/misc/lkdtm_bugs.c

@@ -7,6 +7,7 @@
 #include "lkdtm.h"
 #include <linux/list.h>
 #include <linux/sched.h>
+#include <linux/uaccess.h>
 
 struct lkdtm_list {
 	struct list_head node;
@@ -220,3 +221,12 @@ void lkdtm_CORRUPT_LIST_DEL(void)
 	else
 		pr_err("list_del() corruption not detected!\n");
 }
+
+void lkdtm_CORRUPT_USER_DS(void)
+{
+	pr_info("setting bad task size limit\n");
+	set_fs(KERNEL_DS);
+
+	/* Make sure we do not keep running with a KERNEL_DS! */
+	force_sig(SIGKILL, current);
+}

diff --git a/drivers/misc/lkdtm_core.c b/drivers/misc/lkdtm_core.c
index 4d44084..b72fb64 100644
--- a/drivers/misc/lkdtm_core.c
+++ b/drivers/misc/lkdtm_core.c

@@ -199,6 +199,7 @@ struct crashtype crashtypes[] = {
 	CRASHTYPE(OVERFLOW),
 	CRASHTYPE(CORRUPT_LIST_ADD),
 	CRASHTYPE(CORRUPT_LIST_DEL),
+	CRASHTYPE(CORRUPT_USER_DS),
 	CRASHTYPE(CORRUPT_STACK),
 	CRASHTYPE(UNALIGNED_LOAD_STORE_WRITE),
 	CRASHTYPE(OVERWRITE_ALLOCATION),

diff --git a/drivers/misc/uid_sys_stats.c b/drivers/misc/uid_sys_stats.c
index a940f97..345f229 100644
--- a/drivers/misc/uid_sys_stats.c
+++ b/drivers/misc/uid_sys_stats.c

@@ -130,7 +130,7 @@ static void get_full_task_comm(struct task_entry *task_entry,
 	struct mm_struct *mm = task->mm;
 
 	/* fill the first TASK_COMM_LEN bytes with thread name */
-	get_task_comm(task_entry->comm, task);
+	__get_task_comm(task_entry->comm, TASK_COMM_LEN, task);
 	i = strlen(task_entry->comm);
 	while (i < TASK_COMM_LEN)
 		task_entry->comm[i++] = ' ';

diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig
index bb2270b..b3670ee 100644
--- a/drivers/net/wireless/Kconfig
+++ b/drivers/net/wireless/Kconfig

@@ -100,6 +100,13 @@
 
 	  If you choose to build a module, it'll be called rndis_wlan.
 
+config VIRT_WIFI
+	tristate "Wifi wrapper for ethernet drivers"
+	depends on CFG80211
+	---help---
+	  This option adds support for ethernet connections to appear as if they
+	  are wifi connections through a special rtnetlink device.
+
 config WCNSS_MEM_PRE_ALLOC
        tristate "WCNSS pre-alloc memory support"
        ---help---

diff --git a/drivers/net/wireless/Makefile b/drivers/net/wireless/Makefile
index 917a876..95df8d7 100644
--- a/drivers/net/wireless/Makefile
+++ b/drivers/net/wireless/Makefile

@@ -26,6 +26,7 @@
 
 obj-$(CONFIG_MAC80211_HWSIM)	+= mac80211_hwsim.o
 
+obj-$(CONFIG_VIRT_WIFI)	+= virt_wifi.o
 obj-$(CONFIG_CNSS2)	+= cnss2/
 obj-$(CONFIG_CNSS)              += cnss/
 obj-$(CONFIG_WCNSS_MEM_PRE_ALLOC) += cnss_prealloc/

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 95b2d83..bfddabb 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c

@@ -2776,7 +2776,6 @@ static s32 brcmf_inform_single_bss(struct brcmf_cfg80211_info *cfg,
 				   struct brcmf_bss_info_le *bi)
 {
 	struct wiphy *wiphy = cfg_to_wiphy(cfg);
-	struct ieee80211_channel *notify_channel;
 	struct cfg80211_bss *bss;
 	struct ieee80211_supported_band *band;
 	struct brcmu_chan ch;
@@ -2786,7 +2785,7 @@ static s32 brcmf_inform_single_bss(struct brcmf_cfg80211_info *cfg,
 	u16 notify_interval;
 	u8 *notify_ie;
 	size_t notify_ielen;
-	s32 notify_signal;
+	struct cfg80211_inform_bss bss_data = {};
 
 	if (le32_to_cpu(bi->length) > WL_BSS_INFO_MAX) {
 		brcmf_err("Bss info is larger than buffer. Discarding\n");
@@ -2806,27 +2805,28 @@ static s32 brcmf_inform_single_bss(struct brcmf_cfg80211_info *cfg,
 		band = wiphy->bands[NL80211_BAND_5GHZ];
 
 	freq = ieee80211_channel_to_frequency(channel, band->band);
-	notify_channel = ieee80211_get_channel(wiphy, freq);
+	bss_data.chan = ieee80211_get_channel(wiphy, freq);
+	bss_data.scan_width = NL80211_BSS_CHAN_WIDTH_20;
+	bss_data.boottime_ns = ktime_to_ns(ktime_get_boottime());
 
 	notify_capability = le16_to_cpu(bi->capability);
 	notify_interval = le16_to_cpu(bi->beacon_period);
 	notify_ie = (u8 *)bi + le16_to_cpu(bi->ie_offset);
 	notify_ielen = le32_to_cpu(bi->ie_length);
-	notify_signal = (s16)le16_to_cpu(bi->RSSI) * 100;
+	bss_data.signal = (s16)le16_to_cpu(bi->RSSI) * 100;
 
 	brcmf_dbg(CONN, "bssid: %pM\n", bi->BSSID);
 	brcmf_dbg(CONN, "Channel: %d(%d)\n", channel, freq);
 	brcmf_dbg(CONN, "Capability: %X\n", notify_capability);
 	brcmf_dbg(CONN, "Beacon interval: %d\n", notify_interval);
-	brcmf_dbg(CONN, "Signal: %d\n", notify_signal);
+	brcmf_dbg(CONN, "Signal: %d\n", bss_data.signal);
 
-	bss = cfg80211_inform_bss(wiphy, notify_channel,
-				  CFG80211_BSS_FTYPE_UNKNOWN,
-				  (const u8 *)bi->BSSID,
-				  0, notify_capability,
-				  notify_interval, notify_ie,
-				  notify_ielen, notify_signal,
-				  GFP_KERNEL);
+	bss = cfg80211_inform_bss_data(wiphy, &bss_data,
+				       CFG80211_BSS_FTYPE_UNKNOWN,
+				       (const u8 *)bi->BSSID,
+				       0, notify_capability,
+				       notify_interval, notify_ie,
+				       notify_ielen, GFP_KERNEL);
 
 	if (!bss)
 		return -ENOMEM;

diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c
new file mode 100644
index 0000000..17dbc36
--- /dev/null
+++ b/drivers/net/wireless/virt_wifi.c

@@ -0,0 +1,631 @@
+// SPDX-License-Identifier: GPL-2.0
+/* drivers/net/wireless/virt_wifi.c
+ *
+ * A fake implementation of cfg80211_ops that can be tacked on to an ethernet
+ * net_device to make it appear as a wireless connection.
+ *
+ * Copyright (C) 2018 Google, Inc.
+ *
+ * Author: schuffelen@google.com
+ */
+
+#include <net/cfg80211.h>
+#include <net/rtnetlink.h>
+#include <linux/etherdevice.h>
+#include <linux/module.h>
+
+#include <net/cfg80211.h>
+#include <net/rtnetlink.h>
+#include <linux/etherdevice.h>
+#include <linux/module.h>
+
+static struct wiphy *common_wiphy;
+
+struct virt_wifi_wiphy_priv {
+	struct delayed_work scan_result;
+	struct cfg80211_scan_request *scan_request;
+	bool being_deleted;
+};
+
+static struct ieee80211_channel channel_2ghz = {
+	.band = NL80211_BAND_2GHZ,
+	.center_freq = 2432,
+	.hw_value = 2432,
+	.max_power = 20,
+};
+
+static struct ieee80211_rate bitrates_2ghz[] = {
+	{ .bitrate = 10 },
+	{ .bitrate = 20 },
+	{ .bitrate = 55 },
+	{ .bitrate = 110 },
+	{ .bitrate = 60 },
+	{ .bitrate = 120 },
+	{ .bitrate = 240 },
+};
+
+static struct ieee80211_supported_band band_2ghz = {
+	.channels = &channel_2ghz,
+	.bitrates = bitrates_2ghz,
+	.band = NL80211_BAND_2GHZ,
+	.n_channels = 1,
+	.n_bitrates = ARRAY_SIZE(bitrates_2ghz),
+	.ht_cap = {
+		.ht_supported = true,
+		.cap = IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
+		       IEEE80211_HT_CAP_GRN_FLD |
+		       IEEE80211_HT_CAP_SGI_20 |
+		       IEEE80211_HT_CAP_SGI_40 |
+		       IEEE80211_HT_CAP_DSSSCCK40,
+		.ampdu_factor = 0x3,
+		.ampdu_density = 0x6,
+		.mcs = {
+			.rx_mask = {0xff, 0xff},
+			.tx_params = IEEE80211_HT_MCS_TX_DEFINED,
+		},
+	},
+};
+
+static struct ieee80211_channel channel_5ghz = {
+	.band = NL80211_BAND_5GHZ,
+	.center_freq = 5240,
+	.hw_value = 5240,
+	.max_power = 20,
+};
+
+static struct ieee80211_rate bitrates_5ghz[] = {
+	{ .bitrate = 60 },
+	{ .bitrate = 120 },
+	{ .bitrate = 240 },
+};
+
+#define RX_MCS_MAP (IEEE80211_VHT_MCS_SUPPORT_0_9 << 0 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 2 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 4 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 6 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 8 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 10 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 12 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 14)
+
+#define TX_MCS_MAP (IEEE80211_VHT_MCS_SUPPORT_0_9 << 0 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 2 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 4 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 6 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 8 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 10 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 12 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 14)
+
+static struct ieee80211_supported_band band_5ghz = {
+	.channels = &channel_5ghz,
+	.bitrates = bitrates_5ghz,
+	.band = NL80211_BAND_5GHZ,
+	.n_channels = 1,
+	.n_bitrates = ARRAY_SIZE(bitrates_5ghz),
+	.ht_cap = {
+		.ht_supported = true,
+		.cap = IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
+		       IEEE80211_HT_CAP_GRN_FLD |
+		       IEEE80211_HT_CAP_SGI_20 |
+		       IEEE80211_HT_CAP_SGI_40 |
+		       IEEE80211_HT_CAP_DSSSCCK40,
+		.ampdu_factor = 0x3,
+		.ampdu_density = 0x6,
+		.mcs = {
+			.rx_mask = {0xff, 0xff},
+			.tx_params = IEEE80211_HT_MCS_TX_DEFINED,
+		},
+	},
+	.vht_cap = {
+		.vht_supported = true,
+		.cap = IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 |
+		       IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ |
+		       IEEE80211_VHT_CAP_RXLDPC |
+		       IEEE80211_VHT_CAP_SHORT_GI_80 |
+		       IEEE80211_VHT_CAP_SHORT_GI_160 |
+		       IEEE80211_VHT_CAP_TXSTBC |
+		       IEEE80211_VHT_CAP_RXSTBC_1 |
+		       IEEE80211_VHT_CAP_RXSTBC_2 |
+		       IEEE80211_VHT_CAP_RXSTBC_3 |
+		       IEEE80211_VHT_CAP_RXSTBC_4 |
+		       IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK,
+		.vht_mcs = {
+			.rx_mcs_map = cpu_to_le16(RX_MCS_MAP),
+			.tx_mcs_map = cpu_to_le16(TX_MCS_MAP),
+		}
+	},
+};
+
+/* Assigned at module init. Guaranteed locally-administered and unicast. */
+static u8 fake_router_bssid[ETH_ALEN] __ro_after_init = {};
+
+/* Called with the rtnl lock held. */
+static int virt_wifi_scan(struct wiphy *wiphy,
+			  struct cfg80211_scan_request *request)
+{
+	struct virt_wifi_wiphy_priv *priv = wiphy_priv(wiphy);
+
+	wiphy_debug(wiphy, "scan\n");
+
+	if (priv->scan_request || priv->being_deleted)
+		return -EBUSY;
+
+	priv->scan_request = request;
+	schedule_delayed_work(&priv->scan_result, HZ * 2);
+
+	return 0;
+}
+
+/* Acquires and releases the rdev BSS lock. */
+static void virt_wifi_scan_result(struct work_struct *work)
+{
+	struct {
+		u8 tag;
+		u8 len;
+		u8 ssid[8];
+	} __packed ssid = {
+		.tag = WLAN_EID_SSID, .len = 8, .ssid = "VirtWifi",
+	};
+	struct cfg80211_bss *informed_bss;
+	struct virt_wifi_wiphy_priv *priv =
+		container_of(work, struct virt_wifi_wiphy_priv,
+			     scan_result.work);
+	struct wiphy *wiphy = priv_to_wiphy(priv);
+	struct cfg80211_scan_info scan_info = { .aborted = false };
+
+	informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz,
+					   CFG80211_BSS_FTYPE_PRESP,
+					   fake_router_bssid,
+					   ktime_get_boot_ns(),
+					   WLAN_CAPABILITY_ESS, 0,
+					   (void *)&ssid, sizeof(ssid),
+					   DBM_TO_MBM(-50), GFP_KERNEL);
+	cfg80211_put_bss(wiphy, informed_bss);
+
+	/* Schedules work which acquires and releases the rtnl lock. */
+	cfg80211_scan_done(priv->scan_request, &scan_info);
+	priv->scan_request = NULL;
+}
+
+/* May acquire and release the rdev BSS lock. */
+static void virt_wifi_cancel_scan(struct wiphy *wiphy)
+{
+	struct virt_wifi_wiphy_priv *priv = wiphy_priv(wiphy);
+
+	cancel_delayed_work_sync(&priv->scan_result);
+	/* Clean up dangling callbacks if necessary. */
+	if (priv->scan_request) {
+		struct cfg80211_scan_info scan_info = { .aborted = true };
+		/* Schedules work which acquires and releases the rtnl lock. */
+		cfg80211_scan_done(priv->scan_request, &scan_info);
+		priv->scan_request = NULL;
+	}
+}
+
+struct virt_wifi_netdev_priv {
+	struct delayed_work connect;
+	struct net_device *lowerdev;
+	struct net_device *upperdev;
+	u32 tx_packets;
+	u32 tx_failed;
+	u8 connect_requested_bss[ETH_ALEN];
+	bool is_up;
+	bool is_connected;
+	bool being_deleted;
+};
+
+/* Called with the rtnl lock held. */
+static int virt_wifi_connect(struct wiphy *wiphy, struct net_device *netdev,
+			     struct cfg80211_connect_params *sme)
+{
+	struct virt_wifi_netdev_priv *priv = netdev_priv(netdev);
+	bool could_schedule;
+
+	if (priv->being_deleted || !priv->is_up)
+		return -EBUSY;
+
+	could_schedule = schedule_delayed_work(&priv->connect, HZ * 2);
+	if (!could_schedule)
+		return -EBUSY;
+
+	if (sme->bssid)
+		ether_addr_copy(priv->connect_requested_bss, sme->bssid);
+	else
+		eth_zero_addr(priv->connect_requested_bss);
+
+	wiphy_debug(wiphy, "connect\n");
+
+	return 0;
+}
+
+/* Acquires and releases the rdev event lock. */
+static void virt_wifi_connect_complete(struct work_struct *work)
+{
+	struct virt_wifi_netdev_priv *priv =
+		container_of(work, struct virt_wifi_netdev_priv, connect.work);
+	u8 *requested_bss = priv->connect_requested_bss;
+	bool has_addr = !is_zero_ether_addr(requested_bss);
+	bool right_addr = ether_addr_equal(requested_bss, fake_router_bssid);
+	u16 status = WLAN_STATUS_SUCCESS;
+
+	if (!priv->is_up || (has_addr && !right_addr))
+		status = WLAN_STATUS_UNSPECIFIED_FAILURE;
+	else
+		priv->is_connected = true;
+
+	/* Schedules an event that acquires the rtnl lock. */
+	cfg80211_connect_result(priv->upperdev, requested_bss, NULL, 0, NULL, 0,
+				status, GFP_KERNEL);
+	netif_carrier_on(priv->upperdev);
+}
+
+/* May acquire and release the rdev event lock. */
+static void virt_wifi_cancel_connect(struct net_device *netdev)
+{
+	struct virt_wifi_netdev_priv *priv = netdev_priv(netdev);
+
+	/* If there is work pending, clean up dangling callbacks. */
+	if (cancel_delayed_work_sync(&priv->connect)) {
+		/* Schedules an event that acquires the rtnl lock. */
+		cfg80211_connect_result(priv->upperdev,
+					priv->connect_requested_bss, NULL, 0,
+					NULL, 0,
+					WLAN_STATUS_UNSPECIFIED_FAILURE,
+					GFP_KERNEL);
+	}
+}
+
+/* Called with the rtnl lock held. Acquires the rdev event lock. */
+static int virt_wifi_disconnect(struct wiphy *wiphy, struct net_device *netdev,
+				u16 reason_code)
+{
+	struct virt_wifi_netdev_priv *priv = netdev_priv(netdev);
+
+	if (priv->being_deleted)
+		return -EBUSY;
+
+	wiphy_debug(wiphy, "disconnect\n");
+	virt_wifi_cancel_connect(netdev);
+
+	cfg80211_disconnected(netdev, reason_code, NULL, 0, true, GFP_KERNEL);
+	priv->is_connected = false;
+	netif_carrier_off(netdev);
+
+	return 0;
+}
+
+/* Called with the rtnl lock held. */
+static int virt_wifi_get_station(struct wiphy *wiphy, struct net_device *dev,
+				 const u8 *mac, struct station_info *sinfo)
+{
+	struct virt_wifi_netdev_priv *priv = netdev_priv(dev);
+
+	wiphy_debug(wiphy, "get_station\n");
+
+	if (!priv->is_connected || !ether_addr_equal(mac, fake_router_bssid))
+		return -ENOENT;
+
+	sinfo->filled = BIT_ULL(NL80211_STA_INFO_TX_PACKETS) |
+		BIT_ULL(NL80211_STA_INFO_TX_FAILED) |
+		BIT_ULL(NL80211_STA_INFO_SIGNAL) |
+		BIT_ULL(NL80211_STA_INFO_TX_BITRATE);
+	sinfo->tx_packets = priv->tx_packets;
+	sinfo->tx_failed = priv->tx_failed;
+	/* For CFG80211_SIGNAL_TYPE_MBM, value is expressed in _dBm_ */
+	sinfo->signal = -50;
+	sinfo->txrate = (struct rate_info) {
+		.legacy = 10, /* units are 100kbit/s */
+	};
+	return 0;
+}
+
+/* Called with the rtnl lock held. */
+static int virt_wifi_dump_station(struct wiphy *wiphy, struct net_device *dev,
+				  int idx, u8 *mac, struct station_info *sinfo)
+{
+	struct virt_wifi_netdev_priv *priv = netdev_priv(dev);
+
+	wiphy_debug(wiphy, "dump_station\n");
+
+	if (idx != 0 || !priv->is_connected)
+		return -ENOENT;
+
+	ether_addr_copy(mac, fake_router_bssid);
+	return virt_wifi_get_station(wiphy, dev, fake_router_bssid, sinfo);
+}
+
+static const struct cfg80211_ops virt_wifi_cfg80211_ops = {
+	.scan = virt_wifi_scan,
+
+	.connect = virt_wifi_connect,
+	.disconnect = virt_wifi_disconnect,
+
+	.get_station = virt_wifi_get_station,
+	.dump_station = virt_wifi_dump_station,
+};
+
+/* Acquires and releases the rtnl lock. */
+static struct wiphy *virt_wifi_make_wiphy(void)
+{
+	struct wiphy *wiphy;
+	struct virt_wifi_wiphy_priv *priv;
+	int err;
+
+	wiphy = wiphy_new(&virt_wifi_cfg80211_ops, sizeof(*priv));
+
+	if (!wiphy)
+		return NULL;
+
+	wiphy->max_scan_ssids = 4;
+	wiphy->max_scan_ie_len = 1000;
+	wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM;
+
+	wiphy->bands[NL80211_BAND_2GHZ] = &band_2ghz;
+	wiphy->bands[NL80211_BAND_5GHZ] = &band_5ghz;
+	wiphy->bands[NL80211_BAND_60GHZ] = NULL;
+
+	wiphy->regulatory_flags = REGULATORY_WIPHY_SELF_MANAGED;
+	wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION);
+
+	priv = wiphy_priv(wiphy);
+	priv->being_deleted = false;
+	priv->scan_request = NULL;
+	INIT_DELAYED_WORK(&priv->scan_result, virt_wifi_scan_result);
+
+	err = wiphy_register(wiphy);
+	if (err < 0) {
+		wiphy_free(wiphy);
+		return NULL;
+	}
+
+	return wiphy;
+}
+
+/* Acquires and releases the rtnl lock. */
+static void virt_wifi_destroy_wiphy(struct wiphy *wiphy)
+{
+	struct virt_wifi_wiphy_priv *priv;
+
+	WARN(!wiphy, "%s called with null wiphy", __func__);
+	if (!wiphy)
+		return;
+
+	priv = wiphy_priv(wiphy);
+	priv->being_deleted = true;
+	virt_wifi_cancel_scan(wiphy);
+
+	if (wiphy->registered)
+		wiphy_unregister(wiphy);
+	wiphy_free(wiphy);
+}
+
+/* Enters and exits a RCU-bh critical section. */
+static netdev_tx_t virt_wifi_start_xmit(struct sk_buff *skb,
+					struct net_device *dev)
+{
+	struct virt_wifi_netdev_priv *priv = netdev_priv(dev);
+
+	priv->tx_packets++;
+	if (!priv->is_connected) {
+		priv->tx_failed++;
+		return NET_XMIT_DROP;
+	}
+
+	skb->dev = priv->lowerdev;
+	return dev_queue_xmit(skb);
+}
+
+/* Called with rtnl lock held. */
+static int virt_wifi_net_device_open(struct net_device *dev)
+{
+	struct virt_wifi_netdev_priv *priv = netdev_priv(dev);
+
+	priv->is_up = true;
+	return 0;
+}
+
+/* Called with rtnl lock held. */
+static int virt_wifi_net_device_stop(struct net_device *dev)
+{
+	struct virt_wifi_netdev_priv *n_priv = netdev_priv(dev);
+	struct virt_wifi_wiphy_priv *w_priv;
+
+	n_priv->is_up = false;
+
+	if (!dev->ieee80211_ptr)
+		return 0;
+	w_priv = wiphy_priv(dev->ieee80211_ptr->wiphy);
+
+	virt_wifi_cancel_scan(dev->ieee80211_ptr->wiphy);
+	virt_wifi_cancel_connect(dev);
+	netif_carrier_off(dev);
+
+	return 0;
+}
+
+static const struct net_device_ops virt_wifi_ops = {
+	.ndo_start_xmit = virt_wifi_start_xmit,
+	.ndo_open = virt_wifi_net_device_open,
+	.ndo_stop = virt_wifi_net_device_stop,
+};
+
+/* Invoked as part of rtnl lock release. */
+static void virt_wifi_net_device_destructor(struct net_device *dev)
+{
+	/* Delayed past dellink to allow nl80211 to react to the device being
+	 * deleted.
+	 */
+	kfree(dev->ieee80211_ptr);
+	dev->ieee80211_ptr = NULL;
+	free_netdev(dev);
+}
+
+/* No lock interaction. */
+static void virt_wifi_setup(struct net_device *dev)
+{
+	ether_setup(dev);
+	dev->netdev_ops = &virt_wifi_ops;
+	dev->destructor = virt_wifi_net_device_destructor;
+}
+
+/* Called in a RCU read critical section from netif_receive_skb */
+static rx_handler_result_t virt_wifi_rx_handler(struct sk_buff **pskb)
+{
+	struct sk_buff *skb = *pskb;
+	struct virt_wifi_netdev_priv *priv =
+		rcu_dereference(skb->dev->rx_handler_data);
+
+	if (!priv->is_connected)
+		return RX_HANDLER_PASS;
+
+	/* GFP_ATOMIC because this is a packet interrupt handler. */
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (!skb) {
+		dev_err(&priv->upperdev->dev, "can't skb_share_check\n");
+		return RX_HANDLER_CONSUMED;
+	}
+
+	*pskb = skb;
+	skb->dev = priv->upperdev;
+	skb->pkt_type = PACKET_HOST;
+	return RX_HANDLER_ANOTHER;
+}
+
+/* Called with rtnl lock held. */
+static int virt_wifi_newlink(struct net *src_net, struct net_device *dev,
+			     struct nlattr *tb[], struct nlattr *data[])
+{
+	struct virt_wifi_netdev_priv *priv = netdev_priv(dev);
+	int err;
+
+	if (!tb[IFLA_LINK])
+		return -EINVAL;
+
+	netif_carrier_off(dev);
+
+	priv->upperdev = dev;
+	priv->lowerdev = __dev_get_by_index(src_net,
+					    nla_get_u32(tb[IFLA_LINK]));
+
+	if (!priv->lowerdev)
+		return -ENODEV;
+	if (!tb[IFLA_MTU])
+		dev->mtu = priv->lowerdev->mtu;
+	else if (dev->mtu > priv->lowerdev->mtu)
+		return -EINVAL;
+
+	err = netdev_rx_handler_register(priv->lowerdev, virt_wifi_rx_handler,
+					 priv);
+	if (err) {
+		dev_err(&priv->lowerdev->dev,
+			"can't netdev_rx_handler_register: %d\n", err);
+		return err;
+	}
+
+	eth_hw_addr_inherit(dev, priv->lowerdev);
+	netif_stacked_transfer_operstate(priv->lowerdev, dev);
+
+	SET_NETDEV_DEV(dev, &priv->lowerdev->dev);
+	dev->ieee80211_ptr = kzalloc(sizeof(*dev->ieee80211_ptr), GFP_KERNEL);
+
+	if (!dev->ieee80211_ptr)
+		goto remove_handler;
+
+	dev->ieee80211_ptr->iftype = NL80211_IFTYPE_STATION;
+	dev->ieee80211_ptr->wiphy = common_wiphy;
+
+	err = register_netdevice(dev);
+	if (err) {
+		dev_err(&priv->lowerdev->dev, "can't register_netdevice: %d\n",
+			err);
+		goto free_wireless_dev;
+	}
+
+	err = netdev_upper_dev_link(priv->lowerdev, dev);
+	if (err) {
+		dev_err(&priv->lowerdev->dev, "can't netdev_upper_dev_link: %d\n",
+			err);
+		goto unregister_netdev;
+	}
+
+	priv->being_deleted = false;
+	priv->is_connected = false;
+	priv->is_up = false;
+	INIT_DELAYED_WORK(&priv->connect, virt_wifi_connect_complete);
+
+	return 0;
+unregister_netdev:
+	unregister_netdevice(dev);
+free_wireless_dev:
+	kfree(dev->ieee80211_ptr);
+	dev->ieee80211_ptr = NULL;
+remove_handler:
+	netdev_rx_handler_unregister(priv->lowerdev);
+
+	return err;
+}
+
+/* Called with rtnl lock held. */
+static void virt_wifi_dellink(struct net_device *dev,
+			      struct list_head *head)
+{
+	struct virt_wifi_netdev_priv *priv = netdev_priv(dev);
+
+	if (dev->ieee80211_ptr)
+		virt_wifi_cancel_scan(dev->ieee80211_ptr->wiphy);
+
+	priv->being_deleted = true;
+	virt_wifi_cancel_connect(dev);
+	netif_carrier_off(dev);
+
+	netdev_rx_handler_unregister(priv->lowerdev);
+	netdev_upper_dev_unlink(priv->lowerdev, dev);
+
+	unregister_netdevice_queue(dev, head);
+
+	/* Deleting the wiphy is handled in the module destructor. */
+}
+
+static struct rtnl_link_ops virt_wifi_link_ops = {
+	.kind		= "virt_wifi",
+	.setup		= virt_wifi_setup,
+	.newlink	= virt_wifi_newlink,
+	.dellink	= virt_wifi_dellink,
+	.priv_size	= sizeof(struct virt_wifi_netdev_priv),
+};
+
+/* Acquires and releases the rtnl lock. */
+static int __init virt_wifi_init_module(void)
+{
+	int err;
+
+	/* Guaranteed to be locallly-administered and not multicast. */
+	eth_random_addr(fake_router_bssid);
+
+	common_wiphy = virt_wifi_make_wiphy();
+	if (!common_wiphy)
+		return -ENOMEM;
+
+	err = rtnl_link_register(&virt_wifi_link_ops);
+	if (err)
+		virt_wifi_destroy_wiphy(common_wiphy);
+
+	return err;
+}
+
+/* Acquires and releases the rtnl lock. */
+static void __exit virt_wifi_cleanup_module(void)
+{
+	/* Will delete any devices that depend on the wiphy. */
+	rtnl_link_unregister(&virt_wifi_link_ops);
+	virt_wifi_destroy_wiphy(common_wiphy);
+}
+
+module_init(virt_wifi_init_module);
+module_exit(virt_wifi_cleanup_module);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Cody Schuffelen <schuffelen@google.com>");
+MODULE_DESCRIPTION("Driver for a wireless wrapper of ethernet devices");
+MODULE_ALIAS_RTNL_LINK("virt_wifi");

diff --git a/drivers/staging/android/TODO b/drivers/staging/android/TODO
index dd64148..edfb680 100644
--- a/drivers/staging/android/TODO
+++ b/drivers/staging/android/TODO

@@ -39,7 +39,6 @@
    waiting threads. We should eventually use multiple queues and select the
    queue based on the region.
  - Add debugfs support for examining the permissions of regions.
- - Use ioremap_wc instead of ioremap_nocache.
  - Remove VSOC_WAIT_FOR_INCOMING_INTERRUPT ioctl. This functionality has been
    superseded by the futex and is there for legacy reasons.
 

diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index db096ce..61f047b 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c

@@ -1229,9 +1229,6 @@ static void ion_buffer_sync_for_device(struct ion_buffer *buffer,
 	int pages = PAGE_ALIGN(buffer->size) / PAGE_SIZE;
 	int i;
 
-	pr_debug("%s: syncing for device %s\n", __func__,
-		 dev ? dev_name(dev) : "null");
-
 	if (!ion_buffer_fault_user_mappings(buffer))
 		return;
 
@@ -1293,7 +1290,6 @@ static void ion_vm_close(struct vm_area_struct *vma)
 	struct ion_buffer *buffer = vma->vm_private_data;
 	struct ion_vma_list *vma_list, *tmp;
 
-	pr_debug("%s\n", __func__);
 	mutex_lock(&buffer->lock);
 	list_for_each_entry_safe(vma_list, tmp, &buffer->vmas, list) {
 		if (vma_list->vma != vma)
@@ -1696,7 +1692,6 @@ static int ion_release(struct inode *inode, struct file *file)
 {
 	struct ion_client *client = file->private_data;
 
-	pr_debug("%s: %d\n", __func__, __LINE__);
 	ion_client_destroy(client);
 	return 0;
 }
@@ -1708,7 +1703,6 @@ static int ion_open(struct inode *inode, struct file *file)
 	struct ion_client *client;
 	char debug_name[64];
 
-	pr_debug("%s: %d\n", __func__, __LINE__);
 	snprintf(debug_name, 64, "%u", task_pid_nr(current->group_leader));
 	client = ion_client_create(dev, debug_name);
 	if (IS_ERR(client))

diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c
index 587c66d..954ed2c 100644
--- a/drivers/staging/android/vsoc.c
+++ b/drivers/staging/android/vsoc.c

@@ -81,8 +81,8 @@ struct vsoc_region_data {
 	atomic_t *incoming_signalled;
 	/* Flag indicating the guest has signalled the host. */
 	atomic_t *outgoing_signalled;
-	int irq_requested;
-	int device_created;
+	bool irq_requested;
+	bool device_created;
 };
 
 struct vsoc_device {
@@ -91,7 +91,7 @@ struct vsoc_device {
 	/* Physical address of SHARED_MEMORY_BAR. */
 	phys_addr_t shm_phys_start;
 	/* Kernel virtual address of SHARED_MEMORY_BAR. */
-	void *kernel_mapped_shm;
+	void __iomem *kernel_mapped_shm;
 	/* Size of the entire shared memory window in bytes. */
 	size_t shm_size;
 	/*
@@ -116,22 +116,23 @@ struct vsoc_device {
 	 * vsoc_region_data because the kernel deals with them as an array.
 	 */
 	struct msix_entry *msix_entries;
-	/*
-	 * Flags that indicate what we've initialzied. These are used to do an
-	 * orderly cleanup of the device.
-	 */
-	char enabled_device;
-	char requested_regions;
-	char cdev_added;
-	char class_added;
-	char msix_enabled;
 	/* Mutex that protectes the permission list */
 	struct mutex mtx;
 	/* Major number assigned by the kernel */
 	int major;
-
+	/* Character device assigned by the kernel */
 	struct cdev cdev;
+	/* Device class assigned by the kernel */
 	struct class *class;
+	/*
+	 * Flags that indicate what we've initialized. These are used to do an
+	 * orderly cleanup of the device.
+	 */
+	bool enabled_device;
+	bool requested_regions;
+	bool cdev_added;
+	bool class_added;
+	bool msix_enabled;
 };
 
 static struct vsoc_device vsoc_dev;
@@ -153,13 +154,13 @@ static long vsoc_ioctl(struct file *, unsigned int, unsigned long);
 static int vsoc_mmap(struct file *, struct vm_area_struct *);
 static int vsoc_open(struct inode *, struct file *);
 static int vsoc_release(struct inode *, struct file *);
-static ssize_t vsoc_read(struct file *, char *, size_t, loff_t *);
-static ssize_t vsoc_write(struct file *, const char *, size_t, loff_t *);
+static ssize_t vsoc_read(struct file *, char __user *, size_t, loff_t *);
+static ssize_t vsoc_write(struct file *, const char __user *, size_t, loff_t *);
 static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin);
 static int do_create_fd_scoped_permission(
 	struct vsoc_device_region *region_p,
 	struct fd_scoped_permission_node *np,
-	struct fd_scoped_permission_arg *__user arg);
+	struct fd_scoped_permission_arg __user *arg);
 static void do_destroy_fd_scoped_permission(
 	struct vsoc_device_region *owner_region_p,
 	struct fd_scoped_permission *perm);
@@ -198,7 +199,7 @@ inline int vsoc_validate_filep(struct file *filp)
 /* Converts from shared memory offset to virtual address */
 static inline void *shm_off_to_virtual_addr(__u32 offset)
 {
-	return vsoc_dev.kernel_mapped_shm + offset;
+	return (void __force *)vsoc_dev.kernel_mapped_shm + offset;
 }
 
 /* Converts from shared memory offset to physical address */
@@ -261,7 +262,7 @@ static struct pci_driver vsoc_pci_driver = {
 static int do_create_fd_scoped_permission(
 	struct vsoc_device_region *region_p,
 	struct fd_scoped_permission_node *np,
-	struct fd_scoped_permission_arg *__user arg)
+	struct fd_scoped_permission_arg __user *arg)
 {
 	struct file *managed_filp;
 	s32 managed_fd;
@@ -632,11 +633,11 @@ static long vsoc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	return 0;
 }
 
-static ssize_t vsoc_read(struct file *filp, char *buffer, size_t len,
+static ssize_t vsoc_read(struct file *filp, char __user *buffer, size_t len,
 			 loff_t *poffset)
 {
 	__u32 area_off;
-	void *area_p;
+	const void *area_p;
 	ssize_t area_len;
 	int retval = vsoc_validate_filep(filp);
 
@@ -706,7 +707,7 @@ static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin)
 	return offset;
 }
 
-static ssize_t vsoc_write(struct file *filp, const char *buffer,
+static ssize_t vsoc_write(struct file *filp, const char __user *buffer,
 			  size_t len, loff_t *poffset)
 {
 	__u32 area_off;
@@ -772,14 +773,14 @@ static int vsoc_probe_device(struct pci_dev *pdev,
 			pci_name(pdev), result);
 		return result;
 	}
-	vsoc_dev.enabled_device = 1;
+	vsoc_dev.enabled_device = true;
 	result = pci_request_regions(pdev, "vsoc");
 	if (result < 0) {
 		dev_err(&pdev->dev, "pci_request_regions failed\n");
 		vsoc_remove_device(pdev);
 		return -EBUSY;
 	}
-	vsoc_dev.requested_regions = 1;
+	vsoc_dev.requested_regions = true;
 	/* Set up the control registers in BAR 0 */
 	reg_size = pci_resource_len(pdev, REGISTER_BAR);
 	if (reg_size > MAX_REGISTER_BAR_LEN)
@@ -790,7 +791,7 @@ static int vsoc_probe_device(struct pci_dev *pdev,
 
 	if (!vsoc_dev.regs) {
 		dev_err(&pdev->dev,
-			"cannot ioremap registers of size %zu\n",
+			"cannot map registers of size %zu\n",
 		       (size_t)reg_size);
 		vsoc_remove_device(pdev);
 		return -EBUSY;
@@ -800,19 +801,17 @@ static int vsoc_probe_device(struct pci_dev *pdev,
 	vsoc_dev.shm_phys_start = pci_resource_start(pdev, SHARED_MEMORY_BAR);
 	vsoc_dev.shm_size = pci_resource_len(pdev, SHARED_MEMORY_BAR);
 
-	dev_info(&pdev->dev, "shared memory @ DMA %p size=0x%zx\n",
-		 (void *)vsoc_dev.shm_phys_start, vsoc_dev.shm_size);
-	/* TODO(ghartman): ioremap_wc should work here */
-	vsoc_dev.kernel_mapped_shm = ioremap_nocache(
-			vsoc_dev.shm_phys_start, vsoc_dev.shm_size);
+	dev_info(&pdev->dev, "shared memory @ DMA %pa size=0x%zx\n",
+		 &vsoc_dev.shm_phys_start, vsoc_dev.shm_size);
+	vsoc_dev.kernel_mapped_shm = pci_iomap_wc(pdev, SHARED_MEMORY_BAR, 0);
 	if (!vsoc_dev.kernel_mapped_shm) {
 		dev_err(&vsoc_dev.dev->dev, "cannot iomap region\n");
 		vsoc_remove_device(pdev);
 		return -EBUSY;
 	}
 
-	vsoc_dev.layout =
-	    (struct vsoc_shm_layout_descriptor *)vsoc_dev.kernel_mapped_shm;
+	vsoc_dev.layout = (struct vsoc_shm_layout_descriptor __force *)
+				vsoc_dev.kernel_mapped_shm;
 	dev_info(&pdev->dev, "major_version: %d\n",
 		 vsoc_dev.layout->major_version);
 	dev_info(&pdev->dev, "minor_version: %d\n",
@@ -843,16 +842,16 @@ static int vsoc_probe_device(struct pci_dev *pdev,
 		vsoc_remove_device(pdev);
 		return -EBUSY;
 	}
-	vsoc_dev.cdev_added = 1;
+	vsoc_dev.cdev_added = true;
 	vsoc_dev.class = class_create(THIS_MODULE, VSOC_DEV_NAME);
 	if (IS_ERR(vsoc_dev.class)) {
 		dev_err(&vsoc_dev.dev->dev, "class_create failed\n");
 		vsoc_remove_device(pdev);
 		return PTR_ERR(vsoc_dev.class);
 	}
-	vsoc_dev.class_added = 1;
-	vsoc_dev.regions = (struct vsoc_device_region *)
-		(vsoc_dev.kernel_mapped_shm +
+	vsoc_dev.class_added = true;
+	vsoc_dev.regions = (struct vsoc_device_region __force *)
+		((void *)vsoc_dev.layout +
 		 vsoc_dev.layout->vsoc_region_desc_offset);
 	vsoc_dev.msix_entries = kcalloc(
 			vsoc_dev.layout->region_count,
@@ -912,7 +911,7 @@ static int vsoc_probe_device(struct pci_dev *pdev,
 			return -EFAULT;
 		}
 	}
-	vsoc_dev.msix_enabled = 1;
+	vsoc_dev.msix_enabled = true;
 	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
 		const struct vsoc_device_region *region = vsoc_dev.regions + i;
 		size_t name_sz = sizeof(vsoc_dev.regions_data[i].name) - 1;
@@ -930,14 +929,11 @@ static int vsoc_probe_device(struct pci_dev *pdev,
 				&vsoc_dev.regions_data[i].interrupt_wait_queue);
 		init_waitqueue_head(&vsoc_dev.regions_data[i].futex_wait_queue);
 		vsoc_dev.regions_data[i].incoming_signalled =
-			vsoc_dev.kernel_mapped_shm +
-			region->region_begin_offset +
+			shm_off_to_virtual_addr(region->region_begin_offset) +
 			h_to_g_signal_table->interrupt_signalled_offset;
 		vsoc_dev.regions_data[i].outgoing_signalled =
-			vsoc_dev.kernel_mapped_shm +
-			region->region_begin_offset +
+			shm_off_to_virtual_addr(region->region_begin_offset) +
 			g_to_h_signal_table->interrupt_signalled_offset;
-
 		result = request_irq(
 				vsoc_dev.msix_entries[i].vector,
 				vsoc_interrupt, 0,
@@ -950,7 +946,7 @@ static int vsoc_probe_device(struct pci_dev *pdev,
 			vsoc_remove_device(pdev);
 			return -ENOSPC;
 		}
-		vsoc_dev.regions_data[i].irq_requested = 1;
+		vsoc_dev.regions_data[i].irq_requested = true;
 		if (!device_create(vsoc_dev.class, NULL,
 				   MKDEV(vsoc_dev.major, i),
 				   NULL, vsoc_dev.regions_data[i].name)) {
@@ -958,7 +954,7 @@ static int vsoc_probe_device(struct pci_dev *pdev,
 			vsoc_remove_device(pdev);
 			return -EBUSY;
 		}
-		vsoc_dev.regions_data[i].device_created = 1;
+		vsoc_dev.regions_data[i].device_created = true;
 	}
 	return 0;
 }
@@ -990,51 +986,51 @@ static void vsoc_remove_device(struct pci_dev *pdev)
 			if (vsoc_dev.regions_data[i].device_created) {
 				device_destroy(vsoc_dev.class,
 					       MKDEV(vsoc_dev.major, i));
-				vsoc_dev.regions_data[i].device_created = 0;
+				vsoc_dev.regions_data[i].device_created = false;
 			}
 			if (vsoc_dev.regions_data[i].irq_requested)
 				free_irq(vsoc_dev.msix_entries[i].vector, NULL);
-			vsoc_dev.regions_data[i].irq_requested = 0;
+			vsoc_dev.regions_data[i].irq_requested = false;
 		}
 		kfree(vsoc_dev.regions_data);
-		vsoc_dev.regions_data = 0;
+		vsoc_dev.regions_data = NULL;
 	}
 	if (vsoc_dev.msix_enabled) {
 		pci_disable_msix(pdev);
-		vsoc_dev.msix_enabled = 0;
+		vsoc_dev.msix_enabled = false;
 	}
 	kfree(vsoc_dev.msix_entries);
-	vsoc_dev.msix_entries = 0;
-	vsoc_dev.regions = 0;
+	vsoc_dev.msix_entries = NULL;
+	vsoc_dev.regions = NULL;
 	if (vsoc_dev.class_added) {
 		class_destroy(vsoc_dev.class);
-		vsoc_dev.class_added = 0;
+		vsoc_dev.class_added = false;
 	}
 	if (vsoc_dev.cdev_added) {
 		cdev_del(&vsoc_dev.cdev);
-		vsoc_dev.cdev_added = 0;
+		vsoc_dev.cdev_added = false;
 	}
 	if (vsoc_dev.major && vsoc_dev.layout) {
 		unregister_chrdev_region(MKDEV(vsoc_dev.major, 0),
 					 vsoc_dev.layout->region_count);
 		vsoc_dev.major = 0;
 	}
-	vsoc_dev.layout = 0;
+	vsoc_dev.layout = NULL;
 	if (vsoc_dev.kernel_mapped_shm) {
 		pci_iounmap(pdev, vsoc_dev.kernel_mapped_shm);
-		vsoc_dev.kernel_mapped_shm = 0;
+		vsoc_dev.kernel_mapped_shm = NULL;
 	}
 	if (vsoc_dev.regs) {
 		pci_iounmap(pdev, vsoc_dev.regs);
-		vsoc_dev.regs = 0;
+		vsoc_dev.regs = NULL;
 	}
 	if (vsoc_dev.requested_regions) {
 		pci_release_regions(pdev);
-		vsoc_dev.requested_regions = 0;
+		vsoc_dev.requested_regions = false;
 	}
 	if (vsoc_dev.enabled_device) {
 		pci_disable_device(pdev);
-		vsoc_dev.enabled_device = 0;
+		vsoc_dev.enabled_device = false;
 	}
 	/* Do this last: it indicates that the device is not initialized. */
 	vsoc_dev.dev = NULL;

diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c
index 3f25946..a67e035 100644
--- a/drivers/usb/gadget/function/f_mtp.c
+++ b/drivers/usb/gadget/function/f_mtp.c

@@ -820,6 +820,11 @@ static void send_file_work(struct work_struct *data)
 	offset = dev->xfer_file_offset;
 	count = dev->xfer_file_length;
 
+	if (count < 0) {
+		dev->xfer_result = -EINVAL;
+		return;
+	}
+
 	DBG(cdev, "send_file_work(%lld %lld)\n", offset, count);
 
 	if (dev->xfer_send_header) {
@@ -936,6 +941,11 @@ static void receive_file_work(struct work_struct *data)
 	offset = dev->xfer_file_offset;
 	count = dev->xfer_file_length;
 
+	if (count < 0) {
+		dev->xfer_result = -EINVAL;
+		return;
+	}
+
 	DBG(cdev, "receive_file_work(%lld)\n", count);
 	if (!IS_ALIGNED(count, dev->ep_out->maxpacket))
 		DBG(cdev, "%s- count(%lld) not multiple of mtu(%d)\n", __func__,

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 0004a54..8de4200 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c

@@ -166,6 +166,8 @@ static bool f2fs_bio_post_read_required(struct bio *bio)
 
 static void f2fs_read_end_io(struct bio *bio)
 {
+	struct page *first_page = bio->bi_io_vec[0].bv_page;
+
 	if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_READ_IO)) {
 		f2fs_show_injection_info(FAULT_READ_IO);
 		bio->bi_error = -EIO;
@@ -179,6 +181,13 @@ static void f2fs_read_end_io(struct bio *bio)
 		return;
 	}
 
+	if (first_page != NULL &&
+		__read_io_type(first_page) == F2FS_RD_DATA) {
+		trace_android_fs_dataread_end(first_page->mapping->host,
+						page_offset(first_page),
+						bio->bi_iter.bi_size);
+	}
+
 	__read_end_io(bio);
 }
 
@@ -345,6 +354,32 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
 	submit_bio(bio);
 }
 
+static void __f2fs_submit_read_bio(struct f2fs_sb_info *sbi,
+				struct bio *bio, enum page_type type)
+{
+	if (trace_android_fs_dataread_start_enabled() && (type == DATA)) {
+		struct page *first_page = bio->bi_io_vec[0].bv_page;
+
+		if (first_page != NULL &&
+			__read_io_type(first_page) == F2FS_RD_DATA) {
+			char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+			path = android_fstrace_get_pathname(pathbuf,
+						MAX_TRACE_PATHBUF_LEN,
+						first_page->mapping->host);
+
+			trace_android_fs_dataread_start(
+				first_page->mapping->host,
+				page_offset(first_page),
+				bio->bi_iter.bi_size,
+				current->pid,
+				path,
+				current->comm);
+		}
+	}
+	__submit_bio(sbi, bio, type);
+}
+
 static void __submit_merged_bio(struct f2fs_bio_info *io)
 {
 	struct f2fs_io_info *fio = &io->fio;
@@ -497,7 +532,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
 	inc_page_count(fio->sbi, is_read_io(fio->op) ?
 			__read_io_type(page): WB_DATA_TYPE(fio->page));
 
-	__submit_bio(fio->sbi, bio, fio->type);
+	__f2fs_submit_read_bio(fio->sbi, bio, fio->type);
 	return 0;
 }
 
@@ -649,7 +684,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
 	}
 	ClearPageError(page);
 	inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
-	__submit_bio(F2FS_I_SB(inode), bio, DATA);
+	__f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
 	return 0;
 }
 
@@ -1656,7 +1691,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
 		if (bio && (last_block_in_bio != block_nr - 1 ||
 			!__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
 submit_and_realloc:
-			__submit_bio(F2FS_I_SB(inode), bio, DATA);
+			__f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
 			bio = NULL;
 		}
 
@@ -1698,7 +1733,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
 		goto next_page;
 confused:
 		if (bio) {
-			__submit_bio(F2FS_I_SB(inode), bio, DATA);
+			__f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
 			bio = NULL;
 		}
 		unlock_page(page);
@@ -1708,7 +1743,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
 	}
 	BUG_ON(pages && !list_empty(pages));
 	if (bio)
-		__submit_bio(F2FS_I_SB(inode), bio, DATA);
+		__f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
 	return 0;
 }
 

diff --git a/fs/proc/uid.c b/fs/proc/uid.c
index 3fd7b9f..b2bb085 100644
--- a/fs/proc/uid.c
+++ b/fs/proc/uid.c

@@ -174,7 +174,7 @@ static int proc_uid_base_readdir(struct file *file, struct dir_context *ctx)
 		return 0;
 
 	for (u = uid_base_stuff + (ctx->pos - 2);
-	     u <= uid_base_stuff + nents - 1; u++) {
+	     u < uid_base_stuff + nents; u++) {
 		if (!proc_fill_cache(file, ctx, u->name, u->len,
 				     proc_uident_instantiate, NULL, u))
 			break;

diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c
index e9426a6..cb573f1 100644
--- a/fs/sdcardfs/dentry.c
+++ b/fs/sdcardfs/dentry.c

@@ -51,7 +51,6 @@ static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags)
 	 * whether the base obbpath has been changed or not
 	 */
 	if (is_obbpath_invalid(dentry)) {
-		d_drop(dentry);
 		return 0;
 	}
 
@@ -65,7 +64,6 @@ static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags)
 	if ((lower_dentry->d_flags & DCACHE_OP_REVALIDATE)) {
 		err = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
 		if (err == 0) {
-			d_drop(dentry);
 			goto out;
 		}
 	}
@@ -73,14 +71,12 @@ static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags)
 	spin_lock(&lower_dentry->d_lock);
 	if (d_unhashed(lower_dentry)) {
 		spin_unlock(&lower_dentry->d_lock);
-		d_drop(dentry);
 		err = 0;
 		goto out;
 	}
 	spin_unlock(&lower_dentry->d_lock);
 
 	if (parent_lower_dentry != lower_cur_parent_dentry) {
-		d_drop(dentry);
 		err = 0;
 		goto out;
 	}
@@ -94,7 +90,6 @@ static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags)
 	}
 
 	if (!qstr_case_eq(&dentry->d_name, &lower_dentry->d_name)) {
-		__d_drop(dentry);
 		err = 0;
 	}
 
@@ -113,7 +108,6 @@ static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags)
 	if (inode) {
 		data = top_data_get(SDCARDFS_I(inode));
 		if (!data || data->abandoned) {
-			d_drop(dentry);
 			err = 0;
 		}
 		if (data)
@@ -129,8 +123,16 @@ static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags)
 	return err;
 }
 
+/* 1 = delete, 0 = cache */
+static int sdcardfs_d_delete(const struct dentry *d)
+{
+	return SDCARDFS_SB(d->d_sb)->options.nocache ? 1 : 0;
+}
+
 static void sdcardfs_d_release(struct dentry *dentry)
 {
+	if (!dentry || !dentry->d_fsdata)
+		return;
 	/* release and reset the lower paths */
 	if (has_graft_path(dentry))
 		sdcardfs_put_reset_orig_path(dentry);
@@ -185,6 +187,7 @@ static void sdcardfs_canonical_path(const struct path *path,
 
 const struct dentry_operations sdcardfs_ci_dops = {
 	.d_revalidate	= sdcardfs_d_revalidate,
+	.d_delete	= sdcardfs_d_delete,
 	.d_release	= sdcardfs_d_release,
 	.d_hash	= sdcardfs_hash_ci,
 	.d_compare	= sdcardfs_cmp_ci,

diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c
index 0b3b223..78a669c 100644
--- a/fs/sdcardfs/derived_perm.c
+++ b/fs/sdcardfs/derived_perm.c

@@ -62,6 +62,7 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry,
 	int err;
 	struct qstr q_Android = QSTR_LITERAL("Android");
 	struct qstr q_data = QSTR_LITERAL("data");
+	struct qstr q_sandbox = QSTR_LITERAL("sandbox");
 	struct qstr q_obb = QSTR_LITERAL("obb");
 	struct qstr q_media = QSTR_LITERAL("media");
 	struct qstr q_cache = QSTR_LITERAL("cache");
@@ -110,6 +111,9 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry,
 		if (qstr_case_eq(name, &q_data)) {
 			/* App-specific directories inside; let anyone traverse */
 			info->data->perm = PERM_ANDROID_DATA;
+		} else if (qstr_case_eq(name, &q_sandbox)) {
+			/* App-specific directories inside; let anyone traverse */
+			info->data->perm = PERM_ANDROID_DATA;
 		} else if (qstr_case_eq(name, &q_obb)) {
 			/* App-specific directories inside; let anyone traverse */
 			info->data->perm = PERM_ANDROID_OBB;
@@ -356,7 +360,8 @@ int need_graft_path(struct dentry *dentry)
 	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
 	struct qstr obb = QSTR_LITERAL("obb");
 
-	if (parent_info->data->perm == PERM_ANDROID &&
+	if (!sbi->options.unshared_obb &&
+			parent_info->data->perm == PERM_ANDROID &&
 			qstr_case_eq(&dentry->d_name, &obb)) {
 
 		/* /Android/obb is the base obbpath of DERIVED_UNIFIED */

diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c
index de34561..7c08ffe 100644
--- a/fs/sdcardfs/inode.c
+++ b/fs/sdcardfs/inode.c

@@ -205,6 +205,7 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
 	struct dentry *lower_dentry;
 	struct vfsmount *lower_mnt;
 	struct dentry *lower_parent_dentry = NULL;
+	struct dentry *parent_dentry = NULL;
 	struct path lower_path;
 	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
 	const struct cred *saved_cred = NULL;
@@ -227,11 +228,14 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
 		return -ENOMEM;
 
 	/* check disk space */
-	if (!check_min_free_space(dentry, 0, 1)) {
+	parent_dentry = dget_parent(dentry);
+	if (!check_min_free_space(parent_dentry, 0, 1)) {
 		pr_err("sdcardfs: No minimum free space.\n");
 		err = -ENOSPC;
+		dput(parent_dentry);
 		goto out_revert;
 	}
+	dput(parent_dentry);
 
 	/* the lower_dentry is negative here */
 	sdcardfs_get_lower_path(dentry, &lower_path);

diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c
index a6e6615..f0a0dba 100644
--- a/fs/sdcardfs/lookup.c
+++ b/fs/sdcardfs/lookup.c

@@ -43,8 +43,6 @@ void sdcardfs_destroy_dentry_cache(void)
 
 void free_dentry_private_data(struct dentry *dentry)
 {
-	if (!dentry || !dentry->d_fsdata)
-		return;
 	kmem_cache_free(sdcardfs_dentry_cachep, dentry->d_fsdata);
 	dentry->d_fsdata = NULL;
 }

diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c
index e4fd3fb..d890c57 100644
--- a/fs/sdcardfs/main.c
+++ b/fs/sdcardfs/main.c

@@ -34,6 +34,8 @@ enum {
 	Opt_reserved_mb,
 	Opt_gid_derivation,
 	Opt_default_normal,
+	Opt_nocache,
+	Opt_unshared_obb,
 	Opt_err,
 };
 
@@ -47,7 +49,9 @@ static const match_table_t sdcardfs_tokens = {
 	{Opt_multiuser, "multiuser"},
 	{Opt_gid_derivation, "derive_gid"},
 	{Opt_default_normal, "default_normal"},
+	{Opt_unshared_obb, "unshared_obb"},
 	{Opt_reserved_mb, "reserved_mb=%u"},
+	{Opt_nocache, "nocache"},
 	{Opt_err, NULL}
 };
 
@@ -71,6 +75,7 @@ static int parse_options(struct super_block *sb, char *options, int silent,
 	/* by default, gid derivation is off */
 	opts->gid_derivation = false;
 	opts->default_normal = false;
+	opts->nocache = false;
 
 	*debug = 0;
 
@@ -128,6 +133,12 @@ static int parse_options(struct super_block *sb, char *options, int silent,
 		case Opt_default_normal:
 			opts->default_normal = true;
 			break;
+		case Opt_nocache:
+			opts->nocache = true;
+			break;
+		case Opt_unshared_obb:
+			opts->unshared_obb = true;
+			break;
 		/* unknown option */
 		default:
 			if (!silent)
@@ -181,13 +192,16 @@ int parse_options_remount(struct super_block *sb, char *options, int silent,
 				return 0;
 			vfsopts->mask = option;
 			break;
+		case Opt_unshared_obb:
 		case Opt_default_normal:
 		case Opt_multiuser:
 		case Opt_userid:
 		case Opt_fsuid:
 		case Opt_fsgid:
 		case Opt_reserved_mb:
-			pr_warn("Option \"%s\" can't be changed during remount\n", p);
+		case Opt_gid_derivation:
+			if (!silent)
+				pr_warn("Option \"%s\" can't be changed during remount\n", p);
 			break;
 		/* unknown option */
 		default:
@@ -295,6 +309,13 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb,
 	atomic_inc(&lower_sb->s_active);
 	sdcardfs_set_lower_super(sb, lower_sb);
 
+	sb->s_stack_depth = lower_sb->s_stack_depth + 1;
+	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
+		pr_err("sdcardfs: maximum fs stacking depth exceeded\n");
+		err = -EINVAL;
+		goto out_sput;
+	}
+
 	/* inherit maxbytes from lower file system */
 	sb->s_maxbytes = lower_sb->s_maxbytes;
 
@@ -316,7 +337,7 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb,
 	sb->s_root = d_make_root(inode);
 	if (!sb->s_root) {
 		err = -ENOMEM;
-		goto out_iput;
+		goto out_sput;
 	}
 	d_set_d_op(sb->s_root, &sdcardfs_ci_dops);
 
@@ -361,8 +382,7 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb,
 	/* no longer needed: free_dentry_private_data(sb->s_root); */
 out_freeroot:
 	dput(sb->s_root);
-out_iput:
-	iput(inode);
+	sb->s_root = NULL;
 out_sput:
 	/* drop refs we took earlier */
 	atomic_dec(&lower_sb->s_active);
@@ -422,7 +442,7 @@ void sdcardfs_kill_sb(struct super_block *sb)
 {
 	struct sdcardfs_sb_info *sbi;
 
-	if (sb->s_magic == SDCARDFS_SUPER_MAGIC) {
+	if (sb->s_magic == SDCARDFS_SUPER_MAGIC && sb->s_fs_info) {
 		sbi = SDCARDFS_SB(sb);
 		mutex_lock(&sdcardfs_super_list_lock);
 		list_del(&sbi->list);

diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h
index ec2290a..6219771 100644
--- a/fs/sdcardfs/sdcardfs.h
+++ b/fs/sdcardfs/sdcardfs.h

@@ -197,7 +197,9 @@ struct sdcardfs_mount_options {
 	bool multiuser;
 	bool gid_derivation;
 	bool default_normal;
+	bool unshared_obb;
 	unsigned int reserved_mb;
+	bool nocache;
 };
 
 struct sdcardfs_vfsmount_options {

diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c
index cffcdb1..140696e 100644
--- a/fs/sdcardfs/super.c
+++ b/fs/sdcardfs/super.c

@@ -311,6 +311,8 @@ static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m,
 		seq_puts(m, ",default_normal");
 	if (opts->reserved_mb != 0)
 		seq_printf(m, ",reserved=%uMB", opts->reserved_mb);
+	if (opts->nocache)
+		seq_printf(m, ",nocache");
 
 	return 0;
 };

diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index 6dd158a..ffb093e 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig

@@ -26,6 +26,34 @@
 	  If unsure, say N.
 
 choice
+	prompt "File decompression options"
+	depends on SQUASHFS
+	help
+	  Squashfs now supports two options for decompressing file
+	  data.  Traditionally Squashfs has decompressed into an
+	  intermediate buffer and then memcopied it into the page cache.
+	  Squashfs now supports the ability to decompress directly into
+	  the page cache.
+
+	  If unsure, select "Decompress file data into an intermediate buffer"
+
+config SQUASHFS_FILE_CACHE
+	bool "Decompress file data into an intermediate buffer"
+	help
+	  Decompress file data into an intermediate buffer and then
+	  memcopy it into the page cache.
+
+config SQUASHFS_FILE_DIRECT
+	bool "Decompress files directly into the page cache"
+	help
+	  Directly decompress file data into the page cache.
+	  Doing so can significantly improve performance because
+	  it eliminates a memcpy and it also removes the lock contention
+	  on the single buffer.
+
+endchoice
+
+choice
 	prompt "Decompressor parallelisation options"
 	depends on SQUASHFS
 	help

diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index fe51f15..246a6f3 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile

@@ -5,7 +5,8 @@
 obj-$(CONFIG_SQUASHFS) += squashfs.o
 squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
 squashfs-y += namei.o super.o symlink.o decompressor.o
-squashfs-y += file_direct.o page_actor.o
+squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o
+squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o
 squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
 squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
 squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o

diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 7077476..cec0fa2 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c

@@ -28,12 +28,9 @@
 
 #include <linux/fs.h>
 #include <linux/vfs.h>
-#include <linux/bio.h>
 #include <linux/slab.h>
 #include <linux/string.h>
-#include <linux/pagemap.h>
 #include <linux/buffer_head.h>
-#include <linux/workqueue.h>
 
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
@@ -41,382 +38,45 @@
 #include "decompressor.h"
 #include "page_actor.h"
 
-static struct workqueue_struct *squashfs_read_wq;
-
-struct squashfs_read_request {
-	struct super_block *sb;
-	u64 index;
-	int length;
-	int compressed;
-	int offset;
-	u64 read_end;
-	struct squashfs_page_actor *output;
-	enum {
-		SQUASHFS_COPY,
-		SQUASHFS_DECOMPRESS,
-		SQUASHFS_METADATA,
-	} data_processing;
-	bool synchronous;
-
-	/*
-	 * If the read is synchronous, it is possible to retrieve information
-	 * about the request by setting these pointers.
-	 */
-	int *res;
-	int *bytes_read;
-	int *bytes_uncompressed;
-
-	int nr_buffers;
-	struct buffer_head **bh;
-	struct work_struct offload;
-};
-
-struct squashfs_bio_request {
-	struct buffer_head **bh;
-	int nr_buffers;
-};
-
-static int squashfs_bio_submit(struct squashfs_read_request *req);
-
-int squashfs_init_read_wq(void)
+/*
+ * Read the metadata block length, this is stored in the first two
+ * bytes of the metadata block.
+ */
+static struct buffer_head *get_block_length(struct super_block *sb,
+			u64 *cur_index, int *offset, int *length)
 {
-	squashfs_read_wq = create_workqueue("SquashFS read wq");
-	return !!squashfs_read_wq;
-}
-
-void squashfs_destroy_read_wq(void)
-{
-	flush_workqueue(squashfs_read_wq);
-	destroy_workqueue(squashfs_read_wq);
-}
-
-static void free_read_request(struct squashfs_read_request *req, int error)
-{
-	if (!req->synchronous)
-		squashfs_page_actor_free(req->output, error);
-	if (req->res)
-		*(req->res) = error;
-	kfree(req->bh);
-	kfree(req);
-}
-
-static void squashfs_process_blocks(struct squashfs_read_request *req)
-{
-	int error = 0;
-	int bytes, i, length;
-	struct squashfs_sb_info *msblk = req->sb->s_fs_info;
-	struct squashfs_page_actor *actor = req->output;
-	struct buffer_head **bh = req->bh;
-	int nr_buffers = req->nr_buffers;
-
-	for (i = 0; i < nr_buffers; ++i) {
-		if (!bh[i])
-			continue;
-		wait_on_buffer(bh[i]);
-		if (!buffer_uptodate(bh[i]))
-			error = -EIO;
-	}
-	if (error)
-		goto cleanup;
-
-	if (req->data_processing == SQUASHFS_METADATA) {
-		/* Extract the length of the metadata block */
-		if (req->offset != msblk->devblksize - 1) {
-			length = le16_to_cpup((__le16 *)
-					(bh[0]->b_data + req->offset));
-		} else {
-			length = (unsigned char)bh[0]->b_data[req->offset];
-			length |= (unsigned char)bh[1]->b_data[0] << 8;
-		}
-		req->compressed = SQUASHFS_COMPRESSED(length);
-		req->data_processing = req->compressed ? SQUASHFS_DECOMPRESS
-						       : SQUASHFS_COPY;
-		length = SQUASHFS_COMPRESSED_SIZE(length);
-		if (req->index + length + 2 > req->read_end) {
-			for (i = 0; i < nr_buffers; ++i)
-				put_bh(bh[i]);
-			kfree(bh);
-			req->length = length;
-			req->index += 2;
-			squashfs_bio_submit(req);
-			return;
-		}
-		req->length = length;
-		req->offset = (req->offset + 2) % PAGE_SIZE;
-		if (req->offset < 2) {
-			put_bh(bh[0]);
-			++bh;
-			--nr_buffers;
-		}
-	}
-	if (req->bytes_read)
-		*(req->bytes_read) = req->length;
-
-	if (req->data_processing == SQUASHFS_COPY) {
-		squashfs_bh_to_actor(bh, nr_buffers, req->output, req->offset,
-			req->length, msblk->devblksize);
-	} else if (req->data_processing == SQUASHFS_DECOMPRESS) {
-		req->length = squashfs_decompress(msblk, bh, nr_buffers,
-			req->offset, req->length, actor);
-		if (req->length < 0) {
-			error = -EIO;
-			goto cleanup;
-		}
-	}
-
-	/* Last page may have trailing bytes not filled */
-	bytes = req->length % PAGE_SIZE;
-	if (bytes && actor->page[actor->pages - 1])
-		zero_user_segment(actor->page[actor->pages - 1], bytes,
-				  PAGE_SIZE);
-
-cleanup:
-	if (req->bytes_uncompressed)
-		*(req->bytes_uncompressed) = req->length;
-	if (error) {
-		for (i = 0; i < nr_buffers; ++i)
-			if (bh[i])
-				put_bh(bh[i]);
-	}
-	free_read_request(req, error);
-}
-
-static void read_wq_handler(struct work_struct *work)
-{
-	squashfs_process_blocks(container_of(work,
-		    struct squashfs_read_request, offload));
-}
-
-static void squashfs_bio_end_io(struct bio *bio)
-{
-	int i;
-	int error = bio->bi_error;
-	struct squashfs_bio_request *bio_req = bio->bi_private;
-
-	bio_put(bio);
-
-	for (i = 0; i < bio_req->nr_buffers; ++i) {
-		if (!bio_req->bh[i])
-			continue;
-		if (!error)
-			set_buffer_uptodate(bio_req->bh[i]);
-		else
-			clear_buffer_uptodate(bio_req->bh[i]);
-		unlock_buffer(bio_req->bh[i]);
-	}
-	kfree(bio_req);
-}
-
-static int bh_is_optional(struct squashfs_read_request *req, int idx)
-{
-	int start_idx, end_idx;
-	struct squashfs_sb_info *msblk = req->sb->s_fs_info;
-
-	start_idx = (idx * msblk->devblksize - req->offset) >> PAGE_SHIFT;
-	end_idx = ((idx + 1) * msblk->devblksize - req->offset + 1) >> PAGE_SHIFT;
-	if (start_idx >= req->output->pages)
-		return 1;
-	if (start_idx < 0)
-		start_idx = end_idx;
-	if (end_idx >= req->output->pages)
-		end_idx = start_idx;
-	return !req->output->page[start_idx] && !req->output->page[end_idx];
-}
-
-static int actor_getblks(struct squashfs_read_request *req, u64 block)
-{
-	int i;
-
-	req->bh = kmalloc_array(req->nr_buffers, sizeof(*(req->bh)), GFP_NOIO);
-	if (!req->bh)
-		return -ENOMEM;
-
-	for (i = 0; i < req->nr_buffers; ++i) {
-		/*
-		 * When dealing with an uncompressed block, the actor may
-		 * contains NULL pages. There's no need to read the buffers
-		 * associated with these pages.
-		 */
-		if (!req->compressed && bh_is_optional(req, i)) {
-			req->bh[i] = NULL;
-			continue;
-		}
-		req->bh[i] = sb_getblk(req->sb, block + i);
-		if (!req->bh[i]) {
-			while (--i) {
-				if (req->bh[i])
-					put_bh(req->bh[i]);
-			}
-			return -1;
-		}
-	}
-	return 0;
-}
-
-static int squashfs_bio_submit(struct squashfs_read_request *req)
-{
-	struct bio *bio = NULL;
+	struct squashfs_sb_info *msblk = sb->s_fs_info;
 	struct buffer_head *bh;
-	struct squashfs_bio_request *bio_req = NULL;
-	int b = 0, prev_block = 0;
-	struct squashfs_sb_info *msblk = req->sb->s_fs_info;
 
-	u64 read_start = round_down(req->index, msblk->devblksize);
-	u64 read_end = round_up(req->index + req->length, msblk->devblksize);
-	sector_t block = read_start >> msblk->devblksize_log2;
-	sector_t block_end = read_end >> msblk->devblksize_log2;
-	int offset = read_start - round_down(req->index, PAGE_SIZE);
-	int nr_buffers = block_end - block;
-	int blksz = msblk->devblksize;
-	int bio_max_pages = nr_buffers > BIO_MAX_PAGES ? BIO_MAX_PAGES
-						       : nr_buffers;
+	bh = sb_bread(sb, *cur_index);
+	if (bh == NULL)
+		return NULL;
 
-	/* Setup the request */
-	req->read_end = read_end;
-	req->offset = req->index - read_start;
-	req->nr_buffers = nr_buffers;
-	if (actor_getblks(req, block) < 0)
-		goto getblk_failed;
+	if (msblk->devblksize - *offset == 1) {
+		*length = (unsigned char) bh->b_data[*offset];
+		put_bh(bh);
+		bh = sb_bread(sb, ++(*cur_index));
+		if (bh == NULL)
+			return NULL;
+		*length |= (unsigned char) bh->b_data[0] << 8;
+		*offset = 1;
+	} else {
+		*length = (unsigned char) bh->b_data[*offset] |
+			(unsigned char) bh->b_data[*offset + 1] << 8;
+		*offset += 2;
 
-	/* Create and submit the BIOs */
-	for (b = 0; b < nr_buffers; ++b, offset += blksz) {
-		bh = req->bh[b];
-		if (!bh || !trylock_buffer(bh))
-			continue;
-		if (buffer_uptodate(bh)) {
-			unlock_buffer(bh);
-			continue;
+		if (*offset == msblk->devblksize) {
+			put_bh(bh);
+			bh = sb_bread(sb, ++(*cur_index));
+			if (bh == NULL)
+				return NULL;
+			*offset = 0;
 		}
-		offset %= PAGE_SIZE;
-
-		/* Append the buffer to the current BIO if it is contiguous */
-		if (bio && bio_req && prev_block + 1 == b) {
-			if (bio_add_page(bio, bh->b_page, blksz, offset)) {
-				bio_req->nr_buffers += 1;
-				prev_block = b;
-				continue;
-			}
-		}
-
-		/* Otherwise, submit the current BIO and create a new one */
-		if (bio)
-			submit_bio(bio);
-		bio_req = kcalloc(1, sizeof(struct squashfs_bio_request),
-				  GFP_NOIO);
-		if (!bio_req)
-			goto req_alloc_failed;
-		bio_req->bh = &req->bh[b];
-		bio = bio_alloc(GFP_NOIO, bio_max_pages);
-		if (!bio)
-			goto bio_alloc_failed;
-		bio->bi_bdev = req->sb->s_bdev;
-		bio->bi_iter.bi_sector = (block + b)
-				       << (msblk->devblksize_log2 - 9);
-		bio_set_op_attrs(bio, REQ_OP_READ, 0);
-		bio->bi_private = bio_req;
-		bio->bi_end_io = squashfs_bio_end_io;
-
-		bio_add_page(bio, bh->b_page, blksz, offset);
-		bio_req->nr_buffers += 1;
-		prev_block = b;
 	}
-	if (bio)
-		submit_bio(bio);
 
-	if (req->synchronous)
-		squashfs_process_blocks(req);
-	else {
-		INIT_WORK(&req->offload, read_wq_handler);
-		schedule_work(&req->offload);
-	}
-	return 0;
-
-bio_alloc_failed:
-	kfree(bio_req);
-req_alloc_failed:
-	unlock_buffer(bh);
-	while (--nr_buffers >= b)
-		if (req->bh[nr_buffers])
-			put_bh(req->bh[nr_buffers]);
-	while (--b >= 0)
-		if (req->bh[b])
-			wait_on_buffer(req->bh[b]);
-getblk_failed:
-	free_read_request(req, -ENOMEM);
-	return -ENOMEM;
+	return bh;
 }
 
-static int read_metadata_block(struct squashfs_read_request *req,
-			       u64 *next_index)
-{
-	int ret, error, bytes_read = 0, bytes_uncompressed = 0;
-	struct squashfs_sb_info *msblk = req->sb->s_fs_info;
-
-	if (req->index + 2 > msblk->bytes_used) {
-		free_read_request(req, -EINVAL);
-		return -EINVAL;
-	}
-	req->length = 2;
-
-	/* Do not read beyond the end of the device */
-	if (req->index + req->length > msblk->bytes_used)
-		req->length = msblk->bytes_used - req->index;
-	req->data_processing = SQUASHFS_METADATA;
-
-	/*
-	 * Reading metadata is always synchronous because we don't know the
-	 * length in advance and the function is expected to update
-	 * 'next_index' and return the length.
-	 */
-	req->synchronous = true;
-	req->res = &error;
-	req->bytes_read = &bytes_read;
-	req->bytes_uncompressed = &bytes_uncompressed;
-
-	TRACE("Metadata block @ 0x%llx, %scompressed size %d, src size %d\n",
-	      req->index, req->compressed ? "" : "un", bytes_read,
-	      req->output->length);
-
-	ret = squashfs_bio_submit(req);
-	if (ret)
-		return ret;
-	if (error)
-		return error;
-	if (next_index)
-		*next_index += 2 + bytes_read;
-	return bytes_uncompressed;
-}
-
-static int read_data_block(struct squashfs_read_request *req, int length,
-			   u64 *next_index, bool synchronous)
-{
-	int ret, error = 0, bytes_uncompressed = 0, bytes_read = 0;
-
-	req->compressed = SQUASHFS_COMPRESSED_BLOCK(length);
-	req->length = length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
-	req->data_processing = req->compressed ? SQUASHFS_DECOMPRESS
-					       : SQUASHFS_COPY;
-
-	req->synchronous = synchronous;
-	if (synchronous) {
-		req->res = &error;
-		req->bytes_read = &bytes_read;
-		req->bytes_uncompressed = &bytes_uncompressed;
-	}
-
-	TRACE("Data block @ 0x%llx, %scompressed size %d, src size %d\n",
-	      req->index, req->compressed ? "" : "un", req->length,
-	      req->output->length);
-
-	ret = squashfs_bio_submit(req);
-	if (ret)
-		return ret;
-	if (synchronous)
-		ret = error ? error : bytes_uncompressed;
-	if (next_index)
-		*next_index += length;
-	return ret;
-}
 
 /*
  * Read and decompress a metadata block or datablock.  Length is non-zero
@@ -427,50 +87,130 @@ static int read_data_block(struct squashfs_read_request *req, int length,
  * generated a larger block - this does occasionally happen with compression
  * algorithms).
  */
-static int __squashfs_read_data(struct super_block *sb, u64 index, int length,
-	u64 *next_index, struct squashfs_page_actor *output, bool sync)
-{
-	struct squashfs_read_request *req;
-
-	req = kcalloc(1, sizeof(struct squashfs_read_request), GFP_KERNEL);
-	if (!req) {
-		if (!sync)
-			squashfs_page_actor_free(output, -ENOMEM);
-		return -ENOMEM;
-	}
-
-	req->sb = sb;
-	req->index = index;
-	req->output = output;
-
-	if (next_index)
-		*next_index = index;
-
-	if (length)
-		length = read_data_block(req, length, next_index, sync);
-	else
-		length = read_metadata_block(req, next_index);
-
-	if (length < 0) {
-		ERROR("squashfs_read_data failed to read block 0x%llx\n",
-		      (unsigned long long)index);
-		return -EIO;
-	}
-
-	return length;
-}
-
 int squashfs_read_data(struct super_block *sb, u64 index, int length,
-	u64 *next_index, struct squashfs_page_actor *output)
+		u64 *next_index, struct squashfs_page_actor *output)
 {
-	return __squashfs_read_data(sb, index, length, next_index, output,
-				    true);
-}
+	struct squashfs_sb_info *msblk = sb->s_fs_info;
+	struct buffer_head **bh;
+	int offset = index & ((1 << msblk->devblksize_log2) - 1);
+	u64 cur_index = index >> msblk->devblksize_log2;
+	int bytes, compressed, b = 0, k = 0, avail, i;
 
-int squashfs_read_data_async(struct super_block *sb, u64 index, int length,
-	u64 *next_index, struct squashfs_page_actor *output)
-{
+	bh = kcalloc(((output->length + msblk->devblksize - 1)
+		>> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
+	if (bh == NULL)
+		return -ENOMEM;
 
-	return __squashfs_read_data(sb, index, length, next_index, output,
-				    false);
+	if (length) {
+		/*
+		 * Datablock.
+		 */
+		bytes = -offset;
+		compressed = SQUASHFS_COMPRESSED_BLOCK(length);
+		length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
+		if (next_index)
+			*next_index = index + length;
+
+		TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n",
+			index, compressed ? "" : "un", length, output->length);
+
+		if (length < 0 || length > output->length ||
+				(index + length) > msblk->bytes_used)
+			goto read_failure;
+
+		for (b = 0; bytes < length; b++, cur_index++) {
+			bh[b] = sb_getblk(sb, cur_index);
+			if (bh[b] == NULL)
+				goto block_release;
+			bytes += msblk->devblksize;
+		}
+		ll_rw_block(REQ_OP_READ, 0, b, bh);
+	} else {
+		/*
+		 * Metadata block.
+		 */
+		if ((index + 2) > msblk->bytes_used)
+			goto read_failure;
+
+		bh[0] = get_block_length(sb, &cur_index, &offset, &length);
+		if (bh[0] == NULL)
+			goto read_failure;
+		b = 1;
+
+		bytes = msblk->devblksize - offset;
+		compressed = SQUASHFS_COMPRESSED(length);
+		length = SQUASHFS_COMPRESSED_SIZE(length);
+		if (next_index)
+			*next_index = index + length + 2;
+
+		TRACE("Block @ 0x%llx, %scompressed size %d\n", index,
+				compressed ? "" : "un", length);
+
+		if (length < 0 || length > output->length ||
+					(index + length) > msblk->bytes_used)
+			goto block_release;
+
+		for (; bytes < length; b++) {
+			bh[b] = sb_getblk(sb, ++cur_index);
+			if (bh[b] == NULL)
+				goto block_release;
+			bytes += msblk->devblksize;
+		}
+		ll_rw_block(REQ_OP_READ, 0, b - 1, bh + 1);
+	}
+
+	for (i = 0; i < b; i++) {
+		wait_on_buffer(bh[i]);
+		if (!buffer_uptodate(bh[i]))
+			goto block_release;
+	}
+
+	if (compressed) {
+		if (!msblk->stream)
+			goto read_failure;
+		length = squashfs_decompress(msblk, bh, b, offset, length,
+			output);
+		if (length < 0)
+			goto read_failure;
+	} else {
+		/*
+		 * Block is uncompressed.
+		 */
+		int in, pg_offset = 0;
+		void *data = squashfs_first_page(output);
+
+		for (bytes = length; k < b; k++) {
+			in = min(bytes, msblk->devblksize - offset);
+			bytes -= in;
+			while (in) {
+				if (pg_offset == PAGE_SIZE) {
+					data = squashfs_next_page(output);
+					pg_offset = 0;
+				}
+				avail = min_t(int, in, PAGE_SIZE -
+						pg_offset);
+				memcpy(data + pg_offset, bh[k]->b_data + offset,
+						avail);
+				in -= avail;
+				pg_offset += avail;
+				offset += avail;
+			}
+			offset = 0;
+			put_bh(bh[k]);
+		}
+		squashfs_finish_page(output);
+	}
+
+	kfree(bh);
+	return length;
+
+block_release:
+	for (; k < b; k++)
+		put_bh(bh[k]);
+
+read_failure:
+	ERROR("squashfs_read_data failed to read block 0x%llx\n",
+					(unsigned long long) index);
+	kfree(bh);
+	return -EIO;
 }

diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 9d9d4aa..0839efa 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c

@@ -209,14 +209,17 @@ void squashfs_cache_put(struct squashfs_cache_entry *entry)
  */
 void squashfs_cache_delete(struct squashfs_cache *cache)
 {
-	int i;
+	int i, j;
 
 	if (cache == NULL)
 		return;
 
 	for (i = 0; i < cache->entries; i++) {
-		if (cache->entry[i].page)
-			free_page_array(cache->entry[i].page, cache->pages);
+		if (cache->entry[i].data) {
+			for (j = 0; j < cache->pages; j++)
+				kfree(cache->entry[i].data[j]);
+			kfree(cache->entry[i].data);
+		}
 		kfree(cache->entry[i].actor);
 	}
 
@@ -233,7 +236,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache)
 struct squashfs_cache *squashfs_cache_init(char *name, int entries,
 	int block_size)
 {
-	int i;
+	int i, j;
 	struct squashfs_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL);
 
 	if (cache == NULL) {
@@ -265,13 +268,22 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries,
 		init_waitqueue_head(&cache->entry[i].wait_queue);
 		entry->cache = cache;
 		entry->block = SQUASHFS_INVALID_BLK;
-		entry->page = alloc_page_array(cache->pages, GFP_KERNEL);
-		if (!entry->page) {
+		entry->data = kcalloc(cache->pages, sizeof(void *), GFP_KERNEL);
+		if (entry->data == NULL) {
 			ERROR("Failed to allocate %s cache entry\n", name);
 			goto cleanup;
 		}
-		entry->actor = squashfs_page_actor_init(entry->page,
-			cache->pages, 0, NULL);
+
+		for (j = 0; j < cache->pages; j++) {
+			entry->data[j] = kmalloc(PAGE_SIZE, GFP_KERNEL);
+			if (entry->data[j] == NULL) {
+				ERROR("Failed to allocate %s buffer\n", name);
+				goto cleanup;
+			}
+		}
+
+		entry->actor = squashfs_page_actor_init(entry->data,
+						cache->pages, 0);
 		if (entry->actor == NULL) {
 			ERROR("Failed to allocate %s cache entry\n", name);
 			goto cleanup;
@@ -302,20 +314,18 @@ int squashfs_copy_data(void *buffer, struct squashfs_cache_entry *entry,
 		return min(length, entry->length - offset);
 
 	while (offset < entry->length) {
-		void *buff = kmap_atomic(entry->page[offset / PAGE_SIZE])
-			     + (offset % PAGE_SIZE);
+		void *buff = entry->data[offset / PAGE_SIZE]
+				+ (offset % PAGE_SIZE);
 		int bytes = min_t(int, entry->length - offset,
 				PAGE_SIZE - (offset % PAGE_SIZE));
 
 		if (bytes >= remaining) {
 			memcpy(buffer, buff, remaining);
-			kunmap_atomic(buff);
 			remaining = 0;
 			break;
 		}
 
 		memcpy(buffer, buff, bytes);
-		kunmap_atomic(buff);
 		buffer += bytes;
 		remaining -= bytes;
 		offset += bytes;
@@ -409,38 +419,43 @@ struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *sb,
 void *squashfs_read_table(struct super_block *sb, u64 block, int length)
 {
 	int pages = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	struct page **page;
-	void *buff;
-	int res;
+	int i, res;
+	void *table, *buffer, **data;
 	struct squashfs_page_actor *actor;
 
-	page = alloc_page_array(pages, GFP_KERNEL);
-	if (!page)
+	table = buffer = kmalloc(length, GFP_KERNEL);
+	if (table == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	actor = squashfs_page_actor_init(page, pages, length, NULL);
-	if (actor == NULL) {
+	data = kcalloc(pages, sizeof(void *), GFP_KERNEL);
+	if (data == NULL) {
 		res = -ENOMEM;
 		goto failed;
 	}
 
+	actor = squashfs_page_actor_init(data, pages, length);
+	if (actor == NULL) {
+		res = -ENOMEM;
+		goto failed2;
+	}
+
+	for (i = 0; i < pages; i++, buffer += PAGE_SIZE)
+		data[i] = buffer;
+
 	res = squashfs_read_data(sb, block, length |
 		SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor);
 
-	if (res < 0)
-		goto failed2;
+	kfree(data);
+	kfree(actor);
 
-	buff = kmalloc(length, GFP_KERNEL);
-	if (!buff)
-		goto failed2;
-	squashfs_actor_to_buf(actor, buff, length);
-	squashfs_page_actor_free(actor, 0);
-	free_page_array(page, pages);
-	return buff;
+	if (res < 0)
+		goto failed;
+
+	return table;
 
 failed2:
-	squashfs_page_actor_free(actor, 0);
+	kfree(data);
 failed:
-	free_page_array(page, pages);
+	kfree(table);
 	return ERR_PTR(res);
 }

diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index 7de35bf..d2bc136 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c

@@ -24,8 +24,7 @@
 #include <linux/types.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/fs.h>
+#include <linux/buffer_head.h>
 
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
@@ -95,44 +94,40 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
 static void *get_comp_opts(struct super_block *sb, unsigned short flags)
 {
 	struct squashfs_sb_info *msblk = sb->s_fs_info;
-	void *comp_opts, *buffer = NULL;
-	struct page *page;
+	void *buffer = NULL, *comp_opts;
 	struct squashfs_page_actor *actor = NULL;
 	int length = 0;
 
-	if (!SQUASHFS_COMP_OPTS(flags))
-		return squashfs_comp_opts(msblk, buffer, length);
-
 	/*
 	 * Read decompressor specific options from file system if present
 	 */
+	if (SQUASHFS_COMP_OPTS(flags)) {
+		buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
+		if (buffer == NULL) {
+			comp_opts = ERR_PTR(-ENOMEM);
+			goto out;
+		}
 
-	page = alloc_page(GFP_KERNEL);
-	if (!page)
-		return ERR_PTR(-ENOMEM);
+		actor = squashfs_page_actor_init(&buffer, 1, 0);
+		if (actor == NULL) {
+			comp_opts = ERR_PTR(-ENOMEM);
+			goto out;
+		}
 
-	actor = squashfs_page_actor_init(&page, 1, 0, NULL);
-	if (actor == NULL) {
-		comp_opts = ERR_PTR(-ENOMEM);
-		goto actor_error;
+		length = squashfs_read_data(sb,
+			sizeof(struct squashfs_super_block), 0, NULL, actor);
+
+		if (length < 0) {
+			comp_opts = ERR_PTR(length);
+			goto out;
+		}
 	}
 
-	length = squashfs_read_data(sb,
-		sizeof(struct squashfs_super_block), 0, NULL, actor);
-
-	if (length < 0) {
-		comp_opts = ERR_PTR(length);
-		goto read_error;
-	}
-
-	buffer = kmap_atomic(page);
 	comp_opts = squashfs_comp_opts(msblk, buffer, length);
-	kunmap_atomic(buffer);
 
-read_error:
-	squashfs_page_actor_free(actor, 0);
-actor_error:
-	__free_page(page);
+out:
+	kfree(actor);
+	kfree(buffer);
 	return comp_opts;
 }
 

diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index cd3c5c8..f1c1430 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c

@@ -47,7 +47,6 @@
 #include <linux/string.h>
 #include <linux/pagemap.h>
 #include <linux/mutex.h>
-#include <linux/mm_inline.h>
 
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
@@ -375,13 +374,29 @@ static int read_blocklist(struct inode *inode, int index, u64 *block)
 	return squashfs_block_size(size);
 }
 
+void squashfs_fill_page(struct page *page, struct squashfs_cache_entry *buffer, int offset, int avail)
+{
+	int copied;
+	void *pageaddr;
+
+	pageaddr = kmap_atomic(page);
+	copied = squashfs_copy_data(pageaddr, buffer, offset, avail);
+	memset(pageaddr + copied, 0, PAGE_SIZE - copied);
+	kunmap_atomic(pageaddr);
+
+	flush_dcache_page(page);
+	if (copied == avail)
+		SetPageUptodate(page);
+	else
+		SetPageError(page);
+}
+
 /* Copy data into page cache  */
 void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer,
 	int bytes, int offset)
 {
 	struct inode *inode = page->mapping->host;
 	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
-	void *pageaddr;
 	int i, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
 	int start_index = page->index & ~mask, end_index = start_index | mask;
 
@@ -407,12 +422,7 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer,
 		if (PageUptodate(push_page))
 			goto skip_page;
 
-		pageaddr = kmap_atomic(push_page);
-		squashfs_copy_data(pageaddr, buffer, offset, avail);
-		memset(pageaddr + avail, 0, PAGE_SIZE - avail);
-		kunmap_atomic(pageaddr);
-		flush_dcache_page(push_page);
-		SetPageUptodate(push_page);
+		squashfs_fill_page(push_page, buffer, offset, avail);
 skip_page:
 		unlock_page(push_page);
 		if (i != page->index)
@@ -421,10 +431,9 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer,
 }
 
 /* Read datablock stored packed inside a fragment (tail-end packed block) */
-static int squashfs_readpage_fragment(struct page *page)
+static int squashfs_readpage_fragment(struct page *page, int expected)
 {
 	struct inode *inode = page->mapping->host;
-	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
 	struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb,
 		squashfs_i(inode)->fragment_block,
 		squashfs_i(inode)->fragment_size);
@@ -435,140 +444,70 @@ static int squashfs_readpage_fragment(struct page *page)
 			squashfs_i(inode)->fragment_block,
 			squashfs_i(inode)->fragment_size);
 	else
-		squashfs_copy_cache(page, buffer, i_size_read(inode) &
-			(msblk->block_size - 1),
+		squashfs_copy_cache(page, buffer, expected,
 			squashfs_i(inode)->fragment_offset);
 
 	squashfs_cache_put(buffer);
 	return res;
 }
 
-static int squashfs_readpages_fragment(struct page *page,
-	struct list_head *readahead_pages, struct address_space *mapping)
+static int squashfs_readpage_sparse(struct page *page, int expected)
 {
-	if (!page) {
-		page = lru_to_page(readahead_pages);
-		list_del(&page->lru);
-		if (add_to_page_cache_lru(page, mapping, page->index,
-			mapping_gfp_constraint(mapping, GFP_KERNEL))) {
-			put_page(page);
-			return 0;
-		}
-	}
-	return squashfs_readpage_fragment(page);
-}
-
-static int squashfs_readpage_sparse(struct page *page, int index, int file_end)
-{
-	struct inode *inode = page->mapping->host;
-	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
-	int bytes = index == file_end ?
-			(i_size_read(inode) & (msblk->block_size - 1)) :
-			 msblk->block_size;
-
-	squashfs_copy_cache(page, NULL, bytes, 0);
-	return 0;
-}
-
-static int squashfs_readpages_sparse(struct page *page,
-	struct list_head *readahead_pages, int index, int file_end,
-	struct address_space *mapping)
-{
-	if (!page) {
-		page = lru_to_page(readahead_pages);
-		list_del(&page->lru);
-		if (add_to_page_cache_lru(page, mapping, page->index,
-			mapping_gfp_constraint(mapping, GFP_KERNEL))) {
-			put_page(page);
-			return 0;
-		}
-	}
-	return squashfs_readpage_sparse(page, index, file_end);
-}
-
-static int __squashfs_readpages(struct file *file, struct page *page,
-	struct list_head *readahead_pages, unsigned int nr_pages,
-	struct address_space *mapping)
-{
-	struct inode *inode = mapping->host;
-	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
-	int file_end = i_size_read(inode) >> msblk->block_log;
-	int res;
-
-	do {
-		struct page *cur_page = page ? page
-					     : lru_to_page(readahead_pages);
-		int page_index = cur_page->index;
-		int index = page_index >> (msblk->block_log - PAGE_SHIFT);
-
-		if (page_index >= ((i_size_read(inode) + PAGE_SIZE - 1) >>
-						PAGE_SHIFT))
-			return 1;
-
-		if (index < file_end || squashfs_i(inode)->fragment_block ==
-						SQUASHFS_INVALID_BLK) {
-			u64 block = 0;
-			int bsize = read_blocklist(inode, index, &block);
-
-			if (bsize < 0)
-				return -1;
-
-			if (bsize == 0) {
-				res = squashfs_readpages_sparse(page,
-					readahead_pages, index, file_end,
-					mapping);
-			} else {
-				res = squashfs_readpages_block(page,
-					readahead_pages, &nr_pages, mapping,
-					page_index, block, bsize);
-			}
-		} else {
-			res = squashfs_readpages_fragment(page,
-				readahead_pages, mapping);
-		}
-		if (res)
-			return 0;
-		page = NULL;
-	} while (readahead_pages && !list_empty(readahead_pages));
-
+	squashfs_copy_cache(page, NULL, expected, 0);
 	return 0;
 }
 
 static int squashfs_readpage(struct file *file, struct page *page)
 {
-	int ret;
+	struct inode *inode = page->mapping->host;
+	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+	int index = page->index >> (msblk->block_log - PAGE_SHIFT);
+	int file_end = i_size_read(inode) >> msblk->block_log;
+	int expected = index == file_end ?
+			(i_size_read(inode) & (msblk->block_size - 1)) :
+			 msblk->block_size;
+	int res;
+	void *pageaddr;
 
 	TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
-	      page->index, squashfs_i(page->mapping->host)->start);
+				page->index, squashfs_i(inode)->start);
 
-	get_page(page);
+	if (page->index >= ((i_size_read(inode) + PAGE_SIZE - 1) >>
+					PAGE_SHIFT))
+		goto out;
 
-	ret = __squashfs_readpages(file, page, NULL, 1, page->mapping);
-	if (ret) {
-		flush_dcache_page(page);
-		if (ret < 0)
-			SetPageError(page);
+	if (index < file_end || squashfs_i(inode)->fragment_block ==
+					SQUASHFS_INVALID_BLK) {
+		u64 block = 0;
+		int bsize = read_blocklist(inode, index, &block);
+		if (bsize < 0)
+			goto error_out;
+
+		if (bsize == 0)
+			res = squashfs_readpage_sparse(page, expected);
 		else
-			SetPageUptodate(page);
-		zero_user_segment(page, 0, PAGE_SIZE);
-		unlock_page(page);
-		put_page(page);
-	}
+			res = squashfs_readpage_block(page, block, bsize, expected);
+	} else
+		res = squashfs_readpage_fragment(page, expected);
 
-	return 0;
-}
+	if (!res)
+		return 0;
 
-static int squashfs_readpages(struct file *file, struct address_space *mapping,
-			      struct list_head *pages, unsigned int nr_pages)
-{
-	TRACE("Entered squashfs_readpages, %u pages, first page index %lx\n",
-		nr_pages, lru_to_page(pages)->index);
-	__squashfs_readpages(file, NULL, pages, nr_pages, mapping);
+error_out:
+	SetPageError(page);
+out:
+	pageaddr = kmap_atomic(page);
+	memset(pageaddr, 0, PAGE_SIZE);
+	kunmap_atomic(pageaddr);
+	flush_dcache_page(page);
+	if (!PageError(page))
+		SetPageUptodate(page);
+	unlock_page(page);
+
 	return 0;
 }
 
 
 const struct address_space_operations squashfs_aops = {
-	.readpage = squashfs_readpage,
-	.readpages = squashfs_readpages,
+	.readpage = squashfs_readpage
 };

diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c
new file mode 100644
index 0000000..a9ba8d9
--- /dev/null
+++ b/fs/squashfs/file_cache.c

@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/pagemap.h>
+#include <linux/mutex.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+/* Read separately compressed datablock and memcopy into page cache */
+int squashfs_readpage_block(struct page *page, u64 block, int bsize, int expected)
+{
+	struct inode *i = page->mapping->host;
+	struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
+		block, bsize);
+	int res = buffer->error;
+
+	if (res)
+		ERROR("Unable to read page, block %llx, size %x\n", block,
+			bsize);
+	else
+		squashfs_copy_cache(page, buffer, expected, 0);
+
+	squashfs_cache_put(buffer);
+	return res;
+}

diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index dc87f77..80db1b8 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c

@@ -13,7 +13,6 @@
 #include <linux/string.h>
 #include <linux/pagemap.h>
 #include <linux/mutex.h>
-#include <linux/mm_inline.h>
 
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
@@ -21,136 +20,157 @@
 #include "squashfs.h"
 #include "page_actor.h"
 
-static void release_actor_pages(struct page **page, int pages, int error)
-{
-	int i;
+static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
+	int pages, struct page **page, int bytes);
 
-	for (i = 0; i < pages; i++) {
-		if (!page[i])
-			continue;
-		flush_dcache_page(page[i]);
-		if (!error)
-			SetPageUptodate(page[i]);
-		else {
-			SetPageError(page[i]);
-			zero_user_segment(page[i], 0, PAGE_SIZE);
-		}
-		unlock_page(page[i]);
-		put_page(page[i]);
-	}
-	kfree(page);
-}
+/* Read separately compressed datablock directly into page cache */
+int squashfs_readpage_block(struct page *target_page, u64 block, int bsize,
+	int expected)
 
-/*
- * Create a "page actor" which will kmap and kunmap the
- * page cache pages appropriately within the decompressor
- */
-static struct squashfs_page_actor *actor_from_page_cache(
-	unsigned int actor_pages, struct page *target_page,
-	struct list_head *rpages, unsigned int *nr_pages, int start_index,
-	struct address_space *mapping)
 {
+	struct inode *inode = target_page->mapping->host;
+	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+
+	int file_end = (i_size_read(inode) - 1) >> PAGE_SHIFT;
+	int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
+	int start_index = target_page->index & ~mask;
+	int end_index = start_index | mask;
+	int i, n, pages, missing_pages, bytes, res = -ENOMEM;
 	struct page **page;
 	struct squashfs_page_actor *actor;
-	int i, n;
-	gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
+	void *pageaddr;
 
-	page = kmalloc_array(actor_pages, sizeof(void *), GFP_KERNEL);
-	if (!page)
-		return NULL;
+	if (end_index > file_end)
+		end_index = file_end;
 
-	for (i = 0, n = start_index; i < actor_pages; i++, n++) {
-		if (target_page == NULL && rpages && !list_empty(rpages)) {
-			struct page *cur_page = lru_to_page(rpages);
+	pages = end_index - start_index + 1;
 
-			if (cur_page->index < start_index + actor_pages) {
-				list_del(&cur_page->lru);
-				--(*nr_pages);
-				if (add_to_page_cache_lru(cur_page, mapping,
-							  cur_page->index, gfp))
-					put_page(cur_page);
-				else
-					target_page = cur_page;
-			} else
-				rpages = NULL;
-		}
+	page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL);
+	if (page == NULL)
+		return res;
 
-		if (target_page && target_page->index == n) {
-			page[i] = target_page;
-			target_page = NULL;
-		} else {
-			page[i] = grab_cache_page_nowait(mapping, n);
-			if (page[i] == NULL)
-				continue;
+	/*
+	 * Create a "page actor" which will kmap and kunmap the
+	 * page cache pages appropriately within the decompressor
+	 */
+	actor = squashfs_page_actor_init_special(page, pages, 0);
+	if (actor == NULL)
+		goto out;
+
+	/* Try to grab all the pages covered by the Squashfs block */
+	for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) {
+		page[i] = (n == target_page->index) ? target_page :
+			grab_cache_page_nowait(target_page->mapping, n);
+
+		if (page[i] == NULL) {
+			missing_pages++;
+			continue;
 		}
 
 		if (PageUptodate(page[i])) {
 			unlock_page(page[i]);
 			put_page(page[i]);
 			page[i] = NULL;
+			missing_pages++;
 		}
 	}
 
-	actor = squashfs_page_actor_init(page, actor_pages, 0,
-			release_actor_pages);
-	if (!actor) {
-		release_actor_pages(page, actor_pages, -ENOMEM);
-		kfree(page);
-		return NULL;
+	if (missing_pages) {
+		/*
+		 * Couldn't get one or more pages, this page has either
+		 * been VM reclaimed, but others are still in the page cache
+		 * and uptodate, or we're racing with another thread in
+		 * squashfs_readpage also trying to grab them.  Fall back to
+		 * using an intermediate buffer.
+		 */
+		res = squashfs_read_cache(target_page, block, bsize, pages,
+							page, expected);
+		if (res < 0)
+			goto mark_errored;
+
+		goto out;
 	}
-	return actor;
+
+	/* Decompress directly into the page cache buffers */
+	res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
+	if (res < 0)
+		goto mark_errored;
+
+	if (res != expected) {
+		res = -EIO;
+		goto mark_errored;
+	}
+
+	/* Last page may have trailing bytes not filled */
+	bytes = res % PAGE_SIZE;
+	if (bytes) {
+		pageaddr = kmap_atomic(page[pages - 1]);
+		memset(pageaddr + bytes, 0, PAGE_SIZE - bytes);
+		kunmap_atomic(pageaddr);
+	}
+
+	/* Mark pages as uptodate, unlock and release */
+	for (i = 0; i < pages; i++) {
+		flush_dcache_page(page[i]);
+		SetPageUptodate(page[i]);
+		unlock_page(page[i]);
+		if (page[i] != target_page)
+			put_page(page[i]);
+	}
+
+	kfree(actor);
+	kfree(page);
+
+	return 0;
+
+mark_errored:
+	/* Decompression failed, mark pages as errored.  Target_page is
+	 * dealt with by the caller
+	 */
+	for (i = 0; i < pages; i++) {
+		if (page[i] == NULL || page[i] == target_page)
+			continue;
+		flush_dcache_page(page[i]);
+		SetPageError(page[i]);
+		unlock_page(page[i]);
+		put_page(page[i]);
+	}
+
+out:
+	kfree(actor);
+	kfree(page);
+	return res;
 }
 
-int squashfs_readpages_block(struct page *target_page,
-			     struct list_head *readahead_pages,
-			     unsigned int *nr_pages,
-			     struct address_space *mapping,
-			     int page_index, u64 block, int bsize)
 
+static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
+	int pages, struct page **page, int bytes)
 {
-	struct squashfs_page_actor *actor;
-	struct inode *inode = mapping->host;
-	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
-	int start_index, end_index, file_end, actor_pages, res;
-	int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
+	struct inode *i = target_page->mapping->host;
+	struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
+						 block, bsize);
+	int res = buffer->error, n, offset = 0;
 
-	/*
-	 * If readpage() is called on an uncompressed datablock, we can just
-	 * read the pages instead of fetching the whole block.
-	 * This greatly improves the performance when a process keep doing
-	 * random reads because we only fetch the necessary data.
-	 * The readahead algorithm will take care of doing speculative reads
-	 * if necessary.
-	 * We can't read more than 1 block even if readahead provides use more
-	 * pages because we don't know yet if the next block is compressed or
-	 * not.
-	 */
-	if (bsize && !SQUASHFS_COMPRESSED_BLOCK(bsize)) {
-		u64 block_end = block + msblk->block_size;
-
-		block += (page_index & mask) * PAGE_SIZE;
-		actor_pages = (block_end - block) / PAGE_SIZE;
-		if (*nr_pages < actor_pages)
-			actor_pages = *nr_pages;
-		start_index = page_index;
-		bsize = min_t(int, bsize, (PAGE_SIZE * actor_pages)
-					  | SQUASHFS_COMPRESSED_BIT_BLOCK);
-	} else {
-		file_end = (i_size_read(inode) - 1) >> PAGE_SHIFT;
-		start_index = page_index & ~mask;
-		end_index = start_index | mask;
-		if (end_index > file_end)
-			end_index = file_end;
-		actor_pages = end_index - start_index + 1;
+	if (res) {
+		ERROR("Unable to read page, block %llx, size %x\n", block,
+			bsize);
+		goto out;
 	}
 
-	actor = actor_from_page_cache(actor_pages, target_page,
-				      readahead_pages, nr_pages, start_index,
-				      mapping);
-	if (!actor)
-		return -ENOMEM;
+	for (n = 0; n < pages && bytes > 0; n++,
+			bytes -= PAGE_SIZE, offset += PAGE_SIZE) {
+		int avail = min_t(int, bytes, PAGE_SIZE);
 
-	res = squashfs_read_data_async(inode->i_sb, block, bsize, NULL,
-				       actor);
-	return res < 0 ? res : 0;
+		if (page[n] == NULL)
+			continue;
+
+		squashfs_fill_page(page[n], buffer, offset, avail);
+		unlock_page(page[n]);
+		if (page[n] != target_page)
+			put_page(page[n]);
+	}
+
+out:
+	squashfs_cache_put(buffer);
+	return res;
 }

diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
index df4fa3c..ff4468b 100644
--- a/fs/squashfs/lz4_wrapper.c
+++ b/fs/squashfs/lz4_wrapper.c

@@ -94,17 +94,39 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 	struct buffer_head **bh, int b, int offset, int length,
 	struct squashfs_page_actor *output)
 {
-	int res;
-	size_t dest_len = output->length;
 	struct squashfs_lz4 *stream = strm;
+	void *buff = stream->input, *data;
+	int avail, i, bytes = length, res;
+	size_t dest_len = output->length;
 
-	squashfs_bh_to_buf(bh, b, stream->input, offset, length,
-		msblk->devblksize);
+	for (i = 0; i < b; i++) {
+		avail = min(bytes, msblk->devblksize - offset);
+		memcpy(buff, bh[i]->b_data + offset, avail);
+		buff += avail;
+		bytes -= avail;
+		offset = 0;
+		put_bh(bh[i]);
+	}
+
 	res = lz4_decompress_unknownoutputsize(stream->input, length,
 					stream->output, &dest_len);
 	if (res)
 		return -EIO;
-	squashfs_buf_to_actor(stream->output, output, dest_len);
+
+	bytes = dest_len;
+	data = squashfs_first_page(output);
+	buff = stream->output;
+	while (data) {
+		if (bytes <= PAGE_SIZE) {
+			memcpy(data, buff, bytes);
+			break;
+		}
+		memcpy(data, buff, PAGE_SIZE);
+		buff += PAGE_SIZE;
+		bytes -= PAGE_SIZE;
+		data = squashfs_next_page(output);
+	}
+	squashfs_finish_page(output);
 
 	return dest_len;
 }

diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
index 2c844d5..934c17e 100644
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c

@@ -79,19 +79,45 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm,
 	struct buffer_head **bh, int b, int offset, int length,
 	struct squashfs_page_actor *output)
 {
-	int res;
-	size_t out_len = output->length;
 	struct squashfs_lzo *stream = strm;
+	void *buff = stream->input, *data;
+	int avail, i, bytes = length, res;
+	size_t out_len = output->length;
 
-	squashfs_bh_to_buf(bh, b, stream->input, offset, length,
-		msblk->devblksize);
+	for (i = 0; i < b; i++) {
+		avail = min(bytes, msblk->devblksize - offset);
+		memcpy(buff, bh[i]->b_data + offset, avail);
+		buff += avail;
+		bytes -= avail;
+		offset = 0;
+		put_bh(bh[i]);
+	}
+
 	res = lzo1x_decompress_safe(stream->input, (size_t)length,
 					stream->output, &out_len);
 	if (res != LZO_E_OK)
-		return -EIO;
-	squashfs_buf_to_actor(stream->output, output, out_len);
+		goto failed;
 
-	return out_len;
+	res = bytes = (int)out_len;
+	data = squashfs_first_page(output);
+	buff = stream->output;
+	while (data) {
+		if (bytes <= PAGE_SIZE) {
+			memcpy(data, buff, bytes);
+			break;
+		} else {
+			memcpy(data, buff, PAGE_SIZE);
+			buff += PAGE_SIZE;
+			bytes -= PAGE_SIZE;
+			data = squashfs_next_page(output);
+		}
+	}
+	squashfs_finish_page(output);
+
+	return res;
+
+failed:
+	return -EIO;
 }
 
 const struct squashfs_decompressor squashfs_lzo_comp_ops = {

diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c
index e348f56..9b7b1b6 100644
--- a/fs/squashfs/page_actor.c
+++ b/fs/squashfs/page_actor.c

@@ -9,11 +9,79 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/pagemap.h>
-#include <linux/buffer_head.h>
 #include "page_actor.h"
 
-struct squashfs_page_actor *squashfs_page_actor_init(struct page **page,
-	int pages, int length, void (*release_pages)(struct page **, int, int))
+/*
+ * This file contains implementations of page_actor for decompressing into
+ * an intermediate buffer, and for decompressing directly into the
+ * page cache.
+ *
+ * Calling code should avoid sleeping between calls to squashfs_first_page()
+ * and squashfs_finish_page().
+ */
+
+/* Implementation of page_actor for decompressing into intermediate buffer */
+static void *cache_first_page(struct squashfs_page_actor *actor)
+{
+	actor->next_page = 1;
+	return actor->buffer[0];
+}
+
+static void *cache_next_page(struct squashfs_page_actor *actor)
+{
+	if (actor->next_page == actor->pages)
+		return NULL;
+
+	return actor->buffer[actor->next_page++];
+}
+
+static void cache_finish_page(struct squashfs_page_actor *actor)
+{
+	/* empty */
+}
+
+struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
+	int pages, int length)
+{
+	struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
+
+	if (actor == NULL)
+		return NULL;
+
+	actor->length = length ? : pages * PAGE_SIZE;
+	actor->buffer = buffer;
+	actor->pages = pages;
+	actor->next_page = 0;
+	actor->squashfs_first_page = cache_first_page;
+	actor->squashfs_next_page = cache_next_page;
+	actor->squashfs_finish_page = cache_finish_page;
+	return actor;
+}
+
+/* Implementation of page_actor for decompressing directly into page cache. */
+static void *direct_first_page(struct squashfs_page_actor *actor)
+{
+	actor->next_page = 1;
+	return actor->pageaddr = kmap_atomic(actor->page[0]);
+}
+
+static void *direct_next_page(struct squashfs_page_actor *actor)
+{
+	if (actor->pageaddr)
+		kunmap_atomic(actor->pageaddr);
+
+	return actor->pageaddr = actor->next_page == actor->pages ? NULL :
+		kmap_atomic(actor->page[actor->next_page++]);
+}
+
+static void direct_finish_page(struct squashfs_page_actor *actor)
+{
+	if (actor->pageaddr)
+		kunmap_atomic(actor->pageaddr);
+}
+
+struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page,
+	int pages, int length)
 {
 	struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
 
@@ -25,129 +93,8 @@ struct squashfs_page_actor *squashfs_page_actor_init(struct page **page,
 	actor->pages = pages;
 	actor->next_page = 0;
 	actor->pageaddr = NULL;
-	actor->release_pages = release_pages;
+	actor->squashfs_first_page = direct_first_page;
+	actor->squashfs_next_page = direct_next_page;
+	actor->squashfs_finish_page = direct_finish_page;
 	return actor;
 }
-
-void squashfs_page_actor_free(struct squashfs_page_actor *actor, int error)
-{
-	if (!actor)
-		return;
-
-	if (actor->release_pages)
-		actor->release_pages(actor->page, actor->pages, error);
-	kfree(actor);
-}
-
-void squashfs_actor_to_buf(struct squashfs_page_actor *actor, void *buf,
-	int length)
-{
-	void *pageaddr;
-	int pos = 0, avail, i;
-
-	for (i = 0; i < actor->pages && pos < length; ++i) {
-		avail = min_t(int, length - pos, PAGE_SIZE);
-		if (actor->page[i]) {
-			pageaddr = kmap_atomic(actor->page[i]);
-			memcpy(buf + pos, pageaddr, avail);
-			kunmap_atomic(pageaddr);
-		}
-		pos += avail;
-	}
-}
-
-void squashfs_buf_to_actor(void *buf, struct squashfs_page_actor *actor,
-	int length)
-{
-	void *pageaddr;
-	int pos = 0, avail, i;
-
-	for (i = 0; i < actor->pages && pos < length; ++i) {
-		avail = min_t(int, length - pos, PAGE_SIZE);
-		if (actor->page[i]) {
-			pageaddr = kmap_atomic(actor->page[i]);
-			memcpy(pageaddr, buf + pos, avail);
-			kunmap_atomic(pageaddr);
-		}
-		pos += avail;
-	}
-}
-
-void squashfs_bh_to_actor(struct buffer_head **bh, int nr_buffers,
-	struct squashfs_page_actor *actor, int offset, int length, int blksz)
-{
-	void *kaddr = NULL;
-	int bytes = 0, pgoff = 0, b = 0, p = 0, avail, i;
-
-	while (bytes < length) {
-		if (actor->page[p]) {
-			kaddr = kmap_atomic(actor->page[p]);
-			while (pgoff < PAGE_SIZE && bytes < length) {
-				avail = min_t(int, blksz - offset,
-						PAGE_SIZE - pgoff);
-				memcpy(kaddr + pgoff, bh[b]->b_data + offset,
-				       avail);
-				pgoff += avail;
-				bytes += avail;
-				offset = (offset + avail) % blksz;
-				if (!offset) {
-					put_bh(bh[b]);
-					++b;
-				}
-			}
-			kunmap_atomic(kaddr);
-			pgoff = 0;
-		} else {
-			for (i = 0; i < PAGE_SIZE / blksz; ++i) {
-				if (bh[b])
-					put_bh(bh[b]);
-				++b;
-			}
-			bytes += PAGE_SIZE;
-		}
-		++p;
-	}
-}
-
-void squashfs_bh_to_buf(struct buffer_head **bh, int nr_buffers, void *buf,
-	int offset, int length, int blksz)
-{
-	int i, avail, bytes = 0;
-
-	for (i = 0; i < nr_buffers && bytes < length; ++i) {
-		avail = min_t(int, length - bytes, blksz - offset);
-		if (bh[i]) {
-			memcpy(buf + bytes, bh[i]->b_data + offset, avail);
-			put_bh(bh[i]);
-		}
-		bytes += avail;
-		offset = 0;
-	}
-}
-
-void free_page_array(struct page **page, int nr_pages)
-{
-	int i;
-
-	for (i = 0; i < nr_pages; ++i)
-		__free_page(page[i]);
-	kfree(page);
-}
-
-struct page **alloc_page_array(int nr_pages, int gfp_mask)
-{
-	int i;
-	struct page **page;
-
-	page = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
-	if (!page)
-		return NULL;
-	for (i = 0; i < nr_pages; ++i) {
-		page[i] = alloc_page(gfp_mask);
-		if (!page[i]) {
-			free_page_array(page, i);
-			return NULL;
-		}
-	}
-	return page;
-}

diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
index aa1ed79..98537ea 100644
--- a/fs/squashfs/page_actor.h
+++ b/fs/squashfs/page_actor.h

@@ -5,61 +5,77 @@
  * Phillip Lougher <phillip@squashfs.org.uk>
  *
  * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level squashfsory.
+ * the COPYING file in the top-level directory.
  */
 
+#ifndef CONFIG_SQUASHFS_FILE_DIRECT
 struct squashfs_page_actor {
-	struct page	**page;
-	void	*pageaddr;
+	void	**page;
 	int	pages;
 	int	length;
 	int	next_page;
-	void	(*release_pages)(struct page **, int, int);
 };
 
-extern struct squashfs_page_actor *squashfs_page_actor_init(struct page **,
-	int, int, void (*)(struct page **, int, int));
-extern void squashfs_page_actor_free(struct squashfs_page_actor *, int);
+static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page,
+	int pages, int length)
+{
+	struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
 
-extern void squashfs_actor_to_buf(struct squashfs_page_actor *, void *, int);
-extern void squashfs_buf_to_actor(void *, struct squashfs_page_actor *, int);
-extern void squashfs_bh_to_actor(struct buffer_head **, int,
-	struct squashfs_page_actor *, int, int, int);
-extern void squashfs_bh_to_buf(struct buffer_head **, int, void *, int, int,
-	int);
+	if (actor == NULL)
+		return NULL;
 
-/*
- * Calling code should avoid sleeping between calls to squashfs_first_page()
- * and squashfs_finish_page().
- */
+	actor->length = length ? : pages * PAGE_SIZE;
+	actor->page = page;
+	actor->pages = pages;
+	actor->next_page = 0;
+	return actor;
+}
+
 static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
 {
 	actor->next_page = 1;
-	return actor->pageaddr = actor->page[0] ? kmap_atomic(actor->page[0])
-						: NULL;
+	return actor->page[0];
 }
 
 static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
 {
-	if (!IS_ERR_OR_NULL(actor->pageaddr))
-		kunmap_atomic(actor->pageaddr);
-
-	if (actor->next_page == actor->pages)
-		return actor->pageaddr = ERR_PTR(-ENODATA);
-
-	actor->pageaddr = actor->page[actor->next_page] ?
-	    kmap_atomic(actor->page[actor->next_page]) : NULL;
-	++actor->next_page;
-	return actor->pageaddr;
+	return actor->next_page == actor->pages ? NULL :
+		actor->page[actor->next_page++];
 }
 
 static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
 {
-	if (!IS_ERR_OR_NULL(actor->pageaddr))
-		kunmap_atomic(actor->pageaddr);
+	/* empty */
 }
+#else
+struct squashfs_page_actor {
+	union {
+		void		**buffer;
+		struct page	**page;
+	};
+	void	*pageaddr;
+	void    *(*squashfs_first_page)(struct squashfs_page_actor *);
+	void    *(*squashfs_next_page)(struct squashfs_page_actor *);
+	void    (*squashfs_finish_page)(struct squashfs_page_actor *);
+	int	pages;
+	int	length;
+	int	next_page;
+};
 
-extern struct page **alloc_page_array(int, int);
-extern void free_page_array(struct page **, int);
-
+extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int);
+extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page
+							 **, int, int);
+static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
+{
+	return actor->squashfs_first_page(actor);
+}
+static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
+{
+	return actor->squashfs_next_page(actor);
+}
+static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
+{
+	actor->squashfs_finish_page(actor);
+}
+#endif
 #endif

diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index f4faab5..f89f8a7 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h

@@ -28,12 +28,8 @@
 #define WARNING(s, args...)	pr_warn("SQUASHFS: "s, ## args)
 
 /* block.c */
-extern int squashfs_init_read_wq(void);
-extern void squashfs_destroy_read_wq(void);
 extern int squashfs_read_data(struct super_block *, u64, int, u64 *,
 				struct squashfs_page_actor *);
-extern int squashfs_read_data_async(struct super_block *, u64, int, u64 *,
-				struct squashfs_page_actor *);
 
 /* cache.c */
 extern struct squashfs_cache *squashfs_cache_init(char *, int, int);
@@ -71,12 +67,12 @@ extern __le64 *squashfs_read_fragment_index_table(struct super_block *,
 				u64, u64, unsigned int);
 
 /* file.c */
+void squashfs_fill_page(struct page *, struct squashfs_cache_entry *, int, int);
 void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int,
 				int);
 
-/* file_direct.c */
-extern int squashfs_readpages_block(struct page *, struct list_head *,
-	unsigned int *, struct address_space *, int, u64, int);
+/* file_xxx.c */
+extern int squashfs_readpage_block(struct page *, u64, int, int);
 
 /* id.c */
 extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *);

diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 3b767ce..ef69c31 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h

@@ -49,7 +49,7 @@ struct squashfs_cache_entry {
 	int			num_waiters;
 	wait_queue_head_t	wait_queue;
 	struct squashfs_cache	*cache;
-	struct page		**page;
+	void			**data;
 	struct squashfs_page_actor	*actor;
 };
 

diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 445ce58..1516bb7 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c

@@ -445,15 +445,9 @@ static int __init init_squashfs_fs(void)
 	if (err)
 		return err;
 
-	if (!squashfs_init_read_wq()) {
-		destroy_inodecache();
-		return -ENOMEM;
-	}
-
 	err = register_filesystem(&squashfs_fs_type);
 	if (err) {
 		destroy_inodecache();
-		squashfs_destroy_read_wq();
 		return err;
 	}
 
@@ -467,7 +461,6 @@ static void __exit exit_squashfs_fs(void)
 {
 	unregister_filesystem(&squashfs_fs_type);
 	destroy_inodecache();
-	squashfs_destroy_read_wq();
 }
 
 

diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index 2f7be1f..6bfaef7 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c

@@ -55,7 +55,7 @@ static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk,
 	struct comp_opts *opts;
 	int err = 0, n;
 
-	opts = kmalloc(sizeof(*opts), GFP_ATOMIC);
+	opts = kmalloc(sizeof(*opts), GFP_KERNEL);
 	if (opts == NULL) {
 		err = -ENOMEM;
 		goto out2;
@@ -136,7 +136,6 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
 	enum xz_ret xz_err;
 	int avail, total = 0, k = 0;
 	struct squashfs_xz *stream = strm;
-	void *buf = NULL;
 
 	xz_dec_reset(stream->state);
 	stream->buf.in_pos = 0;
@@ -157,20 +156,12 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
 
 		if (stream->buf.out_pos == stream->buf.out_size) {
 			stream->buf.out = squashfs_next_page(output);
-			if (!IS_ERR(stream->buf.out)) {
+			if (stream->buf.out != NULL) {
 				stream->buf.out_pos = 0;
 				total += PAGE_SIZE;
 			}
 		}
 
-		if (!stream->buf.out) {
-			if (!buf) {
-				buf = kmalloc(PAGE_SIZE, GFP_ATOMIC);
-				if (!buf)
-					goto out;
-			}
-			stream->buf.out = buf;
-		}
 		xz_err = xz_dec_run(stream->state, &stream->buf);
 
 		if (stream->buf.in_pos == stream->buf.in_size && k < b)
@@ -182,13 +173,11 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
 	if (xz_err != XZ_STREAM_END || k < b)
 		goto out;
 
-	kfree(buf);
 	return total + stream->buf.out_pos;
 
 out:
 	for (; k < b; k++)
 		put_bh(bh[k]);
-	kfree(buf);
 
 	return -EIO;
 }

diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index d917c72..2ec24d1 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c

@@ -66,7 +66,6 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
 	struct buffer_head **bh, int b, int offset, int length,
 	struct squashfs_page_actor *output)
 {
-	void *buf = NULL;
 	int zlib_err, zlib_init = 0, k = 0;
 	z_stream *stream = strm;
 
@@ -85,19 +84,10 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
 
 		if (stream->avail_out == 0) {
 			stream->next_out = squashfs_next_page(output);
-			if (!IS_ERR(stream->next_out))
+			if (stream->next_out != NULL)
 				stream->avail_out = PAGE_SIZE;
 		}
 
-		if (!stream->next_out) {
-			if (!buf) {
-				buf = kmalloc(PAGE_SIZE, GFP_ATOMIC);
-				if (!buf)
-					goto out;
-			}
-			stream->next_out = buf;
-		}
-
 		if (!zlib_init) {
 			zlib_err = zlib_inflateInit(stream);
 			if (zlib_err != Z_OK) {
@@ -125,13 +115,11 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
 	if (k < b)
 		goto out;
 
-	kfree(buf);
 	return stream->total_out;
 
 out:
 	for (; k < b; k++)
 		put_bh(bh[k]);
-	kfree(buf);
 
 	return -EIO;
 }

diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h
new file mode 100644
index 0000000..d000aa2
--- /dev/null
+++ b/include/crypto/chacha.h

@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common values and helper functions for the ChaCha and XChaCha stream ciphers.
+ *
+ * XChaCha extends ChaCha's nonce to 192 bits, while provably retaining ChaCha's
+ * security.  Here they share the same key size, tfm context, and setkey
+ * function; only their IV size and encrypt/decrypt function differ.
+ *
+ * The ChaCha paper specifies 20, 12, and 8-round variants.  In general, it is
+ * recommended to use the 20-round variant ChaCha20.  However, the other
+ * variants can be needed in some performance-sensitive scenarios.  The generic
+ * ChaCha code currently allows only the 20 and 12-round variants.
+ */
+
+#ifndef _CRYPTO_CHACHA_H
+#define _CRYPTO_CHACHA_H
+
+#include <linux/types.h>
+#include <linux/crypto.h>
+
+/* 32-bit stream position, then 96-bit nonce (RFC7539 convention) */
+#define CHACHA_IV_SIZE		16
+
+#define CHACHA_KEY_SIZE		32
+#define CHACHA_BLOCK_SIZE	64
+
+/* 192-bit nonce, then 64-bit stream position */
+#define XCHACHA_IV_SIZE		32
+
+struct chacha_ctx {
+	u32 key[8];
+	int nrounds;
+};
+
+void chacha_block(u32 *state, u8 *stream, int nrounds);
+static inline void chacha20_block(u32 *state, u8 *stream)
+{
+	chacha_block(state, stream, 20);
+}
+void hchacha_block(const u32 *in, u32 *out, int nrounds);
+
+void crypto_chacha_init(u32 *state, struct chacha_ctx *ctx, u8 *iv);
+
+int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key,
+			   unsigned int keysize);
+int crypto_chacha12_setkey(struct crypto_tfm *tfm, const u8 *key,
+			   unsigned int keysize);
+
+int crypto_chacha_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+			struct scatterlist *src, unsigned int nbytes);
+int crypto_xchacha_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+			 struct scatterlist *src, unsigned int nbytes);
+
+#endif /* _CRYPTO_CHACHA_H */

diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h
deleted file mode 100644
index 20d20f68..0000000
--- a/include/crypto/chacha20.h
+++ /dev/null

@@ -1,26 +0,0 @@
-/*
- * Common values for the ChaCha20 algorithm
- */
-
-#ifndef _CRYPTO_CHACHA20_H
-#define _CRYPTO_CHACHA20_H
-
-#include <linux/types.h>
-#include <linux/crypto.h>
-
-#define CHACHA20_IV_SIZE	16
-#define CHACHA20_KEY_SIZE	32
-#define CHACHA20_BLOCK_SIZE	64
-
-struct chacha20_ctx {
-	u32 key[8];
-};
-
-void chacha20_block(u32 *state, void *stream);
-void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv);
-int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key,
-			   unsigned int keysize);
-int crypto_chacha20_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-			  struct scatterlist *src, unsigned int nbytes);
-
-#endif

diff --git a/include/crypto/nhpoly1305.h b/include/crypto/nhpoly1305.h
new file mode 100644
index 0000000..53c0442
--- /dev/null
+++ b/include/crypto/nhpoly1305.h

@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common values and helper functions for the NHPoly1305 hash function.
+ */
+
+#ifndef _NHPOLY1305_H
+#define _NHPOLY1305_H
+
+#include <crypto/hash.h>
+#include <crypto/poly1305.h>
+
+/* NH parameterization: */
+
+/* Endianness: little */
+/* Word size: 32 bits (works well on NEON, SSE2, AVX2) */
+
+/* Stride: 2 words (optimal on ARM32 NEON; works okay on other CPUs too) */
+#define NH_PAIR_STRIDE		2
+#define NH_MESSAGE_UNIT		(NH_PAIR_STRIDE * 2 * sizeof(u32))
+
+/* Num passes (Toeplitz iteration count): 4, to give ε = 2^{-128} */
+#define NH_NUM_PASSES		4
+#define NH_HASH_BYTES		(NH_NUM_PASSES * sizeof(u64))
+
+/* Max message size: 1024 bytes (32x compression factor) */
+#define NH_NUM_STRIDES		64
+#define NH_MESSAGE_WORDS	(NH_PAIR_STRIDE * 2 * NH_NUM_STRIDES)
+#define NH_MESSAGE_BYTES	(NH_MESSAGE_WORDS * sizeof(u32))
+#define NH_KEY_WORDS		(NH_MESSAGE_WORDS + \
+				 NH_PAIR_STRIDE * 2 * (NH_NUM_PASSES - 1))
+#define NH_KEY_BYTES		(NH_KEY_WORDS * sizeof(u32))
+
+#define NHPOLY1305_KEY_SIZE	(POLY1305_BLOCK_SIZE + NH_KEY_BYTES)
+
+struct nhpoly1305_key {
+	struct poly1305_key poly_key;
+	u32 nh_key[NH_KEY_WORDS];
+};
+
+struct nhpoly1305_state {
+
+	/* Running total of polynomial evaluation */
+	struct poly1305_state poly_state;
+
+	/* Partial block buffer */
+	u8 buffer[NH_MESSAGE_UNIT];
+	unsigned int buflen;
+
+	/*
+	 * Number of bytes remaining until the current NH message reaches
+	 * NH_MESSAGE_BYTES.  When nonzero, 'nh_hash' holds the partial NH hash.
+	 */
+	unsigned int nh_remaining;
+
+	__le64 nh_hash[NH_NUM_PASSES];
+};
+
+typedef void (*nh_t)(const u32 *key, const u8 *message, size_t message_len,
+		     __le64 hash[NH_NUM_PASSES]);
+
+int crypto_nhpoly1305_setkey(struct crypto_shash *tfm,
+			     const u8 *key, unsigned int keylen);
+
+int crypto_nhpoly1305_init(struct shash_desc *desc);
+int crypto_nhpoly1305_update(struct shash_desc *desc,
+			     const u8 *src, unsigned int srclen);
+int crypto_nhpoly1305_update_helper(struct shash_desc *desc,
+				    const u8 *src, unsigned int srclen,
+				    nh_t nh_fn);
+int crypto_nhpoly1305_final(struct shash_desc *desc, u8 *dst);
+int crypto_nhpoly1305_final_helper(struct shash_desc *desc, u8 *dst,
+				   nh_t nh_fn);
+
+#endif /* _NHPOLY1305_H */

diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
index d586f74..0c0b83a 100644
--- a/include/crypto/poly1305.h
+++ b/include/crypto/poly1305.h

@@ -12,13 +12,21 @@
 #define POLY1305_KEY_SIZE	32
 #define POLY1305_DIGEST_SIZE	16
 
+struct poly1305_key {
+	u32 r[5];	/* key, base 2^26 */
+};
+
+struct poly1305_state {
+	u32 h[5];	/* accumulator, base 2^26 */
+};
+
 struct poly1305_desc_ctx {
 	/* key */
-	u32 r[5];
+	struct poly1305_key r;
 	/* finalize key */
 	u32 s[4];
 	/* accumulator */
-	u32 h[5];
+	struct poly1305_state h;
 	/* partial buffer */
 	u8 buf[POLY1305_BLOCK_SIZE];
 	/* bytes used in partial buffer */
@@ -29,6 +37,22 @@ struct poly1305_desc_ctx {
 	bool sset;
 };
 
+/*
+ * Poly1305 core functions.  These implement the ε-almost-∆-universal hash
+ * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce
+ * ("s key") at the end.  They also only support block-aligned inputs.
+ */
+void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key);
+static inline void poly1305_core_init(struct poly1305_state *state)
+{
+	memset(state->h, 0, sizeof(state->h));
+}
+void poly1305_core_blocks(struct poly1305_state *state,
+			  const struct poly1305_key *key,
+			  const void *src, unsigned int nblocks);
+void poly1305_core_emit(const struct poly1305_state *state, void *dst);
+
+/* Crypto API helper functions for the Poly1305 MAC */
 int crypto_poly1305_init(struct shash_desc *desc);
 unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
 					const u8 *src, unsigned int srclen);

diff --git a/include/crypto/speck.h b/include/crypto/speck.h
deleted file mode 100644
index 73cfc95..0000000
--- a/include/crypto/speck.h
+++ /dev/null

@@ -1,62 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Common values for the Speck algorithm
- */
-
-#ifndef _CRYPTO_SPECK_H
-#define _CRYPTO_SPECK_H
-
-#include <linux/types.h>
-
-/* Speck128 */
-
-#define SPECK128_BLOCK_SIZE	16
-
-#define SPECK128_128_KEY_SIZE	16
-#define SPECK128_128_NROUNDS	32
-
-#define SPECK128_192_KEY_SIZE	24
-#define SPECK128_192_NROUNDS	33
-
-#define SPECK128_256_KEY_SIZE	32
-#define SPECK128_256_NROUNDS	34
-
-struct speck128_tfm_ctx {
-	u64 round_keys[SPECK128_256_NROUNDS];
-	int nrounds;
-};
-
-void crypto_speck128_encrypt(const struct speck128_tfm_ctx *ctx,
-			     u8 *out, const u8 *in);
-
-void crypto_speck128_decrypt(const struct speck128_tfm_ctx *ctx,
-			     u8 *out, const u8 *in);
-
-int crypto_speck128_setkey(struct speck128_tfm_ctx *ctx, const u8 *key,
-			   unsigned int keysize);
-
-/* Speck64 */
-
-#define SPECK64_BLOCK_SIZE	8
-
-#define SPECK64_96_KEY_SIZE	12
-#define SPECK64_96_NROUNDS	26
-
-#define SPECK64_128_KEY_SIZE	16
-#define SPECK64_128_NROUNDS	27
-
-struct speck64_tfm_ctx {
-	u32 round_keys[SPECK64_128_NROUNDS];
-	int nrounds;
-};
-
-void crypto_speck64_encrypt(const struct speck64_tfm_ctx *ctx,
-			    u8 *out, const u8 *in);
-
-void crypto_speck64_decrypt(const struct speck64_tfm_ctx *ctx,
-			    u8 *out, const u8 *in);
-
-int crypto_speck64_setkey(struct speck64_tfm_ctx *ctx, const u8 *key,
-			  unsigned int keysize);
-
-#endif /* _CRYPTO_SPECK_H */

diff --git a/include/linux/bug.h b/include/linux/bug.h
index 8da93b9..1269d9b 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h

@@ -57,4 +57,23 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr,
 #else
 #define PANIC_CORRUPTION 0
 #endif  /* CONFIG_PANIC_ON_DATA_CORRUPTION */
+
+/*
+ * Since detected data corruption should stop operation on the affected
+ * structures. Return value must be checked and sanely acted on by caller.
+ */
+static inline __must_check bool check_data_corruption(bool v) { return v; }
+#define CHECK_DATA_CORRUPTION(condition, fmt, ...)			 \
+	check_data_corruption(({					 \
+		bool corruption = unlikely(condition);			 \
+		if (corruption) {					 \
+			if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \
+				pr_err(fmt, ##__VA_ARGS__);		 \
+				BUG();					 \
+			} else						 \
+				WARN(1, fmt, ##__VA_ARGS__);		 \
+		}							 \
+		corruption;						 \
+	}))
+
 #endif	/* _LINUX_BUG_H */

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 3a4f264..a8b4284 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h

@@ -117,7 +117,7 @@ struct clocksource {
 #define CLOCK_SOURCE_RESELECT			0x100
 
 /* simplify initialization of mask field */
-#define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1)
+#define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0)
 
 static inline u32 clocksource_freq2mult(u32 freq, u32 shift_constant, u64 from)
 {

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 058dd5d..cd15818 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h

@@ -24,6 +24,14 @@
 #undef inline
 #define inline inline __attribute__((unused)) notrace
 
+#undef __no_sanitize_address
+#define __no_sanitize_address __attribute__((no_sanitize("address")))
+
+/* Clang doesn't have a way to turn it off per-function, yet. */
+#ifdef __noretpoline
+#undef __noretpoline
+#endif
+
 #ifdef CONFIG_LTO_CLANG
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 #define __norecordmcount \
@@ -42,15 +50,3 @@
 #if __has_feature(address_sanitizer)
 #define __SANITIZE_ADDRESS__
 #endif
-
-#undef __no_sanitize_address
-#if __has_feature(address_sanitizer)
-#define __no_sanitize_address __attribute__((no_sanitize("kernel-address")))
-#else
-#define __no_sanitize_address
-#endif
-
-/* Clang doesn't have a way to turn it off per-function, yet. */
-#ifdef __noretpoline
-#undef __noretpoline
-#endif

diff --git a/include/linux/cpufreq_times.h b/include/linux/cpufreq_times.h
index 0d87b80..d4f063a 100644
--- a/include/linux/cpufreq_times.h
+++ b/include/linux/cpufreq_times.h

@@ -36,6 +36,11 @@ void cpufreq_times_record_transition(struct cpufreq_freqs *freq);
 void cpufreq_task_times_remove_uids(uid_t uid_start, uid_t uid_end);
 int single_uid_time_in_state_open(struct inode *inode, struct file *file);
 #else
+static inline void cpufreq_task_times_init(struct task_struct *p) {}
+static inline void cpufreq_task_times_alloc(struct task_struct *p) {}
+static inline void cpufreq_task_times_exit(struct task_struct *p) {}
+static inline void cpufreq_acct_update_power(struct task_struct *p,
+					     u64 cputime) {}
 static inline void cpufreq_times_create_policy(struct cpufreq_policy *policy) {}
 static inline void cpufreq_times_record_transition(
 	struct cpufreq_freqs *freq) {}

diff --git a/include/linux/list.h b/include/linux/list.h
index 5809e9a2..d1039ec 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h

@@ -28,27 +28,42 @@ static inline void INIT_LIST_HEAD(struct list_head *list)
 	list->prev = list;
 }
 
+#ifdef CONFIG_DEBUG_LIST
+extern bool __list_add_valid(struct list_head *new,
+			      struct list_head *prev,
+			      struct list_head *next);
+extern bool __list_del_entry_valid(struct list_head *entry);
+#else
+static inline bool __list_add_valid(struct list_head *new,
+				struct list_head *prev,
+				struct list_head *next)
+{
+	return true;
+}
+static inline bool __list_del_entry_valid(struct list_head *entry)
+{
+	return true;
+}
+#endif
+
 /*
  * Insert a new entry between two known consecutive entries.
  *
  * This is only for internal list manipulation where we know
  * the prev/next entries already!
  */
-#ifndef CONFIG_DEBUG_LIST
 static inline void __list_add(struct list_head *new,
 			      struct list_head *prev,
 			      struct list_head *next)
 {
+	if (!__list_add_valid(new, prev, next))
+		return;
+
 	next->prev = new;
 	new->next = next;
 	new->prev = prev;
 	WRITE_ONCE(prev->next, new);
 }
-#else
-extern void __list_add(struct list_head *new,
-			      struct list_head *prev,
-			      struct list_head *next);
-#endif
 
 /**
  * list_add - add a new entry
@@ -96,22 +111,20 @@ static inline void __list_del(struct list_head * prev, struct list_head * next)
  * Note: list_empty() on entry does not return true after this, the entry is
  * in an undefined state.
  */
-#ifndef CONFIG_DEBUG_LIST
 static inline void __list_del_entry(struct list_head *entry)
 {
+	if (!__list_del_entry_valid(entry))
+		return;
+
 	__list_del(entry->prev, entry->next);
 }
 
 static inline void list_del(struct list_head *entry)
 {
-	__list_del(entry->prev, entry->next);
+	__list_del_entry(entry);
 	entry->next = LIST_POISON1;
 	entry->prev = LIST_POISON2;
 }
-#else
-extern void __list_del_entry(struct list_head *entry);
-extern void list_del(struct list_head *entry);
-#endif
 
 /**
  * list_replace - replace old entry by new one

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 8beb98d..4f7a956 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h

@@ -45,19 +45,17 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list)
  * This is only for internal list manipulation where we know
  * the prev/next entries already!
  */
-#ifndef CONFIG_DEBUG_LIST
 static inline void __list_add_rcu(struct list_head *new,
 		struct list_head *prev, struct list_head *next)
 {
+	if (!__list_add_valid(new, prev, next))
+		return;
+
 	new->next = next;
 	new->prev = prev;
 	rcu_assign_pointer(list_next_rcu(prev), new);
 	next->prev = new;
 }
-#else
-void __list_add_rcu(struct list_head *new,
-		    struct list_head *prev, struct list_head *next);
-#endif
 
 /**
  * list_add_rcu - add a new entry to rcu-protected list

diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h
index c97fac8..35ec6c4 100644
--- a/include/linux/sync_file.h
+++ b/include/linux/sync_file.h

@@ -40,10 +40,10 @@ struct sync_file {
 #endif
 
 	wait_queue_head_t	wq;
+	unsigned long		flags;
 
 	struct fence		*fence;
 	struct fence_cb cb;
-	unsigned long flags;
 };
 
 #define POLL_ENABLED 0

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 91a740f..ef4bc88 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h

@@ -205,6 +205,26 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 	}								\
 	static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))
 
+/*
+ * Called before coming back to user-mode. Returning to user-mode with an
+ * address limit different than USER_DS can allow to overwrite kernel memory.
+ */
+static inline void addr_limit_user_check(void)
+{
+#ifdef TIF_FSCHECK
+	if (!test_thread_flag(TIF_FSCHECK))
+		return;
+#endif
+
+	if (CHECK_DATA_CORRUPTION(!segment_eq(get_fs(), USER_DS),
+				  "Invalid address limit on user-mode return"))
+		force_sig(SIGKILL, current);
+
+#ifdef TIF_FSCHECK
+	clear_thread_flag(TIF_FSCHECK);
+#endif
+}
+
 asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
 			       qid_t id, void __user *addr);
 asmlinkage long sys_time(time_t __user *tloc);

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index d3c19f8..2839d62 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h

@@ -532,7 +532,7 @@ static inline void sysfs_notify_dirent(struct kernfs_node *kn)
 }
 
 static inline struct kernfs_node *sysfs_get_dirent(struct kernfs_node *parent,
-						   const unsigned char *name)
+						   const char *name)
 {
 	return kernfs_find_and_get(parent, name);
 }

diff --git a/include/linux/verification.h b/include/linux/verification.h
index cfa4730..60ea906 100644
--- a/include/linux/verification.h
+++ b/include/linux/verification.h

@@ -32,9 +32,13 @@ enum key_being_used_for {
 };
 extern const char *const key_being_used_for[NR__KEY_BEING_USED_FOR];
 
-#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
-
 struct key;
+struct public_key_signature;
+
+extern int verify_signature_one(const struct public_key_signature *sig,
+			   struct key *trusted_keys, const char *keyid);
+
+#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
 
 extern int verify_pkcs7_signature(const void *data, size_t len,
 				  const void *raw_pkcs7, size_t pkcs7_len,

diff --git a/include/linux/xxhash.h b/include/linux/xxhash.h
new file mode 100644
index 0000000..9e1f42c
--- /dev/null
+++ b/include/linux/xxhash.h

@@ -0,0 +1,236 @@
+/*
+ * xxHash - Extremely Fast Hash algorithm
+ * Copyright (C) 2012-2016, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ *     copyright notice, this list of conditions and the following disclaimer
+ *     in the documentation and/or other materials provided with the
+ *     distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ *
+ * You can contact the author at:
+ * - xxHash homepage: http://cyan4973.github.io/xxHash/
+ * - xxHash source repository: https://github.com/Cyan4973/xxHash
+ */
+
+/*
+ * Notice extracted from xxHash homepage:
+ *
+ * xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
+ * It also successfully passes all tests from the SMHasher suite.
+ *
+ * Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2
+ * Duo @3GHz)
+ *
+ * Name            Speed       Q.Score   Author
+ * xxHash          5.4 GB/s     10
+ * CrapWow         3.2 GB/s      2       Andrew
+ * MumurHash 3a    2.7 GB/s     10       Austin Appleby
+ * SpookyHash      2.0 GB/s     10       Bob Jenkins
+ * SBox            1.4 GB/s      9       Bret Mulvey
+ * Lookup3         1.2 GB/s      9       Bob Jenkins
+ * SuperFastHash   1.2 GB/s      1       Paul Hsieh
+ * CityHash64      1.05 GB/s    10       Pike & Alakuijala
+ * FNV             0.55 GB/s     5       Fowler, Noll, Vo
+ * CRC32           0.43 GB/s     9
+ * MD5-32          0.33 GB/s    10       Ronald L. Rivest
+ * SHA1-32         0.28 GB/s    10
+ *
+ * Q.Score is a measure of quality of the hash function.
+ * It depends on successfully passing SMHasher test set.
+ * 10 is a perfect score.
+ *
+ * A 64-bits version, named xxh64 offers much better speed,
+ * but for 64-bits applications only.
+ * Name     Speed on 64 bits    Speed on 32 bits
+ * xxh64       13.8 GB/s            1.9 GB/s
+ * xxh32        6.8 GB/s            6.0 GB/s
+ */
+
+#ifndef XXHASH_H
+#define XXHASH_H
+
+#include <linux/types.h>
+
+/*-****************************
+ * Simple Hash Functions
+ *****************************/
+
+/**
+ * xxh32() - calculate the 32-bit hash of the input with a given seed.
+ *
+ * @input:  The data to hash.
+ * @length: The length of the data to hash.
+ * @seed:   The seed can be used to alter the result predictably.
+ *
+ * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
+ *
+ * Return:  The 32-bit hash of the data.
+ */
+uint32_t xxh32(const void *input, size_t length, uint32_t seed);
+
+/**
+ * xxh64() - calculate the 64-bit hash of the input with a given seed.
+ *
+ * @input:  The data to hash.
+ * @length: The length of the data to hash.
+ * @seed:   The seed can be used to alter the result predictably.
+ *
+ * This function runs 2x faster on 64-bit systems, but slower on 32-bit systems.
+ *
+ * Return:  The 64-bit hash of the data.
+ */
+uint64_t xxh64(const void *input, size_t length, uint64_t seed);
+
+/*-****************************
+ * Streaming Hash Functions
+ *****************************/
+
+/*
+ * These definitions are only meant to allow allocation of XXH state
+ * statically, on stack, or in a struct for example.
+ * Do not use members directly.
+ */
+
+/**
+ * struct xxh32_state - private xxh32 state, do not use members directly
+ */
+struct xxh32_state {
+	uint32_t total_len_32;
+	uint32_t large_len;
+	uint32_t v1;
+	uint32_t v2;
+	uint32_t v3;
+	uint32_t v4;
+	uint32_t mem32[4];
+	uint32_t memsize;
+};
+
+/**
+ * struct xxh32_state - private xxh64 state, do not use members directly
+ */
+struct xxh64_state {
+	uint64_t total_len;
+	uint64_t v1;
+	uint64_t v2;
+	uint64_t v3;
+	uint64_t v4;
+	uint64_t mem64[4];
+	uint32_t memsize;
+};
+
+/**
+ * xxh32_reset() - reset the xxh32 state to start a new hashing operation
+ *
+ * @state: The xxh32 state to reset.
+ * @seed:  Initialize the hash state with this seed.
+ *
+ * Call this function on any xxh32_state to prepare for a new hashing operation.
+ */
+void xxh32_reset(struct xxh32_state *state, uint32_t seed);
+
+/**
+ * xxh32_update() - hash the data given and update the xxh32 state
+ *
+ * @state:  The xxh32 state to update.
+ * @input:  The data to hash.
+ * @length: The length of the data to hash.
+ *
+ * After calling xxh32_reset() call xxh32_update() as many times as necessary.
+ *
+ * Return:  Zero on success, otherwise an error code.
+ */
+int xxh32_update(struct xxh32_state *state, const void *input, size_t length);
+
+/**
+ * xxh32_digest() - produce the current xxh32 hash
+ *
+ * @state: Produce the current xxh32 hash of this state.
+ *
+ * A hash value can be produced at any time. It is still possible to continue
+ * inserting input into the hash state after a call to xxh32_digest(), and
+ * generate new hashes later on, by calling xxh32_digest() again.
+ *
+ * Return: The xxh32 hash stored in the state.
+ */
+uint32_t xxh32_digest(const struct xxh32_state *state);
+
+/**
+ * xxh64_reset() - reset the xxh64 state to start a new hashing operation
+ *
+ * @state: The xxh64 state to reset.
+ * @seed:  Initialize the hash state with this seed.
+ */
+void xxh64_reset(struct xxh64_state *state, uint64_t seed);
+
+/**
+ * xxh64_update() - hash the data given and update the xxh64 state
+ * @state:  The xxh64 state to update.
+ * @input:  The data to hash.
+ * @length: The length of the data to hash.
+ *
+ * After calling xxh64_reset() call xxh64_update() as many times as necessary.
+ *
+ * Return:  Zero on success, otherwise an error code.
+ */
+int xxh64_update(struct xxh64_state *state, const void *input, size_t length);
+
+/**
+ * xxh64_digest() - produce the current xxh64 hash
+ *
+ * @state: Produce the current xxh64 hash of this state.
+ *
+ * A hash value can be produced at any time. It is still possible to continue
+ * inserting input into the hash state after a call to xxh64_digest(), and
+ * generate new hashes later on, by calling xxh64_digest() again.
+ *
+ * Return: The xxh64 hash stored in the state.
+ */
+uint64_t xxh64_digest(const struct xxh64_state *state);
+
+/*-**************************
+ * Utils
+ ***************************/
+
+/**
+ * xxh32_copy_state() - copy the source state into the destination state
+ *
+ * @src: The source xxh32 state.
+ * @dst: The destination xxh32 state.
+ */
+void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src);
+
+/**
+ * xxh64_copy_state() - copy the source state into the destination state
+ *
+ * @src: The source xxh64 state.
+ * @dst: The destination xxh64 state.
+ */
+void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src);
+
+#endif /* XXHASH_H */

diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 57a8e98..2219cce 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h

@@ -47,6 +47,8 @@ void zs_destroy_pool(struct zs_pool *pool);
 unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags);
 void zs_free(struct zs_pool *pool, unsigned long obj);
 
+size_t zs_huge_class_size(struct zs_pool *pool);
+
 void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 			enum zs_mapmode mm);
 void zs_unmap_object(struct zs_pool *pool, unsigned long handle);

diff --git a/include/linux/zstd.h b/include/linux/zstd.h
new file mode 100644
index 0000000..249575e
--- /dev/null
+++ b/include/linux/zstd.h

@@ -0,0 +1,1157 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of https://github.com/facebook/zstd.
+ * An additional grant of patent rights can be found in the PATENTS file in the
+ * same directory.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ */
+
+#ifndef ZSTD_H
+#define ZSTD_H
+
+/* ======   Dependency   ======*/
+#include <linux/types.h>   /* size_t */
+
+
+/*-*****************************************************************************
+ * Introduction
+ *
+ * zstd, short for Zstandard, is a fast lossless compression algorithm,
+ * targeting real-time compression scenarios at zlib-level and better
+ * compression ratios. The zstd compression library provides in-memory
+ * compression and decompression functions. The library supports compression
+ * levels from 1 up to ZSTD_maxCLevel() which is 22. Levels >= 20, labeled
+ * ultra, should be used with caution, as they require more memory.
+ * Compression can be done in:
+ *  - a single step, reusing a context (described as Explicit memory management)
+ *  - unbounded multiple steps (described as Streaming compression)
+ * The compression ratio achievable on small data can be highly improved using
+ * compression with a dictionary in:
+ *  - a single step (described as Simple dictionary API)
+ *  - a single step, reusing a dictionary (described as Fast dictionary API)
+ ******************************************************************************/
+
+/*======  Helper functions  ======*/
+
+/**
+ * enum ZSTD_ErrorCode - zstd error codes
+ *
+ * Functions that return size_t can be checked for errors using ZSTD_isError()
+ * and the ZSTD_ErrorCode can be extracted using ZSTD_getErrorCode().
+ */
+typedef enum {
+	ZSTD_error_no_error,
+	ZSTD_error_GENERIC,
+	ZSTD_error_prefix_unknown,
+	ZSTD_error_version_unsupported,
+	ZSTD_error_parameter_unknown,
+	ZSTD_error_frameParameter_unsupported,
+	ZSTD_error_frameParameter_unsupportedBy32bits,
+	ZSTD_error_frameParameter_windowTooLarge,
+	ZSTD_error_compressionParameter_unsupported,
+	ZSTD_error_init_missing,
+	ZSTD_error_memory_allocation,
+	ZSTD_error_stage_wrong,
+	ZSTD_error_dstSize_tooSmall,
+	ZSTD_error_srcSize_wrong,
+	ZSTD_error_corruption_detected,
+	ZSTD_error_checksum_wrong,
+	ZSTD_error_tableLog_tooLarge,
+	ZSTD_error_maxSymbolValue_tooLarge,
+	ZSTD_error_maxSymbolValue_tooSmall,
+	ZSTD_error_dictionary_corrupted,
+	ZSTD_error_dictionary_wrong,
+	ZSTD_error_dictionaryCreation_failed,
+	ZSTD_error_maxCode
+} ZSTD_ErrorCode;
+
+/**
+ * ZSTD_maxCLevel() - maximum compression level available
+ *
+ * Return: Maximum compression level available.
+ */
+int ZSTD_maxCLevel(void);
+/**
+ * ZSTD_compressBound() - maximum compressed size in worst case scenario
+ * @srcSize: The size of the data to compress.
+ *
+ * Return:   The maximum compressed size in the worst case scenario.
+ */
+size_t ZSTD_compressBound(size_t srcSize);
+/**
+ * ZSTD_isError() - tells if a size_t function result is an error code
+ * @code:  The function result to check for error.
+ *
+ * Return: Non-zero iff the code is an error.
+ */
+static __attribute__((unused)) unsigned int ZSTD_isError(size_t code)
+{
+	return code > (size_t)-ZSTD_error_maxCode;
+}
+/**
+ * ZSTD_getErrorCode() - translates an error function result to a ZSTD_ErrorCode
+ * @functionResult: The result of a function for which ZSTD_isError() is true.
+ *
+ * Return:          The ZSTD_ErrorCode corresponding to the functionResult or 0
+ *                  if the functionResult isn't an error.
+ */
+static __attribute__((unused)) ZSTD_ErrorCode ZSTD_getErrorCode(
+	size_t functionResult)
+{
+	if (!ZSTD_isError(functionResult))
+		return (ZSTD_ErrorCode)0;
+	return (ZSTD_ErrorCode)(0 - functionResult);
+}
+
+/**
+ * enum ZSTD_strategy - zstd compression search strategy
+ *
+ * From faster to stronger.
+ */
+typedef enum {
+	ZSTD_fast,
+	ZSTD_dfast,
+	ZSTD_greedy,
+	ZSTD_lazy,
+	ZSTD_lazy2,
+	ZSTD_btlazy2,
+	ZSTD_btopt,
+	ZSTD_btopt2
+} ZSTD_strategy;
+
+/**
+ * struct ZSTD_compressionParameters - zstd compression parameters
+ * @windowLog:    Log of the largest match distance. Larger means more
+ *                compression, and more memory needed during decompression.
+ * @chainLog:     Fully searched segment. Larger means more compression, slower,
+ *                and more memory (useless for fast).
+ * @hashLog:      Dispatch table. Larger means more compression,
+ *                slower, and more memory.
+ * @searchLog:    Number of searches. Larger means more compression and slower.
+ * @searchLength: Match length searched. Larger means faster decompression,
+ *                sometimes less compression.
+ * @targetLength: Acceptable match size for optimal parser (only). Larger means
+ *                more compression, and slower.
+ * @strategy:     The zstd compression strategy.
+ */
+typedef struct {
+	unsigned int windowLog;
+	unsigned int chainLog;
+	unsigned int hashLog;
+	unsigned int searchLog;
+	unsigned int searchLength;
+	unsigned int targetLength;
+	ZSTD_strategy strategy;
+} ZSTD_compressionParameters;
+
+/**
+ * struct ZSTD_frameParameters - zstd frame parameters
+ * @contentSizeFlag: Controls whether content size will be present in the frame
+ *                   header (when known).
+ * @checksumFlag:    Controls whether a 32-bit checksum is generated at the end
+ *                   of the frame for error detection.
+ * @noDictIDFlag:    Controls whether dictID will be saved into the frame header
+ *                   when using dictionary compression.
+ *
+ * The default value is all fields set to 0.
+ */
+typedef struct {
+	unsigned int contentSizeFlag;
+	unsigned int checksumFlag;
+	unsigned int noDictIDFlag;
+} ZSTD_frameParameters;
+
+/**
+ * struct ZSTD_parameters - zstd parameters
+ * @cParams: The compression parameters.
+ * @fParams: The frame parameters.
+ */
+typedef struct {
+	ZSTD_compressionParameters cParams;
+	ZSTD_frameParameters fParams;
+} ZSTD_parameters;
+
+/**
+ * ZSTD_getCParams() - returns ZSTD_compressionParameters for selected level
+ * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel().
+ * @estimatedSrcSize: The estimated source size to compress or 0 if unknown.
+ * @dictSize:         The dictionary size or 0 if a dictionary isn't being used.
+ *
+ * Return:            The selected ZSTD_compressionParameters.
+ */
+ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel,
+	unsigned long long estimatedSrcSize, size_t dictSize);
+
+/**
+ * ZSTD_getParams() - returns ZSTD_parameters for selected level
+ * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel().
+ * @estimatedSrcSize: The estimated source size to compress or 0 if unknown.
+ * @dictSize:         The dictionary size or 0 if a dictionary isn't being used.
+ *
+ * The same as ZSTD_getCParams() except also selects the default frame
+ * parameters (all zero).
+ *
+ * Return:            The selected ZSTD_parameters.
+ */
+ZSTD_parameters ZSTD_getParams(int compressionLevel,
+	unsigned long long estimatedSrcSize, size_t dictSize);
+
+/*-*************************************
+ * Explicit memory management
+ **************************************/
+
+/**
+ * ZSTD_CCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_CCtx
+ * @cParams: The compression parameters to be used for compression.
+ *
+ * If multiple compression parameters might be used, the caller must call
+ * ZSTD_CCtxWorkspaceBound() for each set of parameters and use the maximum
+ * size.
+ *
+ * Return:   A lower bound on the size of the workspace that is passed to
+ *           ZSTD_initCCtx().
+ */
+size_t ZSTD_CCtxWorkspaceBound(ZSTD_compressionParameters cParams);
+
+/**
+ * struct ZSTD_CCtx - the zstd compression context
+ *
+ * When compressing many times it is recommended to allocate a context just once
+ * and reuse it for each successive compression operation.
+ */
+typedef struct ZSTD_CCtx_s ZSTD_CCtx;
+/**
+ * ZSTD_initCCtx() - initialize a zstd compression context
+ * @workspace:     The workspace to emplace the context into. It must outlive
+ *                 the returned context.
+ * @workspaceSize: The size of workspace. Use ZSTD_CCtxWorkspaceBound() to
+ *                 determine how large the workspace must be.
+ *
+ * Return:         A compression context emplaced into workspace.
+ */
+ZSTD_CCtx *ZSTD_initCCtx(void *workspace, size_t workspaceSize);
+
+/**
+ * ZSTD_compressCCtx() - compress src into dst
+ * @ctx:         The context. Must have been initialized with a workspace at
+ *               least as large as ZSTD_CCtxWorkspaceBound(params.cParams).
+ * @dst:         The buffer to compress src into.
+ * @dstCapacity: The size of the destination buffer. May be any size, but
+ *               ZSTD_compressBound(srcSize) is guaranteed to be large enough.
+ * @src:         The data to compress.
+ * @srcSize:     The size of the data to compress.
+ * @params:      The parameters to use for compression. See ZSTD_getParams().
+ *
+ * Return:       The compressed size or an error, which can be checked using
+ *               ZSTD_isError().
+ */
+size_t ZSTD_compressCCtx(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize, ZSTD_parameters params);
+
+/**
+ * ZSTD_DCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_DCtx
+ *
+ * Return: A lower bound on the size of the workspace that is passed to
+ *         ZSTD_initDCtx().
+ */
+size_t ZSTD_DCtxWorkspaceBound(void);
+
+/**
+ * struct ZSTD_DCtx - the zstd decompression context
+ *
+ * When decompressing many times it is recommended to allocate a context just
+ * once and reuse it for each successive decompression operation.
+ */
+typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+/**
+ * ZSTD_initDCtx() - initialize a zstd decompression context
+ * @workspace:     The workspace to emplace the context into. It must outlive
+ *                 the returned context.
+ * @workspaceSize: The size of workspace. Use ZSTD_DCtxWorkspaceBound() to
+ *                 determine how large the workspace must be.
+ *
+ * Return:         A decompression context emplaced into workspace.
+ */
+ZSTD_DCtx *ZSTD_initDCtx(void *workspace, size_t workspaceSize);
+
+/**
+ * ZSTD_decompressDCtx() - decompress zstd compressed src into dst
+ * @ctx:         The decompression context.
+ * @dst:         The buffer to decompress src into.
+ * @dstCapacity: The size of the destination buffer. Must be at least as large
+ *               as the decompressed size. If the caller cannot upper bound the
+ *               decompressed size, then it's better to use the streaming API.
+ * @src:         The zstd compressed data to decompress. Multiple concatenated
+ *               frames and skippable frames are allowed.
+ * @srcSize:     The exact size of the data to decompress.
+ *
+ * Return:       The decompressed size or an error, which can be checked using
+ *               ZSTD_isError().
+ */
+size_t ZSTD_decompressDCtx(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize);
+
+/*-************************
+ * Simple dictionary API
+ **************************/
+
+/**
+ * ZSTD_compress_usingDict() - compress src into dst using a dictionary
+ * @ctx:         The context. Must have been initialized with a workspace at
+ *               least as large as ZSTD_CCtxWorkspaceBound(params.cParams).
+ * @dst:         The buffer to compress src into.
+ * @dstCapacity: The size of the destination buffer. May be any size, but
+ *               ZSTD_compressBound(srcSize) is guaranteed to be large enough.
+ * @src:         The data to compress.
+ * @srcSize:     The size of the data to compress.
+ * @dict:        The dictionary to use for compression.
+ * @dictSize:    The size of the dictionary.
+ * @params:      The parameters to use for compression. See ZSTD_getParams().
+ *
+ * Compression using a predefined dictionary. The same dictionary must be used
+ * during decompression.
+ *
+ * Return:       The compressed size or an error, which can be checked using
+ *               ZSTD_isError().
+ */
+size_t ZSTD_compress_usingDict(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize, const void *dict, size_t dictSize,
+	ZSTD_parameters params);
+
+/**
+ * ZSTD_decompress_usingDict() - decompress src into dst using a dictionary
+ * @ctx:         The decompression context.
+ * @dst:         The buffer to decompress src into.
+ * @dstCapacity: The size of the destination buffer. Must be at least as large
+ *               as the decompressed size. If the caller cannot upper bound the
+ *               decompressed size, then it's better to use the streaming API.
+ * @src:         The zstd compressed data to decompress. Multiple concatenated
+ *               frames and skippable frames are allowed.
+ * @srcSize:     The exact size of the data to decompress.
+ * @dict:        The dictionary to use for decompression. The same dictionary
+ *               must've been used to compress the data.
+ * @dictSize:    The size of the dictionary.
+ *
+ * Return:       The decompressed size or an error, which can be checked using
+ *               ZSTD_isError().
+ */
+size_t ZSTD_decompress_usingDict(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize, const void *dict, size_t dictSize);
+
+/*-**************************
+ * Fast dictionary API
+ ***************************/
+
+/**
+ * ZSTD_CDictWorkspaceBound() - memory needed to initialize a ZSTD_CDict
+ * @cParams: The compression parameters to be used for compression.
+ *
+ * Return:   A lower bound on the size of the workspace that is passed to
+ *           ZSTD_initCDict().
+ */
+size_t ZSTD_CDictWorkspaceBound(ZSTD_compressionParameters cParams);
+
+/**
+ * struct ZSTD_CDict - a digested dictionary to be used for compression
+ */
+typedef struct ZSTD_CDict_s ZSTD_CDict;
+
+/**
+ * ZSTD_initCDict() - initialize a digested dictionary for compression
+ * @dictBuffer:    The dictionary to digest. The buffer is referenced by the
+ *                 ZSTD_CDict so it must outlive the returned ZSTD_CDict.
+ * @dictSize:      The size of the dictionary.
+ * @params:        The parameters to use for compression. See ZSTD_getParams().
+ * @workspace:     The workspace. It must outlive the returned ZSTD_CDict.
+ * @workspaceSize: The workspace size. Must be at least
+ *                 ZSTD_CDictWorkspaceBound(params.cParams).
+ *
+ * When compressing multiple messages / blocks with the same dictionary it is
+ * recommended to load it just once. The ZSTD_CDict merely references the
+ * dictBuffer, so it must outlive the returned ZSTD_CDict.
+ *
+ * Return:         The digested dictionary emplaced into workspace.
+ */
+ZSTD_CDict *ZSTD_initCDict(const void *dictBuffer, size_t dictSize,
+	ZSTD_parameters params, void *workspace, size_t workspaceSize);
+
+/**
+ * ZSTD_compress_usingCDict() - compress src into dst using a ZSTD_CDict
+ * @ctx:         The context. Must have been initialized with a workspace at
+ *               least as large as ZSTD_CCtxWorkspaceBound(cParams) where
+ *               cParams are the compression parameters used to initialize the
+ *               cdict.
+ * @dst:         The buffer to compress src into.
+ * @dstCapacity: The size of the destination buffer. May be any size, but
+ *               ZSTD_compressBound(srcSize) is guaranteed to be large enough.
+ * @src:         The data to compress.
+ * @srcSize:     The size of the data to compress.
+ * @cdict:       The digested dictionary to use for compression.
+ * @params:      The parameters to use for compression. See ZSTD_getParams().
+ *
+ * Compression using a digested dictionary. The same dictionary must be used
+ * during decompression.
+ *
+ * Return:       The compressed size or an error, which can be checked using
+ *               ZSTD_isError().
+ */
+size_t ZSTD_compress_usingCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize, const ZSTD_CDict *cdict);
+
+
+/**
+ * ZSTD_DDictWorkspaceBound() - memory needed to initialize a ZSTD_DDict
+ *
+ * Return:  A lower bound on the size of the workspace that is passed to
+ *          ZSTD_initDDict().
+ */
+size_t ZSTD_DDictWorkspaceBound(void);
+
+/**
+ * struct ZSTD_DDict - a digested dictionary to be used for decompression
+ */
+typedef struct ZSTD_DDict_s ZSTD_DDict;
+
+/**
+ * ZSTD_initDDict() - initialize a digested dictionary for decompression
+ * @dictBuffer:    The dictionary to digest. The buffer is referenced by the
+ *                 ZSTD_DDict so it must outlive the returned ZSTD_DDict.
+ * @dictSize:      The size of the dictionary.
+ * @workspace:     The workspace. It must outlive the returned ZSTD_DDict.
+ * @workspaceSize: The workspace size. Must be at least
+ *                 ZSTD_DDictWorkspaceBound().
+ *
+ * When decompressing multiple messages / blocks with the same dictionary it is
+ * recommended to load it just once. The ZSTD_DDict merely references the
+ * dictBuffer, so it must outlive the returned ZSTD_DDict.
+ *
+ * Return:         The digested dictionary emplaced into workspace.
+ */
+ZSTD_DDict *ZSTD_initDDict(const void *dictBuffer, size_t dictSize,
+	void *workspace, size_t workspaceSize);
+
+/**
+ * ZSTD_decompress_usingDDict() - decompress src into dst using a ZSTD_DDict
+ * @ctx:         The decompression context.
+ * @dst:         The buffer to decompress src into.
+ * @dstCapacity: The size of the destination buffer. Must be at least as large
+ *               as the decompressed size. If the caller cannot upper bound the
+ *               decompressed size, then it's better to use the streaming API.
+ * @src:         The zstd compressed data to decompress. Multiple concatenated
+ *               frames and skippable frames are allowed.
+ * @srcSize:     The exact size of the data to decompress.
+ * @ddict:       The digested dictionary to use for decompression. The same
+ *               dictionary must've been used to compress the data.
+ *
+ * Return:       The decompressed size or an error, which can be checked using
+ *               ZSTD_isError().
+ */
+size_t ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst,
+	size_t dstCapacity, const void *src, size_t srcSize,
+	const ZSTD_DDict *ddict);
+
+
+/*-**************************
+ * Streaming
+ ***************************/
+
+/**
+ * struct ZSTD_inBuffer - input buffer for streaming
+ * @src:  Start of the input buffer.
+ * @size: Size of the input buffer.
+ * @pos:  Position where reading stopped. Will be updated.
+ *        Necessarily 0 <= pos <= size.
+ */
+typedef struct ZSTD_inBuffer_s {
+	const void *src;
+	size_t size;
+	size_t pos;
+} ZSTD_inBuffer;
+
+/**
+ * struct ZSTD_outBuffer - output buffer for streaming
+ * @dst:  Start of the output buffer.
+ * @size: Size of the output buffer.
+ * @pos:  Position where writing stopped. Will be updated.
+ *        Necessarily 0 <= pos <= size.
+ */
+typedef struct ZSTD_outBuffer_s {
+	void *dst;
+	size_t size;
+	size_t pos;
+} ZSTD_outBuffer;
+
+
+
+/*-*****************************************************************************
+ * Streaming compression - HowTo
+ *
+ * A ZSTD_CStream object is required to track streaming operation.
+ * Use ZSTD_initCStream() to initialize a ZSTD_CStream object.
+ * ZSTD_CStream objects can be reused multiple times on consecutive compression
+ * operations. It is recommended to re-use ZSTD_CStream in situations where many
+ * streaming operations will be achieved consecutively. Use one separate
+ * ZSTD_CStream per thread for parallel execution.
+ *
+ * Use ZSTD_compressStream() repetitively to consume input stream.
+ * The function will automatically update both `pos` fields.
+ * Note that it may not consume the entire input, in which case `pos < size`,
+ * and it's up to the caller to present again remaining data.
+ * It returns a hint for the preferred number of bytes to use as an input for
+ * the next function call.
+ *
+ * At any moment, it's possible to flush whatever data remains within internal
+ * buffer, using ZSTD_flushStream(). `output->pos` will be updated. There might
+ * still be some content left within the internal buffer if `output->size` is
+ * too small. It returns the number of bytes left in the internal buffer and
+ * must be called until it returns 0.
+ *
+ * ZSTD_endStream() instructs to finish a frame. It will perform a flush and
+ * write frame epilogue. The epilogue is required for decoders to consider a
+ * frame completed. Similar to ZSTD_flushStream(), it may not be able to flush
+ * the full content if `output->size` is too small. In which case, call again
+ * ZSTD_endStream() to complete the flush. It returns the number of bytes left
+ * in the internal buffer and must be called until it returns 0.
+ ******************************************************************************/
+
+/**
+ * ZSTD_CStreamWorkspaceBound() - memory needed to initialize a ZSTD_CStream
+ * @cParams: The compression parameters to be used for compression.
+ *
+ * Return:   A lower bound on the size of the workspace that is passed to
+ *           ZSTD_initCStream() and ZSTD_initCStream_usingCDict().
+ */
+size_t ZSTD_CStreamWorkspaceBound(ZSTD_compressionParameters cParams);
+
+/**
+ * struct ZSTD_CStream - the zstd streaming compression context
+ */
+typedef struct ZSTD_CStream_s ZSTD_CStream;
+
+/*===== ZSTD_CStream management functions =====*/
+/**
+ * ZSTD_initCStream() - initialize a zstd streaming compression context
+ * @params:         The zstd compression parameters.
+ * @pledgedSrcSize: If params.fParams.contentSizeFlag == 1 then the caller must
+ *                  pass the source size (zero means empty source). Otherwise,
+ *                  the caller may optionally pass the source size, or zero if
+ *                  unknown.
+ * @workspace:      The workspace to emplace the context into. It must outlive
+ *                  the returned context.
+ * @workspaceSize:  The size of workspace.
+ *                  Use ZSTD_CStreamWorkspaceBound(params.cParams) to determine
+ *                  how large the workspace must be.
+ *
+ * Return:          The zstd streaming compression context.
+ */
+ZSTD_CStream *ZSTD_initCStream(ZSTD_parameters params,
+	unsigned long long pledgedSrcSize, void *workspace,
+	size_t workspaceSize);
+
+/**
+ * ZSTD_initCStream_usingCDict() - initialize a streaming compression context
+ * @cdict:          The digested dictionary to use for compression.
+ * @pledgedSrcSize: Optionally the source size, or zero if unknown.
+ * @workspace:      The workspace to emplace the context into. It must outlive
+ *                  the returned context.
+ * @workspaceSize:  The size of workspace. Call ZSTD_CStreamWorkspaceBound()
+ *                  with the cParams used to initialize the cdict to determine
+ *                  how large the workspace must be.
+ *
+ * Return:          The zstd streaming compression context.
+ */
+ZSTD_CStream *ZSTD_initCStream_usingCDict(const ZSTD_CDict *cdict,
+	unsigned long long pledgedSrcSize, void *workspace,
+	size_t workspaceSize);
+
+/*===== Streaming compression functions =====*/
+/**
+ * ZSTD_resetCStream() - reset the context using parameters from creation
+ * @zcs:            The zstd streaming compression context to reset.
+ * @pledgedSrcSize: Optionally the source size, or zero if unknown.
+ *
+ * Resets the context using the parameters from creation. Skips dictionary
+ * loading, since it can be reused. If `pledgedSrcSize` is non-zero the frame
+ * content size is always written into the frame header.
+ *
+ * Return:          Zero or an error, which can be checked using ZSTD_isError().
+ */
+size_t ZSTD_resetCStream(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize);
+/**
+ * ZSTD_compressStream() - streaming compress some of input into output
+ * @zcs:    The zstd streaming compression context.
+ * @output: Destination buffer. `output->pos` is updated to indicate how much
+ *          compressed data was written.
+ * @input:  Source buffer. `input->pos` is updated to indicate how much data was
+ *          read. Note that it may not consume the entire input, in which case
+ *          `input->pos < input->size`, and it's up to the caller to present
+ *          remaining data again.
+ *
+ * The `input` and `output` buffers may be any size. Guaranteed to make some
+ * forward progress if `input` and `output` are not empty.
+ *
+ * Return:  A hint for the number of bytes to use as the input for the next
+ *          function call or an error, which can be checked using
+ *          ZSTD_isError().
+ */
+size_t ZSTD_compressStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output,
+	ZSTD_inBuffer *input);
+/**
+ * ZSTD_flushStream() - flush internal buffers into output
+ * @zcs:    The zstd streaming compression context.
+ * @output: Destination buffer. `output->pos` is updated to indicate how much
+ *          compressed data was written.
+ *
+ * ZSTD_flushStream() must be called until it returns 0, meaning all the data
+ * has been flushed. Since ZSTD_flushStream() causes a block to be ended,
+ * calling it too often will degrade the compression ratio.
+ *
+ * Return:  The number of bytes still present within internal buffers or an
+ *          error, which can be checked using ZSTD_isError().
+ */
+size_t ZSTD_flushStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output);
+/**
+ * ZSTD_endStream() - flush internal buffers into output and end the frame
+ * @zcs:    The zstd streaming compression context.
+ * @output: Destination buffer. `output->pos` is updated to indicate how much
+ *          compressed data was written.
+ *
+ * ZSTD_endStream() must be called until it returns 0, meaning all the data has
+ * been flushed and the frame epilogue has been written.
+ *
+ * Return:  The number of bytes still present within internal buffers or an
+ *          error, which can be checked using ZSTD_isError().
+ */
+size_t ZSTD_endStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output);
+
+/**
+ * ZSTD_CStreamInSize() - recommended size for the input buffer
+ *
+ * Return: The recommended size for the input buffer.
+ */
+size_t ZSTD_CStreamInSize(void);
+/**
+ * ZSTD_CStreamOutSize() - recommended size for the output buffer
+ *
+ * When the output buffer is at least this large, it is guaranteed to be large
+ * enough to flush at least one complete compressed block.
+ *
+ * Return: The recommended size for the output buffer.
+ */
+size_t ZSTD_CStreamOutSize(void);
+
+
+
+/*-*****************************************************************************
+ * Streaming decompression - HowTo
+ *
+ * A ZSTD_DStream object is required to track streaming operations.
+ * Use ZSTD_initDStream() to initialize a ZSTD_DStream object.
+ * ZSTD_DStream objects can be re-used multiple times.
+ *
+ * Use ZSTD_decompressStream() repetitively to consume your input.
+ * The function will update both `pos` fields.
+ * If `input->pos < input->size`, some input has not been consumed.
+ * It's up to the caller to present again remaining data.
+ * If `output->pos < output->size`, decoder has flushed everything it could.
+ * Returns 0 iff a frame is completely decoded and fully flushed.
+ * Otherwise it returns a suggested next input size that will never load more
+ * than the current frame.
+ ******************************************************************************/
+
+/**
+ * ZSTD_DStreamWorkspaceBound() - memory needed to initialize a ZSTD_DStream
+ * @maxWindowSize: The maximum window size allowed for compressed frames.
+ *
+ * Return:         A lower bound on the size of the workspace that is passed to
+ *                 ZSTD_initDStream() and ZSTD_initDStream_usingDDict().
+ */
+size_t ZSTD_DStreamWorkspaceBound(size_t maxWindowSize);
+
+/**
+ * struct ZSTD_DStream - the zstd streaming decompression context
+ */
+typedef struct ZSTD_DStream_s ZSTD_DStream;
+/*===== ZSTD_DStream management functions =====*/
+/**
+ * ZSTD_initDStream() - initialize a zstd streaming decompression context
+ * @maxWindowSize: The maximum window size allowed for compressed frames.
+ * @workspace:     The workspace to emplace the context into. It must outlive
+ *                 the returned context.
+ * @workspaceSize: The size of workspace.
+ *                 Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine
+ *                 how large the workspace must be.
+ *
+ * Return:         The zstd streaming decompression context.
+ */
+ZSTD_DStream *ZSTD_initDStream(size_t maxWindowSize, void *workspace,
+	size_t workspaceSize);
+/**
+ * ZSTD_initDStream_usingDDict() - initialize streaming decompression context
+ * @maxWindowSize: The maximum window size allowed for compressed frames.
+ * @ddict:         The digested dictionary to use for decompression.
+ * @workspace:     The workspace to emplace the context into. It must outlive
+ *                 the returned context.
+ * @workspaceSize: The size of workspace.
+ *                 Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine
+ *                 how large the workspace must be.
+ *
+ * Return:         The zstd streaming decompression context.
+ */
+ZSTD_DStream *ZSTD_initDStream_usingDDict(size_t maxWindowSize,
+	const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize);
+
+/*===== Streaming decompression functions =====*/
+/**
+ * ZSTD_resetDStream() - reset the context using parameters from creation
+ * @zds:   The zstd streaming decompression context to reset.
+ *
+ * Resets the context using the parameters from creation. Skips dictionary
+ * loading, since it can be reused.
+ *
+ * Return: Zero or an error, which can be checked using ZSTD_isError().
+ */
+size_t ZSTD_resetDStream(ZSTD_DStream *zds);
+/**
+ * ZSTD_decompressStream() - streaming decompress some of input into output
+ * @zds:    The zstd streaming decompression context.
+ * @output: Destination buffer. `output.pos` is updated to indicate how much
+ *          decompressed data was written.
+ * @input:  Source buffer. `input.pos` is updated to indicate how much data was
+ *          read. Note that it may not consume the entire input, in which case
+ *          `input.pos < input.size`, and it's up to the caller to present
+ *          remaining data again.
+ *
+ * The `input` and `output` buffers may be any size. Guaranteed to make some
+ * forward progress if `input` and `output` are not empty.
+ * ZSTD_decompressStream() will not consume the last byte of the frame until
+ * the entire frame is flushed.
+ *
+ * Return:  Returns 0 iff a frame is completely decoded and fully flushed.
+ *          Otherwise returns a hint for the number of bytes to use as the input
+ *          for the next function call or an error, which can be checked using
+ *          ZSTD_isError(). The size hint will never load more than the frame.
+ */
+size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output,
+	ZSTD_inBuffer *input);
+
+/**
+ * ZSTD_DStreamInSize() - recommended size for the input buffer
+ *
+ * Return: The recommended size for the input buffer.
+ */
+size_t ZSTD_DStreamInSize(void);
+/**
+ * ZSTD_DStreamOutSize() - recommended size for the output buffer
+ *
+ * When the output buffer is at least this large, it is guaranteed to be large
+ * enough to flush at least one complete decompressed block.
+ *
+ * Return: The recommended size for the output buffer.
+ */
+size_t ZSTD_DStreamOutSize(void);
+
+
+/* --- Constants ---*/
+#define ZSTD_MAGICNUMBER            0xFD2FB528   /* >= v0.8.0 */
+#define ZSTD_MAGIC_SKIPPABLE_START  0x184D2A50U
+
+#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
+#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
+
+#define ZSTD_WINDOWLOG_MAX_32  27
+#define ZSTD_WINDOWLOG_MAX_64  27
+#define ZSTD_WINDOWLOG_MAX \
+	((unsigned int)(sizeof(size_t) == 4 \
+		? ZSTD_WINDOWLOG_MAX_32 \
+		: ZSTD_WINDOWLOG_MAX_64))
+#define ZSTD_WINDOWLOG_MIN 10
+#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX
+#define ZSTD_HASHLOG_MIN        6
+#define ZSTD_CHAINLOG_MAX     (ZSTD_WINDOWLOG_MAX+1)
+#define ZSTD_CHAINLOG_MIN      ZSTD_HASHLOG_MIN
+#define ZSTD_HASHLOG3_MAX      17
+#define ZSTD_SEARCHLOG_MAX    (ZSTD_WINDOWLOG_MAX-1)
+#define ZSTD_SEARCHLOG_MIN      1
+/* only for ZSTD_fast, other strategies are limited to 6 */
+#define ZSTD_SEARCHLENGTH_MAX   7
+/* only for ZSTD_btopt, other strategies are limited to 4 */
+#define ZSTD_SEARCHLENGTH_MIN   3
+#define ZSTD_TARGETLENGTH_MIN   4
+#define ZSTD_TARGETLENGTH_MAX 999
+
+/* for static allocation */
+#define ZSTD_FRAMEHEADERSIZE_MAX 18
+#define ZSTD_FRAMEHEADERSIZE_MIN  6
+static const size_t ZSTD_frameHeaderSize_prefix = 5;
+static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN;
+static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX;
+/* magic number + skippable frame length */
+static const size_t ZSTD_skippableHeaderSize = 8;
+
+
+/*-*************************************
+ * Compressed size functions
+ **************************************/
+
+/**
+ * ZSTD_findFrameCompressedSize() - returns the size of a compressed frame
+ * @src:     Source buffer. It should point to the start of a zstd encoded frame
+ *           or a skippable frame.
+ * @srcSize: The size of the source buffer. It must be at least as large as the
+ *           size of the frame.
+ *
+ * Return:   The compressed size of the frame pointed to by `src` or an error,
+ *           which can be check with ZSTD_isError().
+ *           Suitable to pass to ZSTD_decompress() or similar functions.
+ */
+size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize);
+
+/*-*************************************
+ * Decompressed size functions
+ **************************************/
+/**
+ * ZSTD_getFrameContentSize() - returns the content size in a zstd frame header
+ * @src:     It should point to the start of a zstd encoded frame.
+ * @srcSize: The size of the source buffer. It must be at least as large as the
+ *           frame header. `ZSTD_frameHeaderSize_max` is always large enough.
+ *
+ * Return:   The frame content size stored in the frame header if known.
+ *           `ZSTD_CONTENTSIZE_UNKNOWN` if the content size isn't stored in the
+ *           frame header. `ZSTD_CONTENTSIZE_ERROR` on invalid input.
+ */
+unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
+
+/**
+ * ZSTD_findDecompressedSize() - returns decompressed size of a series of frames
+ * @src:     It should point to the start of a series of zstd encoded and/or
+ *           skippable frames.
+ * @srcSize: The exact size of the series of frames.
+ *
+ * If any zstd encoded frame in the series doesn't have the frame content size
+ * set, `ZSTD_CONTENTSIZE_UNKNOWN` is returned. But frame content size is always
+ * set when using ZSTD_compress(). The decompressed size can be very large.
+ * If the source is untrusted, the decompressed size could be wrong or
+ * intentionally modified. Always ensure the result fits within the
+ * application's authorized limits. ZSTD_findDecompressedSize() handles multiple
+ * frames, and so it must traverse the input to read each frame header. This is
+ * efficient as most of the data is skipped, however it does mean that all frame
+ * data must be present and valid.
+ *
+ * Return:   Decompressed size of all the data contained in the frames if known.
+ *           `ZSTD_CONTENTSIZE_UNKNOWN` if the decompressed size is unknown.
+ *           `ZSTD_CONTENTSIZE_ERROR` if an error occurred.
+ */
+unsigned long long ZSTD_findDecompressedSize(const void *src, size_t srcSize);
+
+/*-*************************************
+ * Advanced compression functions
+ **************************************/
+/**
+ * ZSTD_checkCParams() - ensure parameter values remain within authorized range
+ * @cParams: The zstd compression parameters.
+ *
+ * Return:   Zero or an error, which can be checked using ZSTD_isError().
+ */
+size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams);
+
+/**
+ * ZSTD_adjustCParams() - optimize parameters for a given srcSize and dictSize
+ * @srcSize:  Optionally the estimated source size, or zero if unknown.
+ * @dictSize: Optionally the estimated dictionary size, or zero if unknown.
+ *
+ * Return:    The optimized parameters.
+ */
+ZSTD_compressionParameters ZSTD_adjustCParams(
+	ZSTD_compressionParameters cParams, unsigned long long srcSize,
+	size_t dictSize);
+
+/*--- Advanced decompression functions ---*/
+
+/**
+ * ZSTD_isFrame() - returns true iff the buffer starts with a valid frame
+ * @buffer: The source buffer to check.
+ * @size:   The size of the source buffer, must be at least 4 bytes.
+ *
+ * Return: True iff the buffer starts with a zstd or skippable frame identifier.
+ */
+unsigned int ZSTD_isFrame(const void *buffer, size_t size);
+
+/**
+ * ZSTD_getDictID_fromDict() - returns the dictionary id stored in a dictionary
+ * @dict:     The dictionary buffer.
+ * @dictSize: The size of the dictionary buffer.
+ *
+ * Return:    The dictionary id stored within the dictionary or 0 if the
+ *            dictionary is not a zstd dictionary. If it returns 0 the
+ *            dictionary can still be loaded as a content-only dictionary.
+ */
+unsigned int ZSTD_getDictID_fromDict(const void *dict, size_t dictSize);
+
+/**
+ * ZSTD_getDictID_fromDDict() - returns the dictionary id stored in a ZSTD_DDict
+ * @ddict: The ddict to find the id of.
+ *
+ * Return: The dictionary id stored within `ddict` or 0 if the dictionary is not
+ *         a zstd dictionary. If it returns 0 `ddict` will be loaded as a
+ *         content-only dictionary.
+ */
+unsigned int ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict);
+
+/**
+ * ZSTD_getDictID_fromFrame() - returns the dictionary id stored in a zstd frame
+ * @src:     Source buffer. It must be a zstd encoded frame.
+ * @srcSize: The size of the source buffer. It must be at least as large as the
+ *           frame header. `ZSTD_frameHeaderSize_max` is always large enough.
+ *
+ * Return:   The dictionary id required to decompress the frame stored within
+ *           `src` or 0 if the dictionary id could not be decoded. It can return
+ *           0 if the frame does not require a dictionary, the dictionary id
+ *           wasn't stored in the frame, `src` is not a zstd frame, or `srcSize`
+ *           is too small.
+ */
+unsigned int ZSTD_getDictID_fromFrame(const void *src, size_t srcSize);
+
+/**
+ * struct ZSTD_frameParams - zstd frame parameters stored in the frame header
+ * @frameContentSize: The frame content size, or 0 if not present.
+ * @windowSize:       The window size, or 0 if the frame is a skippable frame.
+ * @dictID:           The dictionary id, or 0 if not present.
+ * @checksumFlag:     Whether a checksum was used.
+ */
+typedef struct {
+	unsigned long long frameContentSize;
+	unsigned int windowSize;
+	unsigned int dictID;
+	unsigned int checksumFlag;
+} ZSTD_frameParams;
+
+/**
+ * ZSTD_getFrameParams() - extracts parameters from a zstd or skippable frame
+ * @fparamsPtr: On success the frame parameters are written here.
+ * @src:        The source buffer. It must point to a zstd or skippable frame.
+ * @srcSize:    The size of the source buffer. `ZSTD_frameHeaderSize_max` is
+ *              always large enough to succeed.
+ *
+ * Return:      0 on success. If more data is required it returns how many bytes
+ *              must be provided to make forward progress. Otherwise it returns
+ *              an error, which can be checked using ZSTD_isError().
+ */
+size_t ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src,
+	size_t srcSize);
+
+/*-*****************************************************************************
+ * Buffer-less and synchronous inner streaming functions
+ *
+ * This is an advanced API, giving full control over buffer management, for
+ * users which need direct control over memory.
+ * But it's also a complex one, with many restrictions (documented below).
+ * Prefer using normal streaming API for an easier experience
+ ******************************************************************************/
+
+/*-*****************************************************************************
+ * Buffer-less streaming compression (synchronous mode)
+ *
+ * A ZSTD_CCtx object is required to track streaming operations.
+ * Use ZSTD_initCCtx() to initialize a context.
+ * ZSTD_CCtx object can be re-used multiple times within successive compression
+ * operations.
+ *
+ * Start by initializing a context.
+ * Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary
+ * compression,
+ * or ZSTD_compressBegin_advanced(), for finer parameter control.
+ * It's also possible to duplicate a reference context which has already been
+ * initialized, using ZSTD_copyCCtx()
+ *
+ * Then, consume your input using ZSTD_compressContinue().
+ * There are some important considerations to keep in mind when using this
+ * advanced function :
+ * - ZSTD_compressContinue() has no internal buffer. It uses externally provided
+ *   buffer only.
+ * - Interface is synchronous : input is consumed entirely and produce 1+
+ *   (or more) compressed blocks.
+ * - Caller must ensure there is enough space in `dst` to store compressed data
+ *   under worst case scenario. Worst case evaluation is provided by
+ *   ZSTD_compressBound().
+ *   ZSTD_compressContinue() doesn't guarantee recover after a failed
+ *   compression.
+ * - ZSTD_compressContinue() presumes prior input ***is still accessible and
+ *   unmodified*** (up to maximum distance size, see WindowLog).
+ *   It remembers all previous contiguous blocks, plus one separated memory
+ *   segment (which can itself consists of multiple contiguous blocks)
+ * - ZSTD_compressContinue() detects that prior input has been overwritten when
+ *   `src` buffer overlaps. In which case, it will "discard" the relevant memory
+ *   section from its history.
+ *
+ * Finish a frame with ZSTD_compressEnd(), which will write the last block(s)
+ * and optional checksum. It's possible to use srcSize==0, in which case, it
+ * will write a final empty block to end the frame. Without last block mark,
+ * frames will be considered unfinished (corrupted) by decoders.
+ *
+ * `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new
+ * frame.
+ ******************************************************************************/
+
+/*=====   Buffer-less streaming compression functions  =====*/
+size_t ZSTD_compressBegin(ZSTD_CCtx *cctx, int compressionLevel);
+size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx *cctx, const void *dict,
+	size_t dictSize, int compressionLevel);
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx *cctx, const void *dict,
+	size_t dictSize, ZSTD_parameters params,
+	unsigned long long pledgedSrcSize);
+size_t ZSTD_copyCCtx(ZSTD_CCtx *cctx, const ZSTD_CCtx *preparedCCtx,
+	unsigned long long pledgedSrcSize);
+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict,
+	unsigned long long pledgedSrcSize);
+size_t ZSTD_compressContinue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize);
+size_t ZSTD_compressEnd(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize);
+
+
+
+/*-*****************************************************************************
+ * Buffer-less streaming decompression (synchronous mode)
+ *
+ * A ZSTD_DCtx object is required to track streaming operations.
+ * Use ZSTD_initDCtx() to initialize a context.
+ * A ZSTD_DCtx object can be re-used multiple times.
+ *
+ * First typical operation is to retrieve frame parameters, using
+ * ZSTD_getFrameParams(). It fills a ZSTD_frameParams structure which provide
+ * important information to correctly decode the frame, such as the minimum
+ * rolling buffer size to allocate to decompress data (`windowSize`), and the
+ * dictionary ID used.
+ * Note: content size is optional, it may not be present. 0 means unknown.
+ * Note that these values could be wrong, either because of data malformation,
+ * or because an attacker is spoofing deliberate false information. As a
+ * consequence, check that values remain within valid application range,
+ * especially `windowSize`, before allocation. Each application can set its own
+ * limit, depending on local restrictions. For extended interoperability, it is
+ * recommended to support at least 8 MB.
+ * Frame parameters are extracted from the beginning of the compressed frame.
+ * Data fragment must be large enough to ensure successful decoding, typically
+ * `ZSTD_frameHeaderSize_max` bytes.
+ * Result: 0: successful decoding, the `ZSTD_frameParams` structure is filled.
+ *        >0: `srcSize` is too small, provide at least this many bytes.
+ *        errorCode, which can be tested using ZSTD_isError().
+ *
+ * Start decompression, with ZSTD_decompressBegin() or
+ * ZSTD_decompressBegin_usingDict(). Alternatively, you can copy a prepared
+ * context, using ZSTD_copyDCtx().
+ *
+ * Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue()
+ * alternatively.
+ * ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize'
+ * to ZSTD_decompressContinue().
+ * ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will
+ * fail.
+ *
+ * The result of ZSTD_decompressContinue() is the number of bytes regenerated
+ * within 'dst' (necessarily <= dstCapacity). It can be zero, which is not an
+ * error; it just means ZSTD_decompressContinue() has decoded some metadata
+ * item. It can also be an error code, which can be tested with ZSTD_isError().
+ *
+ * ZSTD_decompressContinue() needs previous data blocks during decompression, up
+ * to `windowSize`. They should preferably be located contiguously, prior to
+ * current block. Alternatively, a round buffer of sufficient size is also
+ * possible. Sufficient size is determined by frame parameters.
+ * ZSTD_decompressContinue() is very sensitive to contiguity, if 2 blocks don't
+ * follow each other, make sure that either the compressor breaks contiguity at
+ * the same place, or that previous contiguous segment is large enough to
+ * properly handle maximum back-reference.
+ *
+ * A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
+ * Context can then be reset to start a new decompression.
+ *
+ * Note: it's possible to know if next input to present is a header or a block,
+ * using ZSTD_nextInputType(). This information is not required to properly
+ * decode a frame.
+ *
+ * == Special case: skippable frames ==
+ *
+ * Skippable frames allow integration of user-defined data into a flow of
+ * concatenated frames. Skippable frames will be ignored (skipped) by a
+ * decompressor. The format of skippable frames is as follows:
+ * a) Skippable frame ID - 4 Bytes, Little endian format, any value from
+ *    0x184D2A50 to 0x184D2A5F
+ * b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
+ * c) Frame Content - any content (User Data) of length equal to Frame Size
+ * For skippable frames ZSTD_decompressContinue() always returns 0.
+ * For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0
+ * what means that a frame is skippable.
+ * Note: If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might
+ *       actually be a zstd encoded frame with no content. For purposes of
+ *       decompression, it is valid in both cases to skip the frame using
+ *       ZSTD_findFrameCompressedSize() to find its size in bytes.
+ * It also returns frame size as fparamsPtr->frameContentSize.
+ ******************************************************************************/
+
+/*=====   Buffer-less streaming decompression functions  =====*/
+size_t ZSTD_decompressBegin(ZSTD_DCtx *dctx);
+size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict,
+	size_t dictSize);
+void   ZSTD_copyDCtx(ZSTD_DCtx *dctx, const ZSTD_DCtx *preparedDCtx);
+size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx);
+size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize);
+typedef enum {
+	ZSTDnit_frameHeader,
+	ZSTDnit_blockHeader,
+	ZSTDnit_block,
+	ZSTDnit_lastBlock,
+	ZSTDnit_checksum,
+	ZSTDnit_skippableFrame
+} ZSTD_nextInputType_e;
+ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx *dctx);
+
+/*-*****************************************************************************
+ * Block functions
+ *
+ * Block functions produce and decode raw zstd blocks, without frame metadata.
+ * Frame metadata cost is typically ~18 bytes, which can be non-negligible for
+ * very small blocks (< 100 bytes). User will have to take in charge required
+ * information to regenerate data, such as compressed and content sizes.
+ *
+ * A few rules to respect:
+ * - Compressing and decompressing require a context structure
+ *   + Use ZSTD_initCCtx() and ZSTD_initDCtx()
+ * - It is necessary to init context before starting
+ *   + compression : ZSTD_compressBegin()
+ *   + decompression : ZSTD_decompressBegin()
+ *   + variants _usingDict() are also allowed
+ *   + copyCCtx() and copyDCtx() work too
+ * - Block size is limited, it must be <= ZSTD_getBlockSizeMax()
+ *   + If you need to compress more, cut data into multiple blocks
+ *   + Consider using the regular ZSTD_compress() instead, as frame metadata
+ *     costs become negligible when source size is large.
+ * - When a block is considered not compressible enough, ZSTD_compressBlock()
+ *   result will be zero. In which case, nothing is produced into `dst`.
+ *   + User must test for such outcome and deal directly with uncompressed data
+ *   + ZSTD_decompressBlock() doesn't accept uncompressed data as input!!!
+ *   + In case of multiple successive blocks, decoder must be informed of
+ *     uncompressed block existence to follow proper history. Use
+ *     ZSTD_insertBlock() in such a case.
+ ******************************************************************************/
+
+/* Define for static allocation */
+#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024)
+/*=====   Raw zstd block functions  =====*/
+size_t ZSTD_getBlockSizeMax(ZSTD_CCtx *cctx);
+size_t ZSTD_compressBlock(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize);
+size_t ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize);
+size_t ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart,
+	size_t blockSize);
+
+#endif  /* ZSTD_H */

diff --git a/include/net/flow.h b/include/net/flow.h
index 6bbbca8..6fc3e13 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h

@@ -25,6 +25,10 @@ struct flowi_tunnel {
 	__be64			tun_id;
 };
 
+struct flowi_xfrm {
+	__u32			if_id;
+};
+
 struct flowi_common {
 	int	flowic_oif;
 	int	flowic_iif;
@@ -38,6 +42,7 @@ struct flowi_common {
 #define FLOWI_FLAG_SKIP_NH_OIF		0x04
 	__u32	flowic_secid;
 	struct flowi_tunnel flowic_tun_key;
+	struct flowi_xfrm xfrm;
 	kuid_t  flowic_uid;
 };
 
@@ -77,6 +82,7 @@ struct flowi4 {
 #define flowi4_secid		__fl_common.flowic_secid
 #define flowi4_tun_key		__fl_common.flowic_tun_key
 #define flowi4_uid		__fl_common.flowic_uid
+#define flowi4_xfrm		__fl_common.xfrm
 
 	/* (saddr,daddr) must be grouped, same order as in IP header */
 	__be32			saddr;
@@ -108,6 +114,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
 	fl4->flowi4_flags = flags;
 	fl4->flowi4_secid = 0;
 	fl4->flowi4_tun_key.tun_id = 0;
+	fl4->flowi4_xfrm.if_id = 0;
 	fl4->flowi4_uid = uid;
 	fl4->daddr = daddr;
 	fl4->saddr = saddr;
@@ -137,6 +144,7 @@ struct flowi6 {
 #define flowi6_secid		__fl_common.flowic_secid
 #define flowi6_tun_key		__fl_common.flowic_tun_key
 #define flowi6_uid		__fl_common.flowic_uid
+#define flowi6_xfrm		__fl_common.xfrm
 	struct in6_addr		daddr;
 	struct in6_addr		saddr;
 	/* Note: flowi6_tos is encoded in flowlabel, too. */
@@ -183,6 +191,7 @@ struct flowi {
 #define flowi_secid	u.__fl_common.flowic_secid
 #define flowi_tun_key	u.__fl_common.flowic_tun_key
 #define flowi_uid	u.__fl_common.flowic_uid
+#define flowi_xfrm	u.__fl_common.xfrm
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
 static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 9b6e6a4..1de0397 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h

@@ -21,6 +21,7 @@
 #include <net/ipv6.h>
 #include <net/ip6_fib.h>
 #include <net/flow.h>
+#include <net/gro_cells.h>
 
 #include <linux/interrupt.h>
 
@@ -136,6 +137,7 @@ struct xfrm_state {
 	struct xfrm_id		id;
 	struct xfrm_selector	sel;
 	struct xfrm_mark	mark;
+	u32			if_id;
 	u32			tfcpad;
 
 	u32			genid;
@@ -155,7 +157,7 @@ struct xfrm_state {
 		int		header_len;
 		int		trailer_len;
 		u32		extra_flags;
-		u32		output_mark;
+		struct xfrm_mark	smark;
 	} props;
 
 	struct xfrm_lifetime_cfg lft;
@@ -275,6 +277,13 @@ struct xfrm_replay {
 	int	(*overflow)(struct xfrm_state *x, struct sk_buff *skb);
 };
 
+struct xfrm_if_cb {
+	struct xfrm_if	*(*decode_session)(struct sk_buff *skb);
+};
+
+void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb);
+void xfrm_if_unregister_cb(void);
+
 struct net_device;
 struct xfrm_type;
 struct xfrm_dst;
@@ -534,6 +543,7 @@ struct xfrm_policy {
 	atomic_t		genid;
 	u32			priority;
 	u32			index;
+	u32			if_id;
 	struct xfrm_mark	mark;
 	struct xfrm_selector	selector;
 	struct xfrm_lifetime_cfg lft;
@@ -967,6 +977,22 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
 
 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev);
 
+struct xfrm_if_parms {
+	char name[IFNAMSIZ];	/* name of XFRM device */
+	int link;		/* ifindex of underlying L2 interface */
+	u32 if_id;		/* interface identifyer */
+};
+
+struct xfrm_if {
+	struct xfrm_if __rcu *next;	/* next interface in list */
+	struct net_device *dev;		/* virtual device associated with interface */
+	struct net_device *phydev;	/* physical device */
+	struct net *net;		/* netns for packet i/o */
+	struct xfrm_if_parms p;		/* interface parms */
+
+	struct gro_cells gro_cells;
+};
+
 struct sec_path {
 	atomic_t		refcnt;
 	int			len;
@@ -1166,12 +1192,12 @@ void xfrm_garbage_collect(struct net *net);
 
 static inline void xfrm_sk_free_policy(struct sock *sk) {}
 static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { return 0; }
-static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }  
-static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; } 
+static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }
+static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; }
 static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
-{ 
-	return 1; 
-} 
+{
+	return 1;
+}
 static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
 {
 	return 1;
@@ -1258,7 +1284,7 @@ __xfrm6_state_addr_check(const struct xfrm_state *x,
 {
 	if (ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)&x->id.daddr) &&
 	    (ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)&x->props.saddr) ||
-	     ipv6_addr_any((struct in6_addr *)saddr) || 
+	     ipv6_addr_any((struct in6_addr *)saddr) ||
 	     ipv6_addr_any((struct in6_addr *)&x->props.saddr)))
 		return 1;
 	return 0;
@@ -1434,7 +1460,7 @@ struct xfrm_state *xfrm_state_find(const xfrm_address_t *daddr,
 				   struct xfrm_tmpl *tmpl,
 				   struct xfrm_policy *pol, int *err,
 				   unsigned short family);
-struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark,
+struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
 				       xfrm_address_t *daddr,
 				       xfrm_address_t *saddr,
 				       unsigned short family,
@@ -1563,7 +1589,7 @@ int xfrm_user_policy(struct sock *sk, int optname,
 static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
 {
  	return -ENOPROTOOPT;
-} 
+}
 
 static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 {
@@ -1581,20 +1607,20 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
 		     void *);
 void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net);
 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
-struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark,
+struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
 					  u8 type, int dir,
 					  struct xfrm_selector *sel,
 					  struct xfrm_sec_ctx *ctx, int delete,
 					  int *err);
-struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8, int dir,
-				     u32 id, int delete, int *err);
+struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id, u8,
+				     int dir, u32 id, int delete, int *err);
 int xfrm_policy_flush(struct net *net, u8 type, bool task_valid);
 void xfrm_policy_hash_rebuild(struct net *net);
 u32 xfrm_get_acqseq(void);
 int verify_spi_info(u8 proto, u32 min, u32 max);
 int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);
 struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark,
-				 u8 mode, u32 reqid, u8 proto,
+				 u8 mode, u32 reqid, u32 if_id, u8 proto,
 				 const xfrm_address_t *daddr,
 				 const xfrm_address_t *saddr, int create,
 				 unsigned short family);
@@ -1788,6 +1814,22 @@ static inline int xfrm_mark_put(struct sk_buff *skb, const struct xfrm_mark *m)
 	return ret;
 }
 
+static inline __u32 xfrm_smark_get(__u32 mark, struct xfrm_state *x)
+{
+	struct xfrm_mark *m = &x->props.smark;
+
+	return (m->v & m->m) | (mark & ~m->m);
+}
+
+static inline int xfrm_if_id_put(struct sk_buff *skb, __u32 if_id)
+{
+	int ret = 0;
+
+	if (if_id)
+		ret = nla_put_u32(skb, XFRMA_IF_ID, if_id);
+	return ret;
+}
+
 static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x,
 				    unsigned int family)
 {

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index b4fba66..a39a43b 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h

@@ -444,6 +444,16 @@ enum {
 
 #define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1)
 
+/* XFRM section */
+enum {
+	IFLA_XFRM_UNSPEC,
+	IFLA_XFRM_LINK,
+	IFLA_XFRM_IF_ID,
+	__IFLA_XFRM_MAX
+};
+
+#define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1)
+
 enum macsec_validation_type {
 	MACSEC_VALIDATE_DISABLED = 0,
 	MACSEC_VALIDATE_CHECK = 1,

diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index 7d75e56..4215bbdb 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h

@@ -304,9 +304,12 @@ enum xfrm_attr_type_t {
 	XFRMA_ADDRESS_FILTER,	/* struct xfrm_address_filter */
 	XFRMA_PAD,
 	XFRMA_OFFLOAD_DEV,	/* struct xfrm_state_offload */
-	XFRMA_OUTPUT_MARK,	/* __u32 */
+	XFRMA_SET_MARK,		/* __u32 */
+	XFRMA_SET_MARK_MASK,	/* __u32 */
+	XFRMA_IF_ID,		/* __u32 */
 	__XFRMA_MAX
 
+#define XFRMA_OUTPUT_MARK XFRMA_SET_MARK	/* Compatibility */
 #define XFRMA_MAX (__XFRMA_MAX - 1)
 };
 

diff --git a/init/Kconfig b/init/Kconfig
index faff7b0..97ba70f 100644
--- a/init/Kconfig
+++ b/init/Kconfig

@@ -1949,6 +1949,20 @@
 	  SLUB sysfs support. /sys/slab will not exist and there will be
 	  no support for cache validation etc.
 
+config SLUB_MEMCG_SYSFS_ON
+	default n
+	bool "Enable memcg SLUB sysfs support by default" if EXPERT
+	depends on SLUB && SYSFS && MEMCG
+	help
+	  SLUB creates a directory under /sys/kernel/slab for each
+	  allocation cache to host info and debug files. If memory
+	  cgroup is enabled, each cache can have per memory cgroup
+	  caches. SLUB can create the same sysfs directories for these
+	  caches under /sys/kernel/slab/CACHE/cgroup but it can lead
+	  to a very high number of debug files being created. This is
+	  controlled by slub_memcg_sysfs boot parameter and this
+	  config option determines the parameter's default value.
+
 config COMPAT_BRK
 	bool "Disable heap randomization"
 	default y

diff --git a/kernel/fork.c b/kernel/fork.c
index ae3e276..3a00f8d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c

@@ -345,6 +345,7 @@ void put_task_stack(struct task_struct *tsk)
 void free_task(struct task_struct *tsk)
 {
 	scs_release(tsk);
+	cpufreq_task_times_exit(tsk);
 
 #ifndef CONFIG_THREAD_INFO_IN_TASK
 	/*
@@ -359,9 +360,6 @@ void free_task(struct task_struct *tsk)
 	 */
 	WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0);
 #endif
-#ifdef CONFIG_CPU_FREQ_TIMES
-	cpufreq_task_times_exit(tsk);
-#endif
 	rt_mutex_debug_task_free(tsk);
 	ftrace_graph_exit_task(tsk);
 	put_seccomp_filter(tsk);
@@ -1545,6 +1543,8 @@ static __latent_entropy struct task_struct *copy_process(
 	if (!p)
 		goto fork_out;
 
+	cpufreq_task_times_init(p);
+
 	/*
 	 * This _must_ happen before we call free_task(), i.e. before we jump
 	 * to any of the bad_fork_* labels. This is to avoid freeing
@@ -1557,10 +1557,6 @@ static __latent_entropy struct task_struct *copy_process(
 	 */
 	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
 
-#ifdef CONFIG_CPU_FREQ_TIMES
-	cpufreq_task_times_init(p);
-#endif
-
 	ftrace_graph_init_task(p);
 
 	rt_mutex_init_task(p);
@@ -2009,6 +2005,8 @@ long _do_fork(unsigned long clone_flags,
 		struct completion vfork;
 		struct pid *pid;
 
+		cpufreq_task_times_alloc(p);
+
 		trace_sched_process_fork(current, p);
 
 		pid = get_task_pid(p, PIDTYPE_PID);

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f97e8f6..7fe65dd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c

@@ -2334,10 +2334,6 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif
 
-#ifdef CONFIG_CPU_FREQ_TIMES
-	cpufreq_task_times_init(p);
-#endif
-
 	RB_CLEAR_NODE(&p->dl.rb_node);
 	init_dl_task_timer(&p->dl);
 	__dl_clear_params(p);
@@ -2499,9 +2495,6 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 	cpu = get_cpu();
 
 	__sched_fork(clone_flags, p);
-#ifdef CONFIG_CPU_FREQ_TIMES
-	cpufreq_task_times_alloc(p);
-#endif
 	/*
 	 * We mark the process as NEW here. This guarantees that
 	 * nobody will actually run it, and a signal or other external

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 5d99d8b..47a946f 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c

@@ -153,10 +153,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
 	/* Account for user time used */
 	acct_account_cputime(p);
 
-#ifdef CONFIG_CPU_FREQ_TIMES
 	/* Account power usage for user time */
 	cpufreq_acct_update_power(p, cputime);
-#endif
 }
 
 /*
@@ -207,10 +205,9 @@ void __account_system_time(struct task_struct *p, cputime_t cputime,
 
 	/* Account for system time used */
 	acct_account_cputime(p);
-#ifdef CONFIG_CPU_FREQ_TIMES
+
 	/* Account power usage for system time */
 	cpufreq_acct_update_power(p, cputime);
-#endif
 }
 
 /*

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 3d53ab7..f7aea91 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c

@@ -270,14 +270,14 @@ sd_alloc_ctl_energy_table(struct sched_group_energy *sge)
 		return NULL;
 
 	set_table_entry(&table[0], "nr_idle_states", &sge->nr_idle_states,
-			sizeof(int), 0644, proc_dointvec_minmax, false);
+			sizeof(int), 0444, proc_dointvec_minmax, false);
 	set_table_entry(&table[1], "idle_states", &sge->idle_states[0].power,
-			sge->nr_idle_states*sizeof(struct idle_state), 0644,
+			sge->nr_idle_states*sizeof(struct idle_state), 0444,
 			proc_doulongvec_minmax, false);
 	set_table_entry(&table[2], "nr_cap_states", &sge->nr_cap_states,
-			sizeof(int), 0644, proc_dointvec_minmax, false);
+			sizeof(int), 0444, proc_dointvec_minmax, false);
 	set_table_entry(&table[3], "cap_states", &sge->cap_states[0].cap,
-			sge->nr_cap_states*sizeof(struct capacity_state), 0644,
+			sge->nr_cap_states*sizeof(struct capacity_state), 0444,
 			proc_doulongvec_minmax, false);
 
 	return table;

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b2be181..4faae82 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c

@@ -6200,13 +6200,14 @@ schedtune_margin(unsigned long signal, long boost)
 	if (boost >= 0) {
 		margin  = SCHED_CAPACITY_SCALE - signal;
 		margin *= boost;
-	} else
+	} else {
 		margin = -signal * boost;
+	}
 
 	margin  = reciprocal_divide(margin, schedtune_spc_rdiv);
-
 	if (boost < 0)
 		margin *= -1;
+
 	return margin;
 }
 
@@ -6277,7 +6278,7 @@ boosted_task_util(struct task_struct *p)
 
 static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
 {
-	return capacity_orig_of(cpu) - cpu_util_wake(cpu, p);
+	return max_t(long, capacity_of(cpu) - cpu_util_wake(cpu, p), 0);
 }
 
 /*

diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c
index c4c49fa..c7b78682 100644
--- a/kernel/sched/tune.c
+++ b/kernel/sched/tune.c

@@ -110,6 +110,64 @@ __schedtune_accept_deltas(int nrg_delta, int cap_delta,
 
 /*
  * EAS scheduler tunables for task groups.
+ *
+ * When CGroup support is enabled, we have to synchronize two different
+ * paths:
+ *  - slow path: where CGroups are created/updated/removed
+ *  - fast path: where tasks in a CGroups are accounted
+ *
+ * The slow path tracks (a limited number of) CGroups and maps each on a
+ * "boost_group" index. The fastpath accounts tasks currently RUNNABLE on each
+ * "boost_group".
+ *
+ * Once a new CGroup is created, a boost group idx is assigned and the
+ * corresponding "boost_group" marked as valid on each CPU.
+ * Once a CGroup is release, the corresponding "boost_group" is marked as
+ * invalid on each CPU. The CPU boost value (boost_max) is aggregated by
+ * considering only valid boost_groups with a non null tasks counter.
+ *
+ * .:: Locking strategy
+ *
+ * The fast path uses a spin lock for each CPU boost_group which protects the
+ * tasks counter.
+ *
+ * The "valid" and "boost" values of each CPU boost_group is instead
+ * protected by the RCU lock provided by the CGroups callbacks. Thus, only the
+ * slow path can access and modify the boost_group attribtues of each CPU.
+ * The fast path will catch up the most updated values at the next scheduling
+ * event (i.e. enqueue/dequeue).
+ *
+ *                                                        |
+ *                                             SLOW PATH  |   FAST PATH
+ *                              CGroup add/update/remove  |   Scheduler enqueue/dequeue events
+ *                                                        |
+ *                                                        |
+ *                                                        |     DEFINE_PER_CPU(struct boost_groups)
+ *                                                        |     +--------------+----+---+----+----+
+ *                                                        |     |  idle        |    |   |    |    |
+ *                                                        |     |  boost_max   |    |   |    |    |
+ *                                                        |  +---->lock        |    |   |    |    |
+ *  struct schedtune                  allocated_groups    |  |  |  group[    ] |    |   |    |    |
+ *  +------------------------------+         +-------+    |  |  +--+---------+-+----+---+----+----+
+ *  | idx                          |         |       |    |  |     |  valid  |
+ *  | boots / prefer_idle          |         |       |    |  |     |  boost  |
+ *  | perf_{boost/constraints}_idx | <---------+(*)  |    |  |     |  tasks  | <------------+
+ *  | css                          |         +-------+    |  |     +---------+              |
+ *  +-+----------------------------+         |       |    |  |     |         |              |
+ *    ^                                      |       |    |  |     |         |              |
+ *    |                                      +-------+    |  |     +---------+              |
+ *    |                                      |       |    |  |     |         |              |
+ *    |                                      |       |    |  |     |         |              |
+ *    |                                      +-------+    |  |     +---------+              |
+ *    | zmalloc                              |       |    |  |     |         |              |
+ *    |                                      |       |    |  |     |         |              |
+ *    |                                      +-------+    |  |     +---------+              |
+ *    +                              BOOSTGROUPS_COUNT    |  |     BOOSTGROUPS_COUNT        |
+ *  schedtune_boostgroup_init()                           |  +                              |
+ *                                                        |  schedtune_{en,de}queue_task()  |
+ *                                                        |                                 +
+ *                                                        |          schedtune_tasks_update()
+ *                                                        |
  */
 
 /* SchdTune tunables for a group of tasks */
@@ -259,10 +317,11 @@ static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = {
  * maximum per-CPU boosting value.
  */
 struct boost_groups {
-	bool idle;
 	/* Maximum boost value for all RUNNABLE tasks on a CPU */
 	int boost_max;
 	struct {
+		/* True when this boost group maps an actual cgroup */
+		bool valid;
 		/* The boost for tasks on that boost group */
 		int boost;
 		/* Count of RUNNABLE tasks on that boost group */
@@ -358,6 +417,11 @@ schedtune_cpu_update(int cpu)
 	/* The root boost group is always active */
 	boost_max = bg->group[0].boost;
 	for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx) {
+
+		/* Ignore non boostgroups not mapping a cgroup */
+		if (!bg->group[idx].valid)
+			continue;
+
 		/*
 		 * A boost group affects a CPU only if it has
 		 * RUNNABLE tasks on that CPU
@@ -367,6 +431,7 @@ schedtune_cpu_update(int cpu)
 
 		boost_max = max(boost_max, bg->group[idx].boost);
 	}
+
 	/* Ensures boost_max is non-negative when all cgroup boost values
 	 * are neagtive. Avoids under-accounting of cpu capacity which may cause
 	 * task stacking and frequency spikes.*/
@@ -386,6 +451,9 @@ schedtune_boostgroup_update(int idx, int boost)
 	for_each_possible_cpu(cpu) {
 		bg = &per_cpu(cpu_boost_groups, cpu);
 
+		/* CGroups are never associated to non active cgroups */
+		BUG_ON(!bg->group[idx].valid);
+
 		/*
 		 * Keep track of current boost values to compute the per CPU
 		 * maximum only when it has been affected by the new value of
@@ -827,24 +895,22 @@ static struct cftype files[] = {
 	{ }	/* terminate */
 };
 
-
-static int
-schedtune_boostgroup_init(struct schedtune *st)
+static void
+schedtune_boostgroup_init(struct schedtune *st, int idx)
 {
 	struct boost_groups *bg;
 	int cpu;
 
-	/* Keep track of allocated boost groups */
-	allocated_group[st->idx] = st;
-
-	/* Initialize the per CPU boost groups */
+	/* Initialize per CPUs boost group support */
 	for_each_possible_cpu(cpu) {
 		bg = &per_cpu(cpu_boost_groups, cpu);
-		bg->group[st->idx].boost = 0;
-		bg->group[st->idx].tasks = 0;
+		bg->group[idx].boost = 0;
+		bg->group[idx].valid = true;
 	}
 
-	return 0;
+	/* Keep track of allocated boost groups */
+	allocated_group[idx] = st;
+	st->idx = idx;
 }
 
 static struct cgroup_subsys_state *
@@ -877,15 +943,10 @@ schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
 		goto out;
 
 	/* Initialize per CPUs boost group support */
-	st->idx = idx;
-	init_sched_boost(st);
-	if (schedtune_boostgroup_init(st))
-		goto release;
+	schedtune_boostgroup_init(st, idx);
 
 	return &st->css;
 
-release:
-	kfree(st);
 out:
 	return ERR_PTR(-ENOMEM);
 }
@@ -893,8 +954,15 @@ schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
 static void
 schedtune_boostgroup_release(struct schedtune *st)
 {
-	/* Reset this boost group */
-	schedtune_boostgroup_update(st->idx, 0);
+	struct boost_groups *bg;
+	int cpu;
+
+	/* Reset per CPUs boost group support */
+	for_each_possible_cpu(cpu) {
+		bg = &per_cpu(cpu_boost_groups, cpu);
+		bg->group[st->idx].valid = false;
+		bg->group[st->idx].boost = 0;
+	}
 
 	/* Keep track of allocated boost groups */
 	allocated_group[st->idx] = NULL;
@@ -905,6 +973,7 @@ schedtune_css_free(struct cgroup_subsys_state *css)
 {
 	struct schedtune *st = css_st(css);
 
+	/* Release per CPUs boost group support */
 	schedtune_boostgroup_release(st);
 	kfree(st);
 }
@@ -930,6 +999,7 @@ schedtune_init_cgroups(void)
 	for_each_possible_cpu(cpu) {
 		bg = &per_cpu(cpu_boost_groups, cpu);
 		memset(bg, 0, sizeof(struct boost_groups));
+		bg->group[0].valid = true;
 		raw_spin_lock_init(&bg->lock);
 	}
 

diff --git a/kernel/sys.c b/kernel/sys.c
index e1c2460..181a7f0 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c

@@ -2073,17 +2073,6 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
 }
 #endif
 
-int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which)
-{
-	return -EINVAL;
-}
-
-int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
-				    unsigned long ctrl)
-{
-	return -EINVAL;
-}
-
 #ifdef CONFIG_MMU
 static int prctl_update_vma_anon_name(struct vm_area_struct *vma,
 		struct vm_area_struct **prev,
@@ -2231,6 +2220,17 @@ static int prctl_set_vma(unsigned long opt, unsigned long start,
 }
 #endif
 
+int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which)
+{
+	return -EINVAL;
+}
+
+int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
+				    unsigned long ctrl)
+{
+	return -EINVAL;
+}
+
 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		unsigned long, arg4, unsigned long, arg5)
 {

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1f6800c..2656dcb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c

@@ -105,6 +105,7 @@ extern char core_pattern[];
 extern unsigned int core_pipe_limit;
 #endif
 extern int pid_max;
+extern int extra_free_kbytes;
 extern int pid_max_min, pid_max_max;
 extern int percpu_pagelist_fraction;
 extern int latencytop_enabled;

diff --git a/lib/Kconfig b/lib/Kconfig
index 8b6c41e..e5443cee 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig

@@ -185,6 +185,9 @@
 	  when they need to do cyclic redundancy check according CRC8
 	  algorithm. Module will be called crc8.
 
+config XXHASH
+	tristate
+
 config AUDIT_GENERIC
 	bool
 	depends on AUDIT && !AUDIT_ARCH
@@ -239,6 +242,14 @@
 config LZ4_DECOMPRESS
 	tristate
 
+config ZSTD_COMPRESS
+	select XXHASH
+	tristate
+
+config ZSTD_DECOMPRESS
+	select XXHASH
+	tristate
+
 source "lib/xz/Kconfig"
 
 #

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 5b0e9fe..5e70227 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug

@@ -2085,6 +2085,16 @@
 
 	  If unsure, say N.
 
+config BUG_ON_DATA_CORRUPTION
+	bool "Trigger a BUG when data corruption is detected"
+	select CONFIG_DEBUG_LIST
+	help
+	  Select this option if the kernel should BUG when it encounters
+	  data corruption in kernel memory structures when they get checked
+	  for validity.
+
+	  If unsure, say N.
+
 config PANIC_ON_DATA_CORRUPTION
 	bool "Cause a Kernel Panic When Data Corruption is detected"
 	help

diff --git a/lib/Makefile b/lib/Makefile
index 6bde16d..78e7175 100644
--- a/lib/Makefile
+++ b/lib/Makefile

@@ -19,7 +19,7 @@
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
 	 idr.o int_sqrt.o extable.o \
-	 sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
+	 sha1.o chacha.o md5.o irq_regs.o argv_split.o \
 	 flex_proportions.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
 	 earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o
@@ -95,6 +95,7 @@
 obj-$(CONFIG_CRC7)	+= crc7.o
 obj-$(CONFIG_LIBCRC32C)	+= libcrc32c.o
 obj-$(CONFIG_CRC8)	+= crc8.o
+obj-$(CONFIG_XXHASH)	+= xxhash.o
 obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o
 
 obj-$(CONFIG_842_COMPRESS) += 842/
@@ -108,6 +109,8 @@
 obj-$(CONFIG_LZ4_COMPRESS) += lz4/
 obj-$(CONFIG_LZ4HC_COMPRESS) += lz4/
 obj-$(CONFIG_LZ4_DECOMPRESS) += lz4/
+obj-$(CONFIG_ZSTD_COMPRESS) += zstd/
+obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd/
 obj-$(CONFIG_XZ_DEC) += xz/
 obj-$(CONFIG_RAID6_PQ) += raid6/
 

diff --git a/lib/chacha.c b/lib/chacha.c
new file mode 100644
index 0000000..a46d283
--- /dev/null
+++ b/lib/chacha.c

@@ -0,0 +1,117 @@
+/*
+ * The "hash function" used as the core of the ChaCha stream cipher (RFC7539)
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/bitops.h>
+#include <linux/cryptohash.h>
+#include <asm/unaligned.h>
+#include <crypto/chacha.h>
+
+static void chacha_permute(u32 *x, int nrounds)
+{
+	int i;
+
+	/* whitelist the allowed round counts */
+	WARN_ON_ONCE(nrounds != 20 && nrounds != 12);
+
+	for (i = 0; i < nrounds; i += 2) {
+		x[0]  += x[4];    x[12] = rol32(x[12] ^ x[0],  16);
+		x[1]  += x[5];    x[13] = rol32(x[13] ^ x[1],  16);
+		x[2]  += x[6];    x[14] = rol32(x[14] ^ x[2],  16);
+		x[3]  += x[7];    x[15] = rol32(x[15] ^ x[3],  16);
+
+		x[8]  += x[12];   x[4]  = rol32(x[4]  ^ x[8],  12);
+		x[9]  += x[13];   x[5]  = rol32(x[5]  ^ x[9],  12);
+		x[10] += x[14];   x[6]  = rol32(x[6]  ^ x[10], 12);
+		x[11] += x[15];   x[7]  = rol32(x[7]  ^ x[11], 12);
+
+		x[0]  += x[4];    x[12] = rol32(x[12] ^ x[0],   8);
+		x[1]  += x[5];    x[13] = rol32(x[13] ^ x[1],   8);
+		x[2]  += x[6];    x[14] = rol32(x[14] ^ x[2],   8);
+		x[3]  += x[7];    x[15] = rol32(x[15] ^ x[3],   8);
+
+		x[8]  += x[12];   x[4]  = rol32(x[4]  ^ x[8],   7);
+		x[9]  += x[13];   x[5]  = rol32(x[5]  ^ x[9],   7);
+		x[10] += x[14];   x[6]  = rol32(x[6]  ^ x[10],  7);
+		x[11] += x[15];   x[7]  = rol32(x[7]  ^ x[11],  7);
+
+		x[0]  += x[5];    x[15] = rol32(x[15] ^ x[0],  16);
+		x[1]  += x[6];    x[12] = rol32(x[12] ^ x[1],  16);
+		x[2]  += x[7];    x[13] = rol32(x[13] ^ x[2],  16);
+		x[3]  += x[4];    x[14] = rol32(x[14] ^ x[3],  16);
+
+		x[10] += x[15];   x[5]  = rol32(x[5]  ^ x[10], 12);
+		x[11] += x[12];   x[6]  = rol32(x[6]  ^ x[11], 12);
+		x[8]  += x[13];   x[7]  = rol32(x[7]  ^ x[8],  12);
+		x[9]  += x[14];   x[4]  = rol32(x[4]  ^ x[9],  12);
+
+		x[0]  += x[5];    x[15] = rol32(x[15] ^ x[0],   8);
+		x[1]  += x[6];    x[12] = rol32(x[12] ^ x[1],   8);
+		x[2]  += x[7];    x[13] = rol32(x[13] ^ x[2],   8);
+		x[3]  += x[4];    x[14] = rol32(x[14] ^ x[3],   8);
+
+		x[10] += x[15];   x[5]  = rol32(x[5]  ^ x[10],  7);
+		x[11] += x[12];   x[6]  = rol32(x[6]  ^ x[11],  7);
+		x[8]  += x[13];   x[7]  = rol32(x[7]  ^ x[8],   7);
+		x[9]  += x[14];   x[4]  = rol32(x[4]  ^ x[9],   7);
+	}
+}
+
+/**
+ * chacha_block - generate one keystream block and increment block counter
+ * @state: input state matrix (16 32-bit words)
+ * @stream: output keystream block (64 bytes)
+ * @nrounds: number of rounds (20 or 12; 20 is recommended)
+ *
+ * This is the ChaCha core, a function from 64-byte strings to 64-byte strings.
+ * The caller has already converted the endianness of the input.  This function
+ * also handles incrementing the block counter in the input matrix.
+ */
+void chacha_block(u32 *state, u8 *stream, int nrounds)
+{
+	u32 x[16];
+	int i;
+
+	memcpy(x, state, 64);
+
+	chacha_permute(x, nrounds);
+
+	for (i = 0; i < ARRAY_SIZE(x); i++)
+		put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]);
+
+	state[12]++;
+}
+EXPORT_SYMBOL(chacha_block);
+
+/**
+ * hchacha_block - abbreviated ChaCha core, for XChaCha
+ * @in: input state matrix (16 32-bit words)
+ * @out: output (8 32-bit words)
+ * @nrounds: number of rounds (20 or 12; 20 is recommended)
+ *
+ * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step
+ * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf).  HChaCha
+ * skips the final addition of the initial state, and outputs only certain words
+ * of the state.  It should not be used for streaming directly.
+ */
+void hchacha_block(const u32 *in, u32 *out, int nrounds)
+{
+	u32 x[16];
+
+	memcpy(x, in, 64);
+
+	chacha_permute(x, nrounds);
+
+	memcpy(&out[0], &x[0], 16);
+	memcpy(&out[4], &x[12], 16);
+}
+EXPORT_SYMBOL(hchacha_block);

diff --git a/lib/chacha20.c b/lib/chacha20.c
deleted file mode 100644
index 250ceed..0000000
--- a/lib/chacha20.c
+++ /dev/null

@@ -1,79 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/bitops.h>
-#include <linux/cryptohash.h>
-#include <asm/unaligned.h>
-#include <crypto/chacha20.h>
-
-static inline u32 rotl32(u32 v, u8 n)
-{
-	return (v << n) | (v >> (sizeof(v) * 8 - n));
-}
-
-extern void chacha20_block(u32 *state, void *stream)
-{
-	u32 x[16], *out = stream;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(x); i++)
-		x[i] = state[i];
-
-	for (i = 0; i < 20; i += 2) {
-		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],  16);
-		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],  16);
-		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],  16);
-		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],  16);
-
-		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],  12);
-		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],  12);
-		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10], 12);
-		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11], 12);
-
-		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],   8);
-		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],   8);
-		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],   8);
-		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],   8);
-
-		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],   7);
-		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],   7);
-		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10],  7);
-		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11],  7);
-
-		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],  16);
-		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],  16);
-		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],  16);
-		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],  16);
-
-		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10], 12);
-		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11], 12);
-		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],  12);
-		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],  12);
-
-		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],   8);
-		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],   8);
-		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],   8);
-		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],   8);
-
-		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10],  7);
-		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11],  7);
-		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],   7);
-		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],   7);
-	}
-
-	for (i = 0; i < ARRAY_SIZE(x); i++)
-		out[i] = cpu_to_le32(x[i] + state[i]);
-
-	state[12]++;
-}
-EXPORT_SYMBOL(chacha20_block);

diff --git a/lib/list_debug.c b/lib/list_debug.c
index 7a5c1c0..18b872c 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c

@@ -2,8 +2,7 @@
  * Copyright 2006, Red Hat, Inc., Dave Jones
  * Released under the General Public License (GPL).
  *
- * This file contains the linked list implementations for
- * DEBUG_LIST.
+ * This file contains the linked list validation for DEBUG_LIST.
  */
 
 #include <linux/export.h>
@@ -14,94 +13,51 @@
 #include <linux/bug.h>
 
 /*
- * Insert a new entry between two known consecutive entries.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
+ * Check that the data structures for the list manipulations are reasonably
+ * valid. Failures here indicate memory corruption (and possibly an exploit
+ * attempt).
  */
 
-void __list_add(struct list_head *new,
-			      struct list_head *prev,
-			      struct list_head *next)
+bool __list_add_valid(struct list_head *new, struct list_head *prev,
+		      struct list_head *next)
 {
-	WARN(next->prev != prev,
-		"list_add corruption. next->prev should be "
-		"prev (%p), but was %p. (next=%p).\n",
-		prev, next->prev, next);
-	WARN(prev->next != next,
-		"list_add corruption. prev->next should be "
-		"next (%p), but was %p. (prev=%p).\n",
-		next, prev->next, prev);
-	WARN(new == prev || new == next,
-	     "list_add double add: new=%p, prev=%p, next=%p.\n",
-	     new, prev, next);
+	if (CHECK_DATA_CORRUPTION(next->prev != prev,
+			"list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
+			prev, next->prev, next) ||
+	    CHECK_DATA_CORRUPTION(prev->next != next,
+			"list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
+			next, prev->next, prev) ||
+	    CHECK_DATA_CORRUPTION(new == prev || new == next,
+			"list_add double add: new=%p, prev=%p, next=%p.\n",
+			new, prev, next))
+		return false;
 
-	BUG_ON((prev->next != next || next->prev != prev ||
-		 new == prev || new == next) && PANIC_CORRUPTION);
-
-	next->prev = new;
-	new->next = next;
-	new->prev = prev;
-	WRITE_ONCE(prev->next, new);
+	return true;
 }
-EXPORT_SYMBOL(__list_add);
+EXPORT_SYMBOL(__list_add_valid);
 
-void __list_del_entry(struct list_head *entry)
+bool __list_del_entry_valid(struct list_head *entry)
 {
 	struct list_head *prev, *next;
 
 	prev = entry->prev;
 	next = entry->next;
 
-	if (WARN(next == LIST_POISON1,
-		"list_del corruption, %p->next is LIST_POISON1 (%p)\n",
-		entry, LIST_POISON1) ||
-	    WARN(prev == LIST_POISON2,
-		"list_del corruption, %p->prev is LIST_POISON2 (%p)\n",
-		entry, LIST_POISON2) ||
-	    WARN(prev->next != entry,
-		"list_del corruption. prev->next should be %p, "
-		"but was %p\n", entry, prev->next) ||
-	    WARN(next->prev != entry,
-		"list_del corruption. next->prev should be %p, but was %p\n",
-		entry, next->prev)) {
-		BUG_ON(PANIC_CORRUPTION);
-		return;
-	}
+	if (CHECK_DATA_CORRUPTION(next == LIST_POISON1,
+			"list_del corruption, %p->next is LIST_POISON1 (%p)\n",
+			entry, LIST_POISON1) ||
+	    CHECK_DATA_CORRUPTION(prev == LIST_POISON2,
+			"list_del corruption, %p->prev is LIST_POISON2 (%p)\n",
+			entry, LIST_POISON2) ||
+	    CHECK_DATA_CORRUPTION(prev->next != entry,
+			"list_del corruption. prev->next should be %p, but was %p\n",
+			entry, prev->next) ||
+	    CHECK_DATA_CORRUPTION(next->prev != entry,
+			"list_del corruption. next->prev should be %p, but was %p\n",
+			entry, next->prev))
+		return false;
 
-	__list_del(prev, next);
-}
-EXPORT_SYMBOL(__list_del_entry);
+	return true;
 
-/**
- * list_del - deletes entry from list.
- * @entry: the element to delete from the list.
- * Note: list_empty on entry does not return true after this, the entry is
- * in an undefined state.
- */
-void list_del(struct list_head *entry)
-{
-	__list_del_entry(entry);
-	entry->next = LIST_POISON1;
-	entry->prev = LIST_POISON2;
 }
-EXPORT_SYMBOL(list_del);
-
-/*
- * RCU variants.
- */
-void __list_add_rcu(struct list_head *new,
-		    struct list_head *prev, struct list_head *next)
-{
-	WARN(next->prev != prev,
-		"list_add_rcu corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
-		prev, next->prev, next);
-	WARN(prev->next != next,
-		"list_add_rcu corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
-		next, prev->next, prev);
-	new->next = next;
-	new->prev = prev;
-	rcu_assign_pointer(list_next_rcu(prev), new);
-	next->prev = new;
-}
-EXPORT_SYMBOL(__list_add_rcu);
+EXPORT_SYMBOL(__list_del_entry_valid);

diff --git a/lib/xxhash.c b/lib/xxhash.c
new file mode 100644
index 0000000..aa61e2a
--- /dev/null
+++ b/lib/xxhash.c

@@ -0,0 +1,500 @@
+/*
+ * xxHash - Extremely Fast Hash algorithm
+ * Copyright (C) 2012-2016, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ *     copyright notice, this list of conditions and the following disclaimer
+ *     in the documentation and/or other materials provided with the
+ *     distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ *
+ * You can contact the author at:
+ * - xxHash homepage: http://cyan4973.github.io/xxHash/
+ * - xxHash source repository: https://github.com/Cyan4973/xxHash
+ */
+
+#include <asm/unaligned.h>
+#include <linux/errno.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/xxhash.h>
+
+/*-*************************************
+ * Macros
+ **************************************/
+#define xxh_rotl32(x, r) ((x << r) | (x >> (32 - r)))
+#define xxh_rotl64(x, r) ((x << r) | (x >> (64 - r)))
+
+#ifdef __LITTLE_ENDIAN
+# define XXH_CPU_LITTLE_ENDIAN 1
+#else
+# define XXH_CPU_LITTLE_ENDIAN 0
+#endif
+
+/*-*************************************
+ * Constants
+ **************************************/
+static const uint32_t PRIME32_1 = 2654435761U;
+static const uint32_t PRIME32_2 = 2246822519U;
+static const uint32_t PRIME32_3 = 3266489917U;
+static const uint32_t PRIME32_4 =  668265263U;
+static const uint32_t PRIME32_5 =  374761393U;
+
+static const uint64_t PRIME64_1 = 11400714785074694791ULL;
+static const uint64_t PRIME64_2 = 14029467366897019727ULL;
+static const uint64_t PRIME64_3 =  1609587929392839161ULL;
+static const uint64_t PRIME64_4 =  9650029242287828579ULL;
+static const uint64_t PRIME64_5 =  2870177450012600261ULL;
+
+/*-**************************
+ *  Utils
+ ***************************/
+void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src)
+{
+	memcpy(dst, src, sizeof(*dst));
+}
+EXPORT_SYMBOL(xxh32_copy_state);
+
+void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src)
+{
+	memcpy(dst, src, sizeof(*dst));
+}
+EXPORT_SYMBOL(xxh64_copy_state);
+
+/*-***************************
+ * Simple Hash Functions
+ ****************************/
+static uint32_t xxh32_round(uint32_t seed, const uint32_t input)
+{
+	seed += input * PRIME32_2;
+	seed = xxh_rotl32(seed, 13);
+	seed *= PRIME32_1;
+	return seed;
+}
+
+uint32_t xxh32(const void *input, const size_t len, const uint32_t seed)
+{
+	const uint8_t *p = (const uint8_t *)input;
+	const uint8_t *b_end = p + len;
+	uint32_t h32;
+
+	if (len >= 16) {
+		const uint8_t *const limit = b_end - 16;
+		uint32_t v1 = seed + PRIME32_1 + PRIME32_2;
+		uint32_t v2 = seed + PRIME32_2;
+		uint32_t v3 = seed + 0;
+		uint32_t v4 = seed - PRIME32_1;
+
+		do {
+			v1 = xxh32_round(v1, get_unaligned_le32(p));
+			p += 4;
+			v2 = xxh32_round(v2, get_unaligned_le32(p));
+			p += 4;
+			v3 = xxh32_round(v3, get_unaligned_le32(p));
+			p += 4;
+			v4 = xxh32_round(v4, get_unaligned_le32(p));
+			p += 4;
+		} while (p <= limit);
+
+		h32 = xxh_rotl32(v1, 1) + xxh_rotl32(v2, 7) +
+			xxh_rotl32(v3, 12) + xxh_rotl32(v4, 18);
+	} else {
+		h32 = seed + PRIME32_5;
+	}
+
+	h32 += (uint32_t)len;
+
+	while (p + 4 <= b_end) {
+		h32 += get_unaligned_le32(p) * PRIME32_3;
+		h32 = xxh_rotl32(h32, 17) * PRIME32_4;
+		p += 4;
+	}
+
+	while (p < b_end) {
+		h32 += (*p) * PRIME32_5;
+		h32 = xxh_rotl32(h32, 11) * PRIME32_1;
+		p++;
+	}
+
+	h32 ^= h32 >> 15;
+	h32 *= PRIME32_2;
+	h32 ^= h32 >> 13;
+	h32 *= PRIME32_3;
+	h32 ^= h32 >> 16;
+
+	return h32;
+}
+EXPORT_SYMBOL(xxh32);
+
+static uint64_t xxh64_round(uint64_t acc, const uint64_t input)
+{
+	acc += input * PRIME64_2;
+	acc = xxh_rotl64(acc, 31);
+	acc *= PRIME64_1;
+	return acc;
+}
+
+static uint64_t xxh64_merge_round(uint64_t acc, uint64_t val)
+{
+	val = xxh64_round(0, val);
+	acc ^= val;
+	acc = acc * PRIME64_1 + PRIME64_4;
+	return acc;
+}
+
+uint64_t xxh64(const void *input, const size_t len, const uint64_t seed)
+{
+	const uint8_t *p = (const uint8_t *)input;
+	const uint8_t *const b_end = p + len;
+	uint64_t h64;
+
+	if (len >= 32) {
+		const uint8_t *const limit = b_end - 32;
+		uint64_t v1 = seed + PRIME64_1 + PRIME64_2;
+		uint64_t v2 = seed + PRIME64_2;
+		uint64_t v3 = seed + 0;
+		uint64_t v4 = seed - PRIME64_1;
+
+		do {
+			v1 = xxh64_round(v1, get_unaligned_le64(p));
+			p += 8;
+			v2 = xxh64_round(v2, get_unaligned_le64(p));
+			p += 8;
+			v3 = xxh64_round(v3, get_unaligned_le64(p));
+			p += 8;
+			v4 = xxh64_round(v4, get_unaligned_le64(p));
+			p += 8;
+		} while (p <= limit);
+
+		h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) +
+			xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18);
+		h64 = xxh64_merge_round(h64, v1);
+		h64 = xxh64_merge_round(h64, v2);
+		h64 = xxh64_merge_round(h64, v3);
+		h64 = xxh64_merge_round(h64, v4);
+
+	} else {
+		h64  = seed + PRIME64_5;
+	}
+
+	h64 += (uint64_t)len;
+
+	while (p + 8 <= b_end) {
+		const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p));
+
+		h64 ^= k1;
+		h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4;
+		p += 8;
+	}
+
+	if (p + 4 <= b_end) {
+		h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1;
+		h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+		p += 4;
+	}
+
+	while (p < b_end) {
+		h64 ^= (*p) * PRIME64_5;
+		h64 = xxh_rotl64(h64, 11) * PRIME64_1;
+		p++;
+	}
+
+	h64 ^= h64 >> 33;
+	h64 *= PRIME64_2;
+	h64 ^= h64 >> 29;
+	h64 *= PRIME64_3;
+	h64 ^= h64 >> 32;
+
+	return h64;
+}
+EXPORT_SYMBOL(xxh64);
+
+/*-**************************************************
+ * Advanced Hash Functions
+ ***************************************************/
+void xxh32_reset(struct xxh32_state *statePtr, const uint32_t seed)
+{
+	/* use a local state for memcpy() to avoid strict-aliasing warnings */
+	struct xxh32_state state;
+
+	memset(&state, 0, sizeof(state));
+	state.v1 = seed + PRIME32_1 + PRIME32_2;
+	state.v2 = seed + PRIME32_2;
+	state.v3 = seed + 0;
+	state.v4 = seed - PRIME32_1;
+	memcpy(statePtr, &state, sizeof(state));
+}
+EXPORT_SYMBOL(xxh32_reset);
+
+void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed)
+{
+	/* use a local state for memcpy() to avoid strict-aliasing warnings */
+	struct xxh64_state state;
+
+	memset(&state, 0, sizeof(state));
+	state.v1 = seed + PRIME64_1 + PRIME64_2;
+	state.v2 = seed + PRIME64_2;
+	state.v3 = seed + 0;
+	state.v4 = seed - PRIME64_1;
+	memcpy(statePtr, &state, sizeof(state));
+}
+EXPORT_SYMBOL(xxh64_reset);
+
+int xxh32_update(struct xxh32_state *state, const void *input, const size_t len)
+{
+	const uint8_t *p = (const uint8_t *)input;
+	const uint8_t *const b_end = p + len;
+
+	if (input == NULL)
+		return -EINVAL;
+
+	state->total_len_32 += (uint32_t)len;
+	state->large_len |= (len >= 16) | (state->total_len_32 >= 16);
+
+	if (state->memsize + len < 16) { /* fill in tmp buffer */
+		memcpy((uint8_t *)(state->mem32) + state->memsize, input, len);
+		state->memsize += (uint32_t)len;
+		return 0;
+	}
+
+	if (state->memsize) { /* some data left from previous update */
+		const uint32_t *p32 = state->mem32;
+
+		memcpy((uint8_t *)(state->mem32) + state->memsize, input,
+			16 - state->memsize);
+
+		state->v1 = xxh32_round(state->v1, get_unaligned_le32(p32));
+		p32++;
+		state->v2 = xxh32_round(state->v2, get_unaligned_le32(p32));
+		p32++;
+		state->v3 = xxh32_round(state->v3, get_unaligned_le32(p32));
+		p32++;
+		state->v4 = xxh32_round(state->v4, get_unaligned_le32(p32));
+		p32++;
+
+		p += 16-state->memsize;
+		state->memsize = 0;
+	}
+
+	if (p <= b_end - 16) {
+		const uint8_t *const limit = b_end - 16;
+		uint32_t v1 = state->v1;
+		uint32_t v2 = state->v2;
+		uint32_t v3 = state->v3;
+		uint32_t v4 = state->v4;
+
+		do {
+			v1 = xxh32_round(v1, get_unaligned_le32(p));
+			p += 4;
+			v2 = xxh32_round(v2, get_unaligned_le32(p));
+			p += 4;
+			v3 = xxh32_round(v3, get_unaligned_le32(p));
+			p += 4;
+			v4 = xxh32_round(v4, get_unaligned_le32(p));
+			p += 4;
+		} while (p <= limit);
+
+		state->v1 = v1;
+		state->v2 = v2;
+		state->v3 = v3;
+		state->v4 = v4;
+	}
+
+	if (p < b_end) {
+		memcpy(state->mem32, p, (size_t)(b_end-p));
+		state->memsize = (uint32_t)(b_end-p);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(xxh32_update);
+
+uint32_t xxh32_digest(const struct xxh32_state *state)
+{
+	const uint8_t *p = (const uint8_t *)state->mem32;
+	const uint8_t *const b_end = (const uint8_t *)(state->mem32) +
+		state->memsize;
+	uint32_t h32;
+
+	if (state->large_len) {
+		h32 = xxh_rotl32(state->v1, 1) + xxh_rotl32(state->v2, 7) +
+			xxh_rotl32(state->v3, 12) + xxh_rotl32(state->v4, 18);
+	} else {
+		h32 = state->v3 /* == seed */ + PRIME32_5;
+	}
+
+	h32 += state->total_len_32;
+
+	while (p + 4 <= b_end) {
+		h32 += get_unaligned_le32(p) * PRIME32_3;
+		h32 = xxh_rotl32(h32, 17) * PRIME32_4;
+		p += 4;
+	}
+
+	while (p < b_end) {
+		h32 += (*p) * PRIME32_5;
+		h32 = xxh_rotl32(h32, 11) * PRIME32_1;
+		p++;
+	}
+
+	h32 ^= h32 >> 15;
+	h32 *= PRIME32_2;
+	h32 ^= h32 >> 13;
+	h32 *= PRIME32_3;
+	h32 ^= h32 >> 16;
+
+	return h32;
+}
+EXPORT_SYMBOL(xxh32_digest);
+
+int xxh64_update(struct xxh64_state *state, const void *input, const size_t len)
+{
+	const uint8_t *p = (const uint8_t *)input;
+	const uint8_t *const b_end = p + len;
+
+	if (input == NULL)
+		return -EINVAL;
+
+	state->total_len += len;
+
+	if (state->memsize + len < 32) { /* fill in tmp buffer */
+		memcpy(((uint8_t *)state->mem64) + state->memsize, input, len);
+		state->memsize += (uint32_t)len;
+		return 0;
+	}
+
+	if (state->memsize) { /* tmp buffer is full */
+		uint64_t *p64 = state->mem64;
+
+		memcpy(((uint8_t *)p64) + state->memsize, input,
+			32 - state->memsize);
+
+		state->v1 = xxh64_round(state->v1, get_unaligned_le64(p64));
+		p64++;
+		state->v2 = xxh64_round(state->v2, get_unaligned_le64(p64));
+		p64++;
+		state->v3 = xxh64_round(state->v3, get_unaligned_le64(p64));
+		p64++;
+		state->v4 = xxh64_round(state->v4, get_unaligned_le64(p64));
+
+		p += 32 - state->memsize;
+		state->memsize = 0;
+	}
+
+	if (p + 32 <= b_end) {
+		const uint8_t *const limit = b_end - 32;
+		uint64_t v1 = state->v1;
+		uint64_t v2 = state->v2;
+		uint64_t v3 = state->v3;
+		uint64_t v4 = state->v4;
+
+		do {
+			v1 = xxh64_round(v1, get_unaligned_le64(p));
+			p += 8;
+			v2 = xxh64_round(v2, get_unaligned_le64(p));
+			p += 8;
+			v3 = xxh64_round(v3, get_unaligned_le64(p));
+			p += 8;
+			v4 = xxh64_round(v4, get_unaligned_le64(p));
+			p += 8;
+		} while (p <= limit);
+
+		state->v1 = v1;
+		state->v2 = v2;
+		state->v3 = v3;
+		state->v4 = v4;
+	}
+
+	if (p < b_end) {
+		memcpy(state->mem64, p, (size_t)(b_end-p));
+		state->memsize = (uint32_t)(b_end - p);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(xxh64_update);
+
+uint64_t xxh64_digest(const struct xxh64_state *state)
+{
+	const uint8_t *p = (const uint8_t *)state->mem64;
+	const uint8_t *const b_end = (const uint8_t *)state->mem64 +
+		state->memsize;
+	uint64_t h64;
+
+	if (state->total_len >= 32) {
+		const uint64_t v1 = state->v1;
+		const uint64_t v2 = state->v2;
+		const uint64_t v3 = state->v3;
+		const uint64_t v4 = state->v4;
+
+		h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) +
+			xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18);
+		h64 = xxh64_merge_round(h64, v1);
+		h64 = xxh64_merge_round(h64, v2);
+		h64 = xxh64_merge_round(h64, v3);
+		h64 = xxh64_merge_round(h64, v4);
+	} else {
+		h64  = state->v3 + PRIME64_5;
+	}
+
+	h64 += (uint64_t)state->total_len;
+
+	while (p + 8 <= b_end) {
+		const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p));
+
+		h64 ^= k1;
+		h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4;
+		p += 8;
+	}
+
+	if (p + 4 <= b_end) {
+		h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1;
+		h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+		p += 4;
+	}
+
+	while (p < b_end) {
+		h64 ^= (*p) * PRIME64_5;
+		h64 = xxh_rotl64(h64, 11) * PRIME64_1;
+		p++;
+	}
+
+	h64 ^= h64 >> 33;
+	h64 *= PRIME64_2;
+	h64 ^= h64 >> 29;
+	h64 *= PRIME64_3;
+	h64 ^= h64 >> 32;
+
+	return h64;
+}
+EXPORT_SYMBOL(xxh64_digest);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("xxHash");

diff --git a/lib/zstd/Makefile b/lib/zstd/Makefile
new file mode 100644
index 0000000..dd0a359
--- /dev/null
+++ b/lib/zstd/Makefile

@@ -0,0 +1,18 @@
+obj-$(CONFIG_ZSTD_COMPRESS) += zstd_compress.o
+obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd_decompress.o
+
+ccflags-y += -O3
+
+# Object files unique to zstd_compress and zstd_decompress
+zstd_compress-y := fse_compress.o huf_compress.o compress.o
+zstd_decompress-y := huf_decompress.o decompress.o
+
+# These object files are shared between the modules.
+# Always add them to zstd_compress.
+# Unless both zstd_compress and zstd_decompress are built in
+# then also add them to zstd_decompress.
+zstd_compress-y += entropy_common.o fse_decompress.o zstd_common.o
+
+ifneq ($(CONFIG_ZSTD_COMPRESS)$(CONFIG_ZSTD_DECOMPRESS),yy)
+	zstd_decompress-y += entropy_common.o fse_decompress.o zstd_common.o
+endif

diff --git a/lib/zstd/bitstream.h b/lib/zstd/bitstream.h
new file mode 100644
index 0000000..a826b99
--- /dev/null
+++ b/lib/zstd/bitstream.h

@@ -0,0 +1,374 @@
+/*
+ * bitstream
+ * Part of FSE library
+ * header file (to include)
+ * Copyright (C) 2013-2016, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+/*
+*  This API consists of small unitary functions, which must be inlined for best performance.
+*  Since link-time-optimization is not available for all compilers,
+*  these functions are defined into a .h to be included.
+*/
+
+/*-****************************************
+*  Dependencies
+******************************************/
+#include "error_private.h" /* error codes and messages */
+#include "mem.h"	   /* unaligned access routines */
+
+/*=========================================
+*  Target specific
+=========================================*/
+#define STREAM_ACCUMULATOR_MIN_32 25
+#define STREAM_ACCUMULATOR_MIN_64 57
+#define STREAM_ACCUMULATOR_MIN ((U32)(ZSTD_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
+
+/*-******************************************
+*  bitStream encoding API (write forward)
+********************************************/
+/* bitStream can mix input from multiple sources.
+*  A critical property of these streams is that they encode and decode in **reverse** direction.
+*  So the first bit sequence you add will be the last to be read, like a LIFO stack.
+*/
+typedef struct {
+	size_t bitContainer;
+	int bitPos;
+	char *startPtr;
+	char *ptr;
+	char *endPtr;
+} BIT_CStream_t;
+
+ZSTD_STATIC size_t BIT_initCStream(BIT_CStream_t *bitC, void *dstBuffer, size_t dstCapacity);
+ZSTD_STATIC void BIT_addBits(BIT_CStream_t *bitC, size_t value, unsigned nbBits);
+ZSTD_STATIC void BIT_flushBits(BIT_CStream_t *bitC);
+ZSTD_STATIC size_t BIT_closeCStream(BIT_CStream_t *bitC);
+
+/* Start with initCStream, providing the size of buffer to write into.
+*  bitStream will never write outside of this buffer.
+*  `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
+*
+*  bits are first added to a local register.
+*  Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
+*  Writing data into memory is an explicit operation, performed by the flushBits function.
+*  Hence keep track how many bits are potentially stored into local register to avoid register overflow.
+*  After a flushBits, a maximum of 7 bits might still be stored into local register.
+*
+*  Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
+*
+*  Last operation is to close the bitStream.
+*  The function returns the final size of CStream in bytes.
+*  If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
+*/
+
+/*-********************************************
+*  bitStream decoding API (read backward)
+**********************************************/
+typedef struct {
+	size_t bitContainer;
+	unsigned bitsConsumed;
+	const char *ptr;
+	const char *start;
+} BIT_DStream_t;
+
+typedef enum {
+	BIT_DStream_unfinished = 0,
+	BIT_DStream_endOfBuffer = 1,
+	BIT_DStream_completed = 2,
+	BIT_DStream_overflow = 3
+} BIT_DStream_status; /* result of BIT_reloadDStream() */
+/* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+ZSTD_STATIC size_t BIT_initDStream(BIT_DStream_t *bitD, const void *srcBuffer, size_t srcSize);
+ZSTD_STATIC size_t BIT_readBits(BIT_DStream_t *bitD, unsigned nbBits);
+ZSTD_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t *bitD);
+ZSTD_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t *bitD);
+
+/* Start by invoking BIT_initDStream().
+*  A chunk of the bitStream is then stored into a local register.
+*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+*  You can then retrieve bitFields stored into the local register, **in reverse order**.
+*  Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
+*  A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
+*  Otherwise, it can be less than that, so proceed accordingly.
+*  Checking if DStream has reached its end can be performed with BIT_endOfDStream().
+*/
+
+/*-****************************************
+*  unsafe API
+******************************************/
+ZSTD_STATIC void BIT_addBitsFast(BIT_CStream_t *bitC, size_t value, unsigned nbBits);
+/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
+
+ZSTD_STATIC void BIT_flushBitsFast(BIT_CStream_t *bitC);
+/* unsafe version; does not check buffer overflow */
+
+ZSTD_STATIC size_t BIT_readBitsFast(BIT_DStream_t *bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+/*-**************************************************************
+*  Internal functions
+****************************************************************/
+ZSTD_STATIC unsigned BIT_highbit32(register U32 val) { return 31 - __builtin_clz(val); }
+
+/*=====    Local Constants   =====*/
+static const unsigned BIT_mask[] = {0,       1,       3,       7,	0xF,      0x1F,     0x3F,     0x7F,      0xFF,
+				    0x1FF,   0x3FF,   0x7FF,   0xFFF,    0x1FFF,   0x3FFF,   0x7FFF,   0xFFFF,    0x1FFFF,
+				    0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF}; /* up to 26 bits */
+
+/*-**************************************************************
+*  bitStream encoding
+****************************************************************/
+/*! BIT_initCStream() :
+ *  `dstCapacity` must be > sizeof(void*)
+ *  @return : 0 if success,
+			  otherwise an error code (can be tested using ERR_isError() ) */
+ZSTD_STATIC size_t BIT_initCStream(BIT_CStream_t *bitC, void *startPtr, size_t dstCapacity)
+{
+	bitC->bitContainer = 0;
+	bitC->bitPos = 0;
+	bitC->startPtr = (char *)startPtr;
+	bitC->ptr = bitC->startPtr;
+	bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr);
+	if (dstCapacity <= sizeof(bitC->ptr))
+		return ERROR(dstSize_tooSmall);
+	return 0;
+}
+
+/*! BIT_addBits() :
+	can add up to 26 bits into `bitC`.
+	Does not check for register overflow ! */
+ZSTD_STATIC void BIT_addBits(BIT_CStream_t *bitC, size_t value, unsigned nbBits)
+{
+	bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
+	bitC->bitPos += nbBits;
+}
+
+/*! BIT_addBitsFast() :
+ *  works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
+ZSTD_STATIC void BIT_addBitsFast(BIT_CStream_t *bitC, size_t value, unsigned nbBits)
+{
+	bitC->bitContainer |= value << bitC->bitPos;
+	bitC->bitPos += nbBits;
+}
+
+/*! BIT_flushBitsFast() :
+ *  unsafe version; does not check buffer overflow */
+ZSTD_STATIC void BIT_flushBitsFast(BIT_CStream_t *bitC)
+{
+	size_t const nbBytes = bitC->bitPos >> 3;
+	ZSTD_writeLEST(bitC->ptr, bitC->bitContainer);
+	bitC->ptr += nbBytes;
+	bitC->bitPos &= 7;
+	bitC->bitContainer >>= nbBytes * 8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
+}
+
+/*! BIT_flushBits() :
+ *  safe version; check for buffer overflow, and prevents it.
+ *  note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */
+ZSTD_STATIC void BIT_flushBits(BIT_CStream_t *bitC)
+{
+	size_t const nbBytes = bitC->bitPos >> 3;
+	ZSTD_writeLEST(bitC->ptr, bitC->bitContainer);
+	bitC->ptr += nbBytes;
+	if (bitC->ptr > bitC->endPtr)
+		bitC->ptr = bitC->endPtr;
+	bitC->bitPos &= 7;
+	bitC->bitContainer >>= nbBytes * 8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
+}
+
+/*! BIT_closeCStream() :
+ *  @return : size of CStream, in bytes,
+			  or 0 if it could not fit into dstBuffer */
+ZSTD_STATIC size_t BIT_closeCStream(BIT_CStream_t *bitC)
+{
+	BIT_addBitsFast(bitC, 1, 1); /* endMark */
+	BIT_flushBits(bitC);
+
+	if (bitC->ptr >= bitC->endPtr)
+		return 0; /* doesn't fit within authorized budget : cancel */
+
+	return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
+}
+
+/*-********************************************************
+* bitStream decoding
+**********************************************************/
+/*! BIT_initDStream() :
+*   Initialize a BIT_DStream_t.
+*   `bitD` : a pointer to an already allocated BIT_DStream_t structure.
+*   `srcSize` must be the *exact* size of the bitStream, in bytes.
+*   @return : size of stream (== srcSize) or an errorCode if a problem is detected
+*/
+ZSTD_STATIC size_t BIT_initDStream(BIT_DStream_t *bitD, const void *srcBuffer, size_t srcSize)
+{
+	if (srcSize < 1) {
+		memset(bitD, 0, sizeof(*bitD));
+		return ERROR(srcSize_wrong);
+	}
+
+	if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */
+		bitD->start = (const char *)srcBuffer;
+		bitD->ptr = (const char *)srcBuffer + srcSize - sizeof(bitD->bitContainer);
+		bitD->bitContainer = ZSTD_readLEST(bitD->ptr);
+		{
+			BYTE const lastByte = ((const BYTE *)srcBuffer)[srcSize - 1];
+			bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
+			if (lastByte == 0)
+				return ERROR(GENERIC); /* endMark not present */
+		}
+	} else {
+		bitD->start = (const char *)srcBuffer;
+		bitD->ptr = bitD->start;
+		bitD->bitContainer = *(const BYTE *)(bitD->start);
+		switch (srcSize) {
+		case 7: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[6]) << (sizeof(bitD->bitContainer) * 8 - 16);
+		case 6: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[5]) << (sizeof(bitD->bitContainer) * 8 - 24);
+		case 5: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[4]) << (sizeof(bitD->bitContainer) * 8 - 32);
+		case 4: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[3]) << 24;
+		case 3: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[2]) << 16;
+		case 2: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[1]) << 8;
+		default:;
+		}
+		{
+			BYTE const lastByte = ((const BYTE *)srcBuffer)[srcSize - 1];
+			bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
+			if (lastByte == 0)
+				return ERROR(GENERIC); /* endMark not present */
+		}
+		bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize) * 8;
+	}
+
+	return srcSize;
+}
+
+ZSTD_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) { return bitContainer >> start; }
+
+ZSTD_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) { return (bitContainer >> start) & BIT_mask[nbBits]; }
+
+ZSTD_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) { return bitContainer & BIT_mask[nbBits]; }
+
+/*! BIT_lookBits() :
+ *  Provides next n bits from local register.
+ *  local register is not modified.
+ *  On 32-bits, maxNbBits==24.
+ *  On 64-bits, maxNbBits==56.
+ *  @return : value extracted
+ */
+ZSTD_STATIC size_t BIT_lookBits(const BIT_DStream_t *bitD, U32 nbBits)
+{
+	U32 const bitMask = sizeof(bitD->bitContainer) * 8 - 1;
+	return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask - nbBits) & bitMask);
+}
+
+/*! BIT_lookBitsFast() :
+*   unsafe version; only works only if nbBits >= 1 */
+ZSTD_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t *bitD, U32 nbBits)
+{
+	U32 const bitMask = sizeof(bitD->bitContainer) * 8 - 1;
+	return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask + 1) - nbBits) & bitMask);
+}
+
+ZSTD_STATIC void BIT_skipBits(BIT_DStream_t *bitD, U32 nbBits) { bitD->bitsConsumed += nbBits; }
+
+/*! BIT_readBits() :
+ *  Read (consume) next n bits from local register and update.
+ *  Pay attention to not read more than nbBits contained into local register.
+ *  @return : extracted value.
+ */
+ZSTD_STATIC size_t BIT_readBits(BIT_DStream_t *bitD, U32 nbBits)
+{
+	size_t const value = BIT_lookBits(bitD, nbBits);
+	BIT_skipBits(bitD, nbBits);
+	return value;
+}
+
+/*! BIT_readBitsFast() :
+*   unsafe version; only works only if nbBits >= 1 */
+ZSTD_STATIC size_t BIT_readBitsFast(BIT_DStream_t *bitD, U32 nbBits)
+{
+	size_t const value = BIT_lookBitsFast(bitD, nbBits);
+	BIT_skipBits(bitD, nbBits);
+	return value;
+}
+
+/*! BIT_reloadDStream() :
+*   Refill `bitD` from buffer previously set in BIT_initDStream() .
+*   This function is safe, it guarantees it will not read beyond src buffer.
+*   @return : status of `BIT_DStream_t` internal register.
+			  if status == BIT_DStream_unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
+ZSTD_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t *bitD)
+{
+	if (bitD->bitsConsumed > (sizeof(bitD->bitContainer) * 8)) /* should not happen => corruption detected */
+		return BIT_DStream_overflow;
+
+	if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
+		bitD->ptr -= bitD->bitsConsumed >> 3;
+		bitD->bitsConsumed &= 7;
+		bitD->bitContainer = ZSTD_readLEST(bitD->ptr);
+		return BIT_DStream_unfinished;
+	}
+	if (bitD->ptr == bitD->start) {
+		if (bitD->bitsConsumed < sizeof(bitD->bitContainer) * 8)
+			return BIT_DStream_endOfBuffer;
+		return BIT_DStream_completed;
+	}
+	{
+		U32 nbBytes = bitD->bitsConsumed >> 3;
+		BIT_DStream_status result = BIT_DStream_unfinished;
+		if (bitD->ptr - nbBytes < bitD->start) {
+			nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
+			result = BIT_DStream_endOfBuffer;
+		}
+		bitD->ptr -= nbBytes;
+		bitD->bitsConsumed -= nbBytes * 8;
+		bitD->bitContainer = ZSTD_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
+		return result;
+	}
+}
+
+/*! BIT_endOfDStream() :
+*   @return Tells if DStream has exactly reached its end (all bits consumed).
+*/
+ZSTD_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t *DStream)
+{
+	return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer) * 8));
+}
+
+#endif /* BITSTREAM_H_MODULE */

diff --git a/lib/zstd/compress.c b/lib/zstd/compress.c
new file mode 100644
index 0000000..f9166cf
--- /dev/null
+++ b/lib/zstd/compress.c

@@ -0,0 +1,3484 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of https://github.com/facebook/zstd.
+ * An additional grant of patent rights can be found in the PATENTS file in the
+ * same directory.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ */
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "fse.h"
+#include "huf.h"
+#include "mem.h"
+#include "zstd_internal.h" /* includes zstd.h */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h> /* memset */
+
+/*-*************************************
+*  Constants
+***************************************/
+static const U32 g_searchStrength = 8; /* control skip over incompressible data */
+#define HASH_READ_SIZE 8
+typedef enum { ZSTDcs_created = 0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
+
+/*-*************************************
+*  Helper functions
+***************************************/
+size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; }
+
+/*-*************************************
+*  Sequence storage
+***************************************/
+static void ZSTD_resetSeqStore(seqStore_t *ssPtr)
+{
+	ssPtr->lit = ssPtr->litStart;
+	ssPtr->sequences = ssPtr->sequencesStart;
+	ssPtr->longLengthID = 0;
+}
+
+/*-*************************************
+*  Context memory management
+***************************************/
+struct ZSTD_CCtx_s {
+	const BYTE *nextSrc;  /* next block here to continue on curr prefix */
+	const BYTE *base;     /* All regular indexes relative to this position */
+	const BYTE *dictBase; /* extDict indexes relative to this position */
+	U32 dictLimit;	/* below that point, need extDict */
+	U32 lowLimit;	 /* below that point, no more data */
+	U32 nextToUpdate;     /* index from which to continue dictionary update */
+	U32 nextToUpdate3;    /* index from which to continue dictionary update */
+	U32 hashLog3;	 /* dispatch table : larger == faster, more memory */
+	U32 loadedDictEnd;    /* index of end of dictionary */
+	U32 forceWindow;      /* force back-references to respect limit of 1<<wLog, even for dictionary */
+	U32 forceRawDict;     /* Force loading dictionary in "content-only" mode (no header analysis) */
+	ZSTD_compressionStage_e stage;
+	U32 rep[ZSTD_REP_NUM];
+	U32 repToConfirm[ZSTD_REP_NUM];
+	U32 dictID;
+	ZSTD_parameters params;
+	void *workSpace;
+	size_t workSpaceSize;
+	size_t blockSize;
+	U64 frameContentSize;
+	struct xxh64_state xxhState;
+	ZSTD_customMem customMem;
+
+	seqStore_t seqStore; /* sequences storage ptrs */
+	U32 *hashTable;
+	U32 *hashTable3;
+	U32 *chainTable;
+	HUF_CElt *hufTable;
+	U32 flagStaticTables;
+	HUF_repeat flagStaticHufTable;
+	FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
+	FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
+	FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
+	unsigned tmpCounters[HUF_COMPRESS_WORKSPACE_SIZE_U32];
+};
+
+size_t ZSTD_CCtxWorkspaceBound(ZSTD_compressionParameters cParams)
+{
+	size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << cParams.windowLog);
+	U32 const divider = (cParams.searchLength == 3) ? 3 : 4;
+	size_t const maxNbSeq = blockSize / divider;
+	size_t const tokenSpace = blockSize + 11 * maxNbSeq;
+	size_t const chainSize = (cParams.strategy == ZSTD_fast) ? 0 : (1 << cParams.chainLog);
+	size_t const hSize = ((size_t)1) << cParams.hashLog;
+	U32 const hashLog3 = (cParams.searchLength > 3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog);
+	size_t const h3Size = ((size_t)1) << hashLog3;
+	size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
+	size_t const optSpace =
+	    ((MaxML + 1) + (MaxLL + 1) + (MaxOff + 1) + (1 << Litbits)) * sizeof(U32) + (ZSTD_OPT_NUM + 1) * (sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
+	size_t const workspaceSize = tableSpace + (256 * sizeof(U32)) /* huffTable */ + tokenSpace +
+				     (((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btopt2)) ? optSpace : 0);
+
+	return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_CCtx)) + ZSTD_ALIGN(workspaceSize);
+}
+
+static ZSTD_CCtx *ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
+{
+	ZSTD_CCtx *cctx;
+	if (!customMem.customAlloc || !customMem.customFree)
+		return NULL;
+	cctx = (ZSTD_CCtx *)ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
+	if (!cctx)
+		return NULL;
+	memset(cctx, 0, sizeof(ZSTD_CCtx));
+	cctx->customMem = customMem;
+	return cctx;
+}
+
+ZSTD_CCtx *ZSTD_initCCtx(void *workspace, size_t workspaceSize)
+{
+	ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
+	ZSTD_CCtx *cctx = ZSTD_createCCtx_advanced(stackMem);
+	if (cctx) {
+		cctx->workSpace = ZSTD_stackAllocAll(cctx->customMem.opaque, &cctx->workSpaceSize);
+	}
+	return cctx;
+}
+
+size_t ZSTD_freeCCtx(ZSTD_CCtx *cctx)
+{
+	if (cctx == NULL)
+		return 0; /* support free on NULL */
+	ZSTD_free(cctx->workSpace, cctx->customMem);
+	ZSTD_free(cctx, cctx->customMem);
+	return 0; /* reserved as a potential error code in the future */
+}
+
+const seqStore_t *ZSTD_getSeqStore(const ZSTD_CCtx *ctx) /* hidden interface */ { return &(ctx->seqStore); }
+
+static ZSTD_parameters ZSTD_getParamsFromCCtx(const ZSTD_CCtx *cctx) { return cctx->params; }
+
+/** ZSTD_checkParams() :
+	ensure param values remain within authorized range.
+	@return : 0, or an error code if one value is beyond authorized range */
+size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
+{
+#define CLAMPCHECK(val, min, max)                                       \
+	{                                                               \
+		if ((val < min) | (val > max))                          \
+			return ERROR(compressionParameter_unsupported); \
+	}
+	CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
+	CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
+	CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
+	CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
+	CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
+	CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
+	if ((U32)(cParams.strategy) > (U32)ZSTD_btopt2)
+		return ERROR(compressionParameter_unsupported);
+	return 0;
+}
+
+/** ZSTD_cycleLog() :
+ *  condition for correct operation : hashLog > 1 */
+static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
+{
+	U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
+	return hashLog - btScale;
+}
+
+/** ZSTD_adjustCParams() :
+	optimize `cPar` for a given input (`srcSize` and `dictSize`).
+	mostly downsizing to reduce memory consumption and initialization.
+	Both `srcSize` and `dictSize` are optional (use 0 if unknown),
+	but if both are 0, no optimization can be done.
+	Note : cPar is considered validated at this stage. Use ZSTD_checkParams() to ensure that. */
+ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
+{
+	if (srcSize + dictSize == 0)
+		return cPar; /* no size information available : no adjustment */
+
+	/* resize params, to use less memory when necessary */
+	{
+		U32 const minSrcSize = (srcSize == 0) ? 500 : 0;
+		U64 const rSize = srcSize + dictSize + minSrcSize;
+		if (rSize < ((U64)1 << ZSTD_WINDOWLOG_MAX)) {
+			U32 const srcLog = MAX(ZSTD_HASHLOG_MIN, ZSTD_highbit32((U32)(rSize)-1) + 1);
+			if (cPar.windowLog > srcLog)
+				cPar.windowLog = srcLog;
+		}
+	}
+	if (cPar.hashLog > cPar.windowLog)
+		cPar.hashLog = cPar.windowLog;
+	{
+		U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
+		if (cycleLog > cPar.windowLog)
+			cPar.chainLog -= (cycleLog - cPar.windowLog);
+	}
+
+	if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
+		cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */
+
+	return cPar;
+}
+
+static U32 ZSTD_equivalentParams(ZSTD_parameters param1, ZSTD_parameters param2)
+{
+	return (param1.cParams.hashLog == param2.cParams.hashLog) & (param1.cParams.chainLog == param2.cParams.chainLog) &
+	       (param1.cParams.strategy == param2.cParams.strategy) & ((param1.cParams.searchLength == 3) == (param2.cParams.searchLength == 3));
+}
+
+/*! ZSTD_continueCCtx() :
+	reuse CCtx without reset (note : requires no dictionary) */
+static size_t ZSTD_continueCCtx(ZSTD_CCtx *cctx, ZSTD_parameters params, U64 frameContentSize)
+{
+	U32 const end = (U32)(cctx->nextSrc - cctx->base);
+	cctx->params = params;
+	cctx->frameContentSize = frameContentSize;
+	cctx->lowLimit = end;
+	cctx->dictLimit = end;
+	cctx->nextToUpdate = end + 1;
+	cctx->stage = ZSTDcs_init;
+	cctx->dictID = 0;
+	cctx->loadedDictEnd = 0;
+	{
+		int i;
+		for (i = 0; i < ZSTD_REP_NUM; i++)
+			cctx->rep[i] = repStartValue[i];
+	}
+	cctx->seqStore.litLengthSum = 0; /* force reset of btopt stats */
+	xxh64_reset(&cctx->xxhState, 0);
+	return 0;
+}
+
+typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset, ZSTDcrp_fullReset } ZSTD_compResetPolicy_e;
+
+/*! ZSTD_resetCCtx_advanced() :
+	note : `params` must be validated */
+static size_t ZSTD_resetCCtx_advanced(ZSTD_CCtx *zc, ZSTD_parameters params, U64 frameContentSize, ZSTD_compResetPolicy_e const crp)
+{
+	if (crp == ZSTDcrp_continue)
+		if (ZSTD_equivalentParams(params, zc->params)) {
+			zc->flagStaticTables = 0;
+			zc->flagStaticHufTable = HUF_repeat_none;
+			return ZSTD_continueCCtx(zc, params, frameContentSize);
+		}
+
+	{
+		size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << params.cParams.windowLog);
+		U32 const divider = (params.cParams.searchLength == 3) ? 3 : 4;
+		size_t const maxNbSeq = blockSize / divider;
+		size_t const tokenSpace = blockSize + 11 * maxNbSeq;
+		size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog);
+		size_t const hSize = ((size_t)1) << params.cParams.hashLog;
+		U32 const hashLog3 = (params.cParams.searchLength > 3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog);
+		size_t const h3Size = ((size_t)1) << hashLog3;
+		size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
+		void *ptr;
+
+		/* Check if workSpace is large enough, alloc a new one if needed */
+		{
+			size_t const optSpace = ((MaxML + 1) + (MaxLL + 1) + (MaxOff + 1) + (1 << Litbits)) * sizeof(U32) +
+						(ZSTD_OPT_NUM + 1) * (sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
+			size_t const neededSpace = tableSpace + (256 * sizeof(U32)) /* huffTable */ + tokenSpace +
+						   (((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) ? optSpace : 0);
+			if (zc->workSpaceSize < neededSpace) {
+				ZSTD_free(zc->workSpace, zc->customMem);
+				zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
+				if (zc->workSpace == NULL)
+					return ERROR(memory_allocation);
+				zc->workSpaceSize = neededSpace;
+			}
+		}
+
+		if (crp != ZSTDcrp_noMemset)
+			memset(zc->workSpace, 0, tableSpace); /* reset tables only */
+		xxh64_reset(&zc->xxhState, 0);
+		zc->hashLog3 = hashLog3;
+		zc->hashTable = (U32 *)(zc->workSpace);
+		zc->chainTable = zc->hashTable + hSize;
+		zc->hashTable3 = zc->chainTable + chainSize;
+		ptr = zc->hashTable3 + h3Size;
+		zc->hufTable = (HUF_CElt *)ptr;
+		zc->flagStaticTables = 0;
+		zc->flagStaticHufTable = HUF_repeat_none;
+		ptr = ((U32 *)ptr) + 256; /* note : HUF_CElt* is incomplete type, size is simulated using U32 */
+
+		zc->nextToUpdate = 1;
+		zc->nextSrc = NULL;
+		zc->base = NULL;
+		zc->dictBase = NULL;
+		zc->dictLimit = 0;
+		zc->lowLimit = 0;
+		zc->params = params;
+		zc->blockSize = blockSize;
+		zc->frameContentSize = frameContentSize;
+		{
+			int i;
+			for (i = 0; i < ZSTD_REP_NUM; i++)
+				zc->rep[i] = repStartValue[i];
+		}
+
+		if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) {
+			zc->seqStore.litFreq = (U32 *)ptr;
+			zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1 << Litbits);
+			zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL + 1);
+			zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML + 1);
+			ptr = zc->seqStore.offCodeFreq + (MaxOff + 1);
+			zc->seqStore.matchTable = (ZSTD_match_t *)ptr;
+			ptr = zc->seqStore.matchTable + ZSTD_OPT_NUM + 1;
+			zc->seqStore.priceTable = (ZSTD_optimal_t *)ptr;
+			ptr = zc->seqStore.priceTable + ZSTD_OPT_NUM + 1;
+			zc->seqStore.litLengthSum = 0;
+		}
+		zc->seqStore.sequencesStart = (seqDef *)ptr;
+		ptr = zc->seqStore.sequencesStart + maxNbSeq;
+		zc->seqStore.llCode = (BYTE *)ptr;
+		zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
+		zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
+		zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
+
+		zc->stage = ZSTDcs_init;
+		zc->dictID = 0;
+		zc->loadedDictEnd = 0;
+
+		return 0;
+	}
+}
+
+/* ZSTD_invalidateRepCodes() :
+ * ensures next compression will not use repcodes from previous block.
+ * Note : only works with regular variant;
+ *        do not use with extDict variant ! */
+void ZSTD_invalidateRepCodes(ZSTD_CCtx *cctx)
+{
+	int i;
+	for (i = 0; i < ZSTD_REP_NUM; i++)
+		cctx->rep[i] = 0;
+}
+
+/*! ZSTD_copyCCtx() :
+*   Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
+*   Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
+*   @return : 0, or an error code */
+size_t ZSTD_copyCCtx(ZSTD_CCtx *dstCCtx, const ZSTD_CCtx *srcCCtx, unsigned long long pledgedSrcSize)
+{
+	if (srcCCtx->stage != ZSTDcs_init)
+		return ERROR(stage_wrong);
+
+	memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
+	{
+		ZSTD_parameters params = srcCCtx->params;
+		params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
+		ZSTD_resetCCtx_advanced(dstCCtx, params, pledgedSrcSize, ZSTDcrp_noMemset);
+	}
+
+	/* copy tables */
+	{
+		size_t const chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog);
+		size_t const hSize = ((size_t)1) << srcCCtx->params.cParams.hashLog;
+		size_t const h3Size = (size_t)1 << srcCCtx->hashLog3;
+		size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
+		memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace);
+	}
+
+	/* copy dictionary offsets */
+	dstCCtx->nextToUpdate = srcCCtx->nextToUpdate;
+	dstCCtx->nextToUpdate3 = srcCCtx->nextToUpdate3;
+	dstCCtx->nextSrc = srcCCtx->nextSrc;
+	dstCCtx->base = srcCCtx->base;
+	dstCCtx->dictBase = srcCCtx->dictBase;
+	dstCCtx->dictLimit = srcCCtx->dictLimit;
+	dstCCtx->lowLimit = srcCCtx->lowLimit;
+	dstCCtx->loadedDictEnd = srcCCtx->loadedDictEnd;
+	dstCCtx->dictID = srcCCtx->dictID;
+
+	/* copy entropy tables */
+	dstCCtx->flagStaticTables = srcCCtx->flagStaticTables;
+	dstCCtx->flagStaticHufTable = srcCCtx->flagStaticHufTable;
+	if (srcCCtx->flagStaticTables) {
+		memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable));
+		memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable));
+		memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable));
+	}
+	if (srcCCtx->flagStaticHufTable) {
+		memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256 * 4);
+	}
+
+	return 0;
+}
+
+/*! ZSTD_reduceTable() :
+*   reduce table indexes by `reducerValue` */
+static void ZSTD_reduceTable(U32 *const table, U32 const size, U32 const reducerValue)
+{
+	U32 u;
+	for (u = 0; u < size; u++) {
+		if (table[u] < reducerValue)
+			table[u] = 0;
+		else
+			table[u] -= reducerValue;
+	}
+}
+
+/*! ZSTD_reduceIndex() :
+*   rescale all indexes to avoid future overflow (indexes are U32) */
+static void ZSTD_reduceIndex(ZSTD_CCtx *zc, const U32 reducerValue)
+{
+	{
+		U32 const hSize = 1 << zc->params.cParams.hashLog;
+		ZSTD_reduceTable(zc->hashTable, hSize, reducerValue);
+	}
+
+	{
+		U32 const chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog);
+		ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue);
+	}
+
+	{
+		U32 const h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
+		ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue);
+	}
+}
+
+/*-*******************************************************
+*  Block entropic compression
+*********************************************************/
+
+/* See doc/zstd_compression_format.md for detailed format description */
+
+size_t ZSTD_noCompressBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
+{
+	if (srcSize + ZSTD_blockHeaderSize > dstCapacity)
+		return ERROR(dstSize_tooSmall);
+	memcpy((BYTE *)dst + ZSTD_blockHeaderSize, src, srcSize);
+	ZSTD_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw);
+	return ZSTD_blockHeaderSize + srcSize;
+}
+
+static size_t ZSTD_noCompressLiterals(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
+{
+	BYTE *const ostart = (BYTE * const)dst;
+	U32 const flSize = 1 + (srcSize > 31) + (srcSize > 4095);
+
+	if (srcSize + flSize > dstCapacity)
+		return ERROR(dstSize_tooSmall);
+
+	switch (flSize) {
+	case 1: /* 2 - 1 - 5 */ ostart[0] = (BYTE)((U32)set_basic + (srcSize << 3)); break;
+	case 2: /* 2 - 2 - 12 */ ZSTD_writeLE16(ostart, (U16)((U32)set_basic + (1 << 2) + (srcSize << 4))); break;
+	default: /*note : should not be necessary : flSize is within {1,2,3} */
+	case 3: /* 2 - 2 - 20 */ ZSTD_writeLE32(ostart, (U32)((U32)set_basic + (3 << 2) + (srcSize << 4))); break;
+	}
+
+	memcpy(ostart + flSize, src, srcSize);
+	return srcSize + flSize;
+}
+
+static size_t ZSTD_compressRleLiteralsBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
+{
+	BYTE *const ostart = (BYTE * const)dst;
+	U32 const flSize = 1 + (srcSize > 31) + (srcSize > 4095);
+
+	(void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
+
+	switch (flSize) {
+	case 1: /* 2 - 1 - 5 */ ostart[0] = (BYTE)((U32)set_rle + (srcSize << 3)); break;
+	case 2: /* 2 - 2 - 12 */ ZSTD_writeLE16(ostart, (U16)((U32)set_rle + (1 << 2) + (srcSize << 4))); break;
+	default: /*note : should not be necessary : flSize is necessarily within {1,2,3} */
+	case 3: /* 2 - 2 - 20 */ ZSTD_writeLE32(ostart, (U32)((U32)set_rle + (3 << 2) + (srcSize << 4))); break;
+	}
+
+	ostart[flSize] = *(const BYTE *)src;
+	return flSize + 1;
+}
+
+static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; }
+
+static size_t ZSTD_compressLiterals(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
+{
+	size_t const minGain = ZSTD_minGain(srcSize);
+	size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
+	BYTE *const ostart = (BYTE *)dst;
+	U32 singleStream = srcSize < 256;
+	symbolEncodingType_e hType = set_compressed;
+	size_t cLitSize;
+
+/* small ? don't even attempt compression (speed opt) */
+#define LITERAL_NOENTROPY 63
+	{
+		size_t const minLitSize = zc->flagStaticHufTable == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
+		if (srcSize <= minLitSize)
+			return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+	}
+
+	if (dstCapacity < lhSize + 1)
+		return ERROR(dstSize_tooSmall); /* not enough space for compression */
+	{
+		HUF_repeat repeat = zc->flagStaticHufTable;
+		int const preferRepeat = zc->params.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
+		if (repeat == HUF_repeat_valid && lhSize == 3)
+			singleStream = 1;
+		cLitSize = singleStream ? HUF_compress1X_repeat(ostart + lhSize, dstCapacity - lhSize, src, srcSize, 255, 11, zc->tmpCounters,
+								sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat)
+					: HUF_compress4X_repeat(ostart + lhSize, dstCapacity - lhSize, src, srcSize, 255, 11, zc->tmpCounters,
+								sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat);
+		if (repeat != HUF_repeat_none) {
+			hType = set_repeat;
+		} /* reused the existing table */
+		else {
+			zc->flagStaticHufTable = HUF_repeat_check;
+		} /* now have a table to reuse */
+	}
+
+	if ((cLitSize == 0) | (cLitSize >= srcSize - minGain)) {
+		zc->flagStaticHufTable = HUF_repeat_none;
+		return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+	}
+	if (cLitSize == 1) {
+		zc->flagStaticHufTable = HUF_repeat_none;
+		return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
+	}
+
+	/* Build header */
+	switch (lhSize) {
+	case 3: /* 2 - 2 - 10 - 10 */
+	{
+		U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize << 4) + ((U32)cLitSize << 14);
+		ZSTD_writeLE24(ostart, lhc);
+		break;
+	}
+	case 4: /* 2 - 2 - 14 - 14 */
+	{
+		U32 const lhc = hType + (2 << 2) + ((U32)srcSize << 4) + ((U32)cLitSize << 18);
+		ZSTD_writeLE32(ostart, lhc);
+		break;
+	}
+	default: /* should not be necessary, lhSize is only {3,4,5} */
+	case 5:  /* 2 - 2 - 18 - 18 */
+	{
+		U32 const lhc = hType + (3 << 2) + ((U32)srcSize << 4) + ((U32)cLitSize << 22);
+		ZSTD_writeLE32(ostart, lhc);
+		ostart[4] = (BYTE)(cLitSize >> 10);
+		break;
+	}
+	}
+	return lhSize + cLitSize;
+}
+
+static const BYTE LL_Code[64] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 16, 17, 17, 18, 18,
+				 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23,
+				 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24};
+
+static const BYTE ML_Code[128] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+				  26, 27, 28, 29, 30, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38,
+				  38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+				  40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 42, 42, 42,
+				  42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42};
+
+void ZSTD_seqToCodes(const seqStore_t *seqStorePtr)
+{
+	BYTE const LL_deltaCode = 19;
+	BYTE const ML_deltaCode = 36;
+	const seqDef *const sequences = seqStorePtr->sequencesStart;
+	BYTE *const llCodeTable = seqStorePtr->llCode;
+	BYTE *const ofCodeTable = seqStorePtr->ofCode;
+	BYTE *const mlCodeTable = seqStorePtr->mlCode;
+	U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+	U32 u;
+	for (u = 0; u < nbSeq; u++) {
+		U32 const llv = sequences[u].litLength;
+		U32 const mlv = sequences[u].matchLength;
+		llCodeTable[u] = (llv > 63) ? (BYTE)ZSTD_highbit32(llv) + LL_deltaCode : LL_Code[llv];
+		ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
+		mlCodeTable[u] = (mlv > 127) ? (BYTE)ZSTD_highbit32(mlv) + ML_deltaCode : ML_Code[mlv];
+	}
+	if (seqStorePtr->longLengthID == 1)
+		llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
+	if (seqStorePtr->longLengthID == 2)
+		mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
+}
+
+ZSTD_STATIC size_t ZSTD_compressSequences_internal(ZSTD_CCtx *zc, void *dst, size_t dstCapacity)
+{
+	const int longOffsets = zc->params.cParams.windowLog > STREAM_ACCUMULATOR_MIN;
+	const seqStore_t *seqStorePtr = &(zc->seqStore);
+	FSE_CTable *CTable_LitLength = zc->litlengthCTable;
+	FSE_CTable *CTable_OffsetBits = zc->offcodeCTable;
+	FSE_CTable *CTable_MatchLength = zc->matchlengthCTable;
+	U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
+	const seqDef *const sequences = seqStorePtr->sequencesStart;
+	const BYTE *const ofCodeTable = seqStorePtr->ofCode;
+	const BYTE *const llCodeTable = seqStorePtr->llCode;
+	const BYTE *const mlCodeTable = seqStorePtr->mlCode;
+	BYTE *const ostart = (BYTE *)dst;
+	BYTE *const oend = ostart + dstCapacity;
+	BYTE *op = ostart;
+	size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
+	BYTE *seqHead;
+
+	U32 *count;
+	S16 *norm;
+	U32 *workspace;
+	size_t workspaceSize = sizeof(zc->tmpCounters);
+	{
+		size_t spaceUsed32 = 0;
+		count = (U32 *)zc->tmpCounters + spaceUsed32;
+		spaceUsed32 += MaxSeq + 1;
+		norm = (S16 *)((U32 *)zc->tmpCounters + spaceUsed32);
+		spaceUsed32 += ALIGN(sizeof(S16) * (MaxSeq + 1), sizeof(U32)) >> 2;
+
+		workspace = (U32 *)zc->tmpCounters + spaceUsed32;
+		workspaceSize -= (spaceUsed32 << 2);
+	}
+
+	/* Compress literals */
+	{
+		const BYTE *const literals = seqStorePtr->litStart;
+		size_t const litSize = seqStorePtr->lit - literals;
+		size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize);
+		if (ZSTD_isError(cSize))
+			return cSize;
+		op += cSize;
+	}
+
+	/* Sequences Header */
+	if ((oend - op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */)
+		return ERROR(dstSize_tooSmall);
+	if (nbSeq < 0x7F)
+		*op++ = (BYTE)nbSeq;
+	else if (nbSeq < LONGNBSEQ)
+		op[0] = (BYTE)((nbSeq >> 8) + 0x80), op[1] = (BYTE)nbSeq, op += 2;
+	else
+		op[0] = 0xFF, ZSTD_writeLE16(op + 1, (U16)(nbSeq - LONGNBSEQ)), op += 3;
+	if (nbSeq == 0)
+		return op - ostart;
+
+	/* seqHead : flags for FSE encoding type */
+	seqHead = op++;
+
+#define MIN_SEQ_FOR_DYNAMIC_FSE 64
+#define MAX_SEQ_FOR_STATIC_FSE 1000
+
+	/* convert length/distances into codes */
+	ZSTD_seqToCodes(seqStorePtr);
+
+	/* CTable for Literal Lengths */
+	{
+		U32 max = MaxLL;
+		size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace);
+		if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
+			*op++ = llCodeTable[0];
+			FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
+			LLtype = set_rle;
+		} else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+			LLtype = set_repeat;
+		} else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog - 1)))) {
+			FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, workspace, workspaceSize);
+			LLtype = set_basic;
+		} else {
+			size_t nbSeq_1 = nbSeq;
+			const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
+			if (count[llCodeTable[nbSeq - 1]] > 1) {
+				count[llCodeTable[nbSeq - 1]]--;
+				nbSeq_1--;
+			}
+			FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
+			{
+				size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
+				if (FSE_isError(NCountSize))
+					return NCountSize;
+				op += NCountSize;
+			}
+			FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, workspace, workspaceSize);
+			LLtype = set_compressed;
+		}
+	}
+
+	/* CTable for Offsets */
+	{
+		U32 max = MaxOff;
+		size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace);
+		if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
+			*op++ = ofCodeTable[0];
+			FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
+			Offtype = set_rle;
+		} else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+			Offtype = set_repeat;
+		} else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog - 1)))) {
+			FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, workspace, workspaceSize);
+			Offtype = set_basic;
+		} else {
+			size_t nbSeq_1 = nbSeq;
+			const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
+			if (count[ofCodeTable[nbSeq - 1]] > 1) {
+				count[ofCodeTable[nbSeq - 1]]--;
+				nbSeq_1--;
+			}
+			FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
+			{
+				size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
+				if (FSE_isError(NCountSize))
+					return NCountSize;
+				op += NCountSize;
+			}
+			FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, workspace, workspaceSize);
+			Offtype = set_compressed;
+		}
+	}
+
+	/* CTable for MatchLengths */
+	{
+		U32 max = MaxML;
+		size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace);
+		if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
+			*op++ = *mlCodeTable;
+			FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
+			MLtype = set_rle;
+		} else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+			MLtype = set_repeat;
+		} else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog - 1)))) {
+			FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, workspace, workspaceSize);
+			MLtype = set_basic;
+		} else {
+			size_t nbSeq_1 = nbSeq;
+			const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
+			if (count[mlCodeTable[nbSeq - 1]] > 1) {
+				count[mlCodeTable[nbSeq - 1]]--;
+				nbSeq_1--;
+			}
+			FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
+			{
+				size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
+				if (FSE_isError(NCountSize))
+					return NCountSize;
+				op += NCountSize;
+			}
+			FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, workspace, workspaceSize);
+			MLtype = set_compressed;
+		}
+	}
+
+	*seqHead = (BYTE)((LLtype << 6) + (Offtype << 4) + (MLtype << 2));
+	zc->flagStaticTables = 0;
+
+	/* Encoding Sequences */
+	{
+		BIT_CStream_t blockStream;
+		FSE_CState_t stateMatchLength;
+		FSE_CState_t stateOffsetBits;
+		FSE_CState_t stateLitLength;
+
+		CHECK_E(BIT_initCStream(&blockStream, op, oend - op), dstSize_tooSmall); /* not enough space remaining */
+
+		/* first symbols */
+		FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq - 1]);
+		FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq - 1]);
+		FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq - 1]);
+		BIT_addBits(&blockStream, sequences[nbSeq - 1].litLength, LL_bits[llCodeTable[nbSeq - 1]]);
+		if (ZSTD_32bits())
+			BIT_flushBits(&blockStream);
+		BIT_addBits(&blockStream, sequences[nbSeq - 1].matchLength, ML_bits[mlCodeTable[nbSeq - 1]]);
+		if (ZSTD_32bits())
+			BIT_flushBits(&blockStream);
+		if (longOffsets) {
+			U32 const ofBits = ofCodeTable[nbSeq - 1];
+			int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN - 1);
+			if (extraBits) {
+				BIT_addBits(&blockStream, sequences[nbSeq - 1].offset, extraBits);
+				BIT_flushBits(&blockStream);
+			}
+			BIT_addBits(&blockStream, sequences[nbSeq - 1].offset >> extraBits, ofBits - extraBits);
+		} else {
+			BIT_addBits(&blockStream, sequences[nbSeq - 1].offset, ofCodeTable[nbSeq - 1]);
+		}
+		BIT_flushBits(&blockStream);
+
+		{
+			size_t n;
+			for (n = nbSeq - 2; n < nbSeq; n--) { /* intentional underflow */
+				BYTE const llCode = llCodeTable[n];
+				BYTE const ofCode = ofCodeTable[n];
+				BYTE const mlCode = mlCodeTable[n];
+				U32 const llBits = LL_bits[llCode];
+				U32 const ofBits = ofCode; /* 32b*/ /* 64b*/
+				U32 const mlBits = ML_bits[mlCode];
+				/* (7)*/							    /* (7)*/
+				FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */  /* 15 */
+				FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
+				if (ZSTD_32bits())
+					BIT_flushBits(&blockStream);				  /* (7)*/
+				FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
+				if (ZSTD_32bits() || (ofBits + mlBits + llBits >= 64 - 7 - (LLFSELog + MLFSELog + OffFSELog)))
+					BIT_flushBits(&blockStream); /* (7)*/
+				BIT_addBits(&blockStream, sequences[n].litLength, llBits);
+				if (ZSTD_32bits() && ((llBits + mlBits) > 24))
+					BIT_flushBits(&blockStream);
+				BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
+				if (ZSTD_32bits())
+					BIT_flushBits(&blockStream); /* (7)*/
+				if (longOffsets) {
+					int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN - 1);
+					if (extraBits) {
+						BIT_addBits(&blockStream, sequences[n].offset, extraBits);
+						BIT_flushBits(&blockStream); /* (7)*/
+					}
+					BIT_addBits(&blockStream, sequences[n].offset >> extraBits, ofBits - extraBits); /* 31 */
+				} else {
+					BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
+				}
+				BIT_flushBits(&blockStream); /* (7)*/
+			}
+		}
+
+		FSE_flushCState(&blockStream, &stateMatchLength);
+		FSE_flushCState(&blockStream, &stateOffsetBits);
+		FSE_flushCState(&blockStream, &stateLitLength);
+
+		{
+			size_t const streamSize = BIT_closeCStream(&blockStream);
+			if (streamSize == 0)
+				return ERROR(dstSize_tooSmall); /* not enough space */
+			op += streamSize;
+		}
+	}
+	return op - ostart;
+}
+
+ZSTD_STATIC size_t ZSTD_compressSequences(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, size_t srcSize)
+{
+	size_t const cSize = ZSTD_compressSequences_internal(zc, dst, dstCapacity);
+	size_t const minGain = ZSTD_minGain(srcSize);
+	size_t const maxCSize = srcSize - minGain;
+	/* If the srcSize <= dstCapacity, then there is enough space to write a
+	 * raw uncompressed block. Since we ran out of space, the block must not
+	 * be compressible, so fall back to a raw uncompressed block.
+	 */
+	int const uncompressibleError = cSize == ERROR(dstSize_tooSmall) && srcSize <= dstCapacity;
+	int i;
+
+	if (ZSTD_isError(cSize) && !uncompressibleError)
+		return cSize;
+	if (cSize >= maxCSize || uncompressibleError) {
+		zc->flagStaticHufTable = HUF_repeat_none;
+		return 0;
+	}
+	/* confirm repcodes */
+	for (i = 0; i < ZSTD_REP_NUM; i++)
+		zc->rep[i] = zc->repToConfirm[i];
+	return cSize;
+}
+
+/*! ZSTD_storeSeq() :
+	Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
+	`offsetCode` : distance to match, or 0 == repCode.
+	`matchCode` : matchLength - MINMATCH
+*/
+ZSTD_STATIC void ZSTD_storeSeq(seqStore_t *seqStorePtr, size_t litLength, const void *literals, U32 offsetCode, size_t matchCode)
+{
+	/* copy Literals */
+	ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
+	seqStorePtr->lit += litLength;
+
+	/* literal Length */
+	if (litLength > 0xFFFF) {
+		seqStorePtr->longLengthID = 1;
+		seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+	}
+	seqStorePtr->sequences[0].litLength = (U16)litLength;
+
+	/* match offset */
+	seqStorePtr->sequences[0].offset = offsetCode + 1;
+
+	/* match Length */
+	if (matchCode > 0xFFFF) {
+		seqStorePtr->longLengthID = 2;
+		seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+	}
+	seqStorePtr->sequences[0].matchLength = (U16)matchCode;
+
+	seqStorePtr->sequences++;
+}
+
+/*-*************************************
+*  Match length counter
+***************************************/
+static unsigned ZSTD_NbCommonBytes(register size_t val)
+{
+	if (ZSTD_isLittleEndian()) {
+		if (ZSTD_64bits()) {
+			return (__builtin_ctzll((U64)val) >> 3);
+		} else { /* 32 bits */
+			return (__builtin_ctz((U32)val) >> 3);
+		}
+	} else { /* Big Endian CPU */
+		if (ZSTD_64bits()) {
+			return (__builtin_clzll(val) >> 3);
+		} else { /* 32 bits */
+			return (__builtin_clz((U32)val) >> 3);
+		}
+	}
+}
+
+static size_t ZSTD_count(const BYTE *pIn, const BYTE *pMatch, const BYTE *const pInLimit)
+{
+	const BYTE *const pStart = pIn;
+	const BYTE *const pInLoopLimit = pInLimit - (sizeof(size_t) - 1);
+
+	while (pIn < pInLoopLimit) {
+		size_t const diff = ZSTD_readST(pMatch) ^ ZSTD_readST(pIn);
+		if (!diff) {
+			pIn += sizeof(size_t);
+			pMatch += sizeof(size_t);
+			continue;
+		}
+		pIn += ZSTD_NbCommonBytes(diff);
+		return (size_t)(pIn - pStart);
+	}
+	if (ZSTD_64bits())
+		if ((pIn < (pInLimit - 3)) && (ZSTD_read32(pMatch) == ZSTD_read32(pIn))) {
+			pIn += 4;
+			pMatch += 4;
+		}
+	if ((pIn < (pInLimit - 1)) && (ZSTD_read16(pMatch) == ZSTD_read16(pIn))) {
+		pIn += 2;
+		pMatch += 2;
+	}
+	if ((pIn < pInLimit) && (*pMatch == *pIn))
+		pIn++;
+	return (size_t)(pIn - pStart);
+}
+
+/** ZSTD_count_2segments() :
+*   can count match length with `ip` & `match` in 2 different segments.
+*   convention : on reaching mEnd, match count continue starting from iStart
+*/
+static size_t ZSTD_count_2segments(const BYTE *ip, const BYTE *match, const BYTE *iEnd, const BYTE *mEnd, const BYTE *iStart)
+{
+	const BYTE *const vEnd = MIN(ip + (mEnd - match), iEnd);
+	size_t const matchLength = ZSTD_count(ip, match, vEnd);
+	if (match + matchLength != mEnd)
+		return matchLength;
+	return matchLength + ZSTD_count(ip + matchLength, iStart, iEnd);
+}
+
+/*-*************************************
+*  Hashes
+***************************************/
+static const U32 prime3bytes = 506832829U;
+static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32 - 24)) * prime3bytes) >> (32 - h); }
+ZSTD_STATIC size_t ZSTD_hash3Ptr(const void *ptr, U32 h) { return ZSTD_hash3(ZSTD_readLE32(ptr), h); } /* only in zstd_opt.h */
+
+static const U32 prime4bytes = 2654435761U;
+static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32 - h); }
+static size_t ZSTD_hash4Ptr(const void *ptr, U32 h) { return ZSTD_hash4(ZSTD_read32(ptr), h); }
+
+static const U64 prime5bytes = 889523592379ULL;
+static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64 - 40)) * prime5bytes) >> (64 - h)); }
+static size_t ZSTD_hash5Ptr(const void *p, U32 h) { return ZSTD_hash5(ZSTD_readLE64(p), h); }
+
+static const U64 prime6bytes = 227718039650203ULL;
+static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64 - 48)) * prime6bytes) >> (64 - h)); }
+static size_t ZSTD_hash6Ptr(const void *p, U32 h) { return ZSTD_hash6(ZSTD_readLE64(p), h); }
+
+static const U64 prime7bytes = 58295818150454627ULL;
+static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64 - 56)) * prime7bytes) >> (64 - h)); }
+static size_t ZSTD_hash7Ptr(const void *p, U32 h) { return ZSTD_hash7(ZSTD_readLE64(p), h); }
+
+static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
+static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u)*prime8bytes) >> (64 - h)); }
+static size_t ZSTD_hash8Ptr(const void *p, U32 h) { return ZSTD_hash8(ZSTD_readLE64(p), h); }
+
+static size_t ZSTD_hashPtr(const void *p, U32 hBits, U32 mls)
+{
+	switch (mls) {
+	// case 3: return ZSTD_hash3Ptr(p, hBits);
+	default:
+	case 4: return ZSTD_hash4Ptr(p, hBits);
+	case 5: return ZSTD_hash5Ptr(p, hBits);
+	case 6: return ZSTD_hash6Ptr(p, hBits);
+	case 7: return ZSTD_hash7Ptr(p, hBits);
+	case 8: return ZSTD_hash8Ptr(p, hBits);
+	}
+}
+
+/*-*************************************
+*  Fast Scan
+***************************************/
+static void ZSTD_fillHashTable(ZSTD_CCtx *zc, const void *end, const U32 mls)
+{
+	U32 *const hashTable = zc->hashTable;
+	U32 const hBits = zc->params.cParams.hashLog;
+	const BYTE *const base = zc->base;
+	const BYTE *ip = base + zc->nextToUpdate;
+	const BYTE *const iend = ((const BYTE *)end) - HASH_READ_SIZE;
+	const size_t fastHashFillStep = 3;
+
+	while (ip <= iend) {
+		hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);
+		ip += fastHashFillStep;
+	}
+}
+
+FORCE_INLINE
+void ZSTD_compressBlock_fast_generic(ZSTD_CCtx *cctx, const void *src, size_t srcSize, const U32 mls)
+{
+	U32 *const hashTable = cctx->hashTable;
+	U32 const hBits = cctx->params.cParams.hashLog;
+	seqStore_t *seqStorePtr = &(cctx->seqStore);
+	const BYTE *const base = cctx->base;
+	const BYTE *const istart = (const BYTE *)src;
+	const BYTE *ip = istart;
+	const BYTE *anchor = istart;
+	const U32 lowestIndex = cctx->dictLimit;
+	const BYTE *const lowest = base + lowestIndex;
+	const BYTE *const iend = istart + srcSize;
+	const BYTE *const ilimit = iend - HASH_READ_SIZE;
+	U32 offset_1 = cctx->rep[0], offset_2 = cctx->rep[1];
+	U32 offsetSaved = 0;
+
+	/* init */
+	ip += (ip == lowest);
+	{
+		U32 const maxRep = (U32)(ip - lowest);
+		if (offset_2 > maxRep)
+			offsetSaved = offset_2, offset_2 = 0;
+		if (offset_1 > maxRep)
+			offsetSaved = offset_1, offset_1 = 0;
+	}
+
+	/* Main Search Loop */
+	while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
+		size_t mLength;
+		size_t const h = ZSTD_hashPtr(ip, hBits, mls);
+		U32 const curr = (U32)(ip - base);
+		U32 const matchIndex = hashTable[h];
+		const BYTE *match = base + matchIndex;
+		hashTable[h] = curr; /* update hash table */
+
+		if ((offset_1 > 0) & (ZSTD_read32(ip + 1 - offset_1) == ZSTD_read32(ip + 1))) {
+			mLength = ZSTD_count(ip + 1 + 4, ip + 1 + 4 - offset_1, iend) + 4;
+			ip++;
+			ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH);
+		} else {
+			U32 offset;
+			if ((matchIndex <= lowestIndex) || (ZSTD_read32(match) != ZSTD_read32(ip))) {
+				ip += ((ip - anchor) >> g_searchStrength) + 1;
+				continue;
+			}
+			mLength = ZSTD_count(ip + 4, match + 4, iend) + 4;
+			offset = (U32)(ip - match);
+			while (((ip > anchor) & (match > lowest)) && (ip[-1] == match[-1])) {
+				ip--;
+				match--;
+				mLength++;
+			} /* catch up */
+			offset_2 = offset_1;
+			offset_1 = offset;
+
+			ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH);
+		}
+
+		/* match found */
+		ip += mLength;
+		anchor = ip;
+
+		if (ip <= ilimit) {
+			/* Fill Table */
+			hashTable[ZSTD_hashPtr(base + curr + 2, hBits, mls)] = curr + 2; /* here because curr+2 could be > iend-8 */
+			hashTable[ZSTD_hashPtr(ip - 2, hBits, mls)] = (U32)(ip - 2 - base);
+			/* check immediate repcode */
+			while ((ip <= ilimit) && ((offset_2 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_2)))) {
+				/* store sequence */
+				size_t const rLength = ZSTD_count(ip + 4, ip + 4 - offset_2, iend) + 4;
+				{
+					U32 const tmpOff = offset_2;
+					offset_2 = offset_1;
+					offset_1 = tmpOff;
+				} /* swap offset_2 <=> offset_1 */
+				hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);
+				ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength - MINMATCH);
+				ip += rLength;
+				anchor = ip;
+				continue; /* faster when present ... (?) */
+			}
+		}
+	}
+
+	/* save reps for next block */
+	cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
+	cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
+
+	/* Last Literals */
+	{
+		size_t const lastLLSize = iend - anchor;
+		memcpy(seqStorePtr->lit, anchor, lastLLSize);
+		seqStorePtr->lit += lastLLSize;
+	}
+}
+
+static void ZSTD_compressBlock_fast(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
+{
+	const U32 mls = ctx->params.cParams.searchLength;
+	switch (mls) {
+	default: /* includes case 3 */
+	case 4: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return;
+	case 5: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return;
+	case 6: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return;
+	case 7: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return;
+	}
+}
+
+static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 mls)
+{
+	U32 *hashTable = ctx->hashTable;
+	const U32 hBits = ctx->params.cParams.hashLog;
+	seqStore_t *seqStorePtr = &(ctx->seqStore);
+	const BYTE *const base = ctx->base;
+	const BYTE *const dictBase = ctx->dictBase;
+	const BYTE *const istart = (const BYTE *)src;
+	const BYTE *ip = istart;
+	const BYTE *anchor = istart;
+	const U32 lowestIndex = ctx->lowLimit;
+	const BYTE *const dictStart = dictBase + lowestIndex;
+	const U32 dictLimit = ctx->dictLimit;
+	const BYTE *const lowPrefixPtr = base + dictLimit;
+	const BYTE *const dictEnd = dictBase + dictLimit;
+	const BYTE *const iend = istart + srcSize;
+	const BYTE *const ilimit = iend - 8;
+	U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
+
+	/* Search Loop */
+	while (ip < ilimit) { /* < instead of <=, because (ip+1) */
+		const size_t h = ZSTD_hashPtr(ip, hBits, mls);
+		const U32 matchIndex = hashTable[h];
+		const BYTE *matchBase = matchIndex < dictLimit ? dictBase : base;
+		const BYTE *match = matchBase + matchIndex;
+		const U32 curr = (U32)(ip - base);
+		const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
+		const BYTE *repBase = repIndex < dictLimit ? dictBase : base;
+		const BYTE *repMatch = repBase + repIndex;
+		size_t mLength;
+		hashTable[h] = curr; /* update hash table */
+
+		if ((((U32)((dictLimit - 1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) &&
+		    (ZSTD_read32(repMatch) == ZSTD_read32(ip + 1))) {
+			const BYTE *repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
+			mLength = ZSTD_count_2segments(ip + 1 + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32;
+			ip++;
+			ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH);
+		} else {
+			if ((matchIndex < lowestIndex) || (ZSTD_read32(match) != ZSTD_read32(ip))) {
+				ip += ((ip - anchor) >> g_searchStrength) + 1;
+				continue;
+			}
+			{
+				const BYTE *matchEnd = matchIndex < dictLimit ? dictEnd : iend;
+				const BYTE *lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
+				U32 offset;
+				mLength = ZSTD_count_2segments(ip + EQUAL_READ32, match + EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32;
+				while (((ip > anchor) & (match > lowMatchPtr)) && (ip[-1] == match[-1])) {
+					ip--;
+					match--;
+					mLength++;
+				} /* catch up */
+				offset = curr - matchIndex;
+				offset_2 = offset_1;
+				offset_1 = offset;
+				ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH);
+			}
+		}
+
+		/* found a match : store it */
+		ip += mLength;
+		anchor = ip;
+
+		if (ip <= ilimit) {
+			/* Fill Table */
+			hashTable[ZSTD_hashPtr(base + curr + 2, hBits, mls)] = curr + 2;
+			hashTable[ZSTD_hashPtr(ip - 2, hBits, mls)] = (U32)(ip - 2 - base);
+			/* check immediate repcode */
+			while (ip <= ilimit) {
+				U32 const curr2 = (U32)(ip - base);
+				U32 const repIndex2 = curr2 - offset_2;
+				const BYTE *repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
+				if ((((U32)((dictLimit - 1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
+				    && (ZSTD_read32(repMatch2) == ZSTD_read32(ip))) {
+					const BYTE *const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
+					size_t repLength2 =
+					    ZSTD_count_2segments(ip + EQUAL_READ32, repMatch2 + EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
+					U32 tmpOffset = offset_2;
+					offset_2 = offset_1;
+					offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
+					ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2 - MINMATCH);
+					hashTable[ZSTD_hashPtr(ip, hBits, mls)] = curr2;
+					ip += repLength2;
+					anchor = ip;
+					continue;
+				}
+				break;
+			}
+		}
+	}
+
+	/* save reps for next block */
+	ctx->repToConfirm[0] = offset_1;
+	ctx->repToConfirm[1] = offset_2;
+
+	/* Last Literals */
+	{
+		size_t const lastLLSize = iend - anchor;
+		memcpy(seqStorePtr->lit, anchor, lastLLSize);
+		seqStorePtr->lit += lastLLSize;
+	}
+}
+
+static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
+{
+	U32 const mls = ctx->params.cParams.searchLength;
+	switch (mls) {
+	default: /* includes case 3 */
+	case 4: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return;
+	case 5: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return;
+	case 6: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return;
+	case 7: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return;
+	}
+}
+
+/*-*************************************
+*  Double Fast
+***************************************/
+static void ZSTD_fillDoubleHashTable(ZSTD_CCtx *cctx, const void *end, const U32 mls)
+{
+	U32 *const hashLarge = cctx->hashTable;
+	U32 const hBitsL = cctx->params.cParams.hashLog;
+	U32 *const hashSmall = cctx->chainTable;
+	U32 const hBitsS = cctx->params.cParams.chainLog;
+	const BYTE *const base = cctx->base;
+	const BYTE *ip = base + cctx->nextToUpdate;
+	const BYTE *const iend = ((const BYTE *)end) - HASH_READ_SIZE;
+	const size_t fastHashFillStep = 3;
+
+	while (ip <= iend) {
+		hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base);
+		hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base);
+		ip += fastHashFillStep;
+	}
+}
+
+FORCE_INLINE
+void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx *cctx, const void *src, size_t srcSize, const U32 mls)
+{
+	U32 *const hashLong = cctx->hashTable;
+	const U32 hBitsL = cctx->params.cParams.hashLog;
+	U32 *const hashSmall = cctx->chainTable;
+	const U32 hBitsS = cctx->params.cParams.chainLog;
+	seqStore_t *seqStorePtr = &(cctx->seqStore);
+	const BYTE *const base = cctx->base;
+	const BYTE *const istart = (const BYTE *)src;
+	const BYTE *ip = istart;
+	const BYTE *anchor = istart;
+	const U32 lowestIndex = cctx->dictLimit;
+	const BYTE *const lowest = base + lowestIndex;
+	const BYTE *const iend = istart + srcSize;
+	const BYTE *const ilimit = iend - HASH_READ_SIZE;
+	U32 offset_1 = cctx->rep[0], offset_2 = cctx->rep[1];
+	U32 offsetSaved = 0;
+
+	/* init */
+	ip += (ip == lowest);
+	{
+		U32 const maxRep = (U32)(ip - lowest);
+		if (offset_2 > maxRep)
+			offsetSaved = offset_2, offset_2 = 0;
+		if (offset_1 > maxRep)
+			offsetSaved = offset_1, offset_1 = 0;
+	}
+
+	/* Main Search Loop */
+	while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
+		size_t mLength;
+		size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
+		size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
+		U32 const curr = (U32)(ip - base);
+		U32 const matchIndexL = hashLong[h2];
+		U32 const matchIndexS = hashSmall[h];
+		const BYTE *matchLong = base + matchIndexL;
+		const BYTE *match = base + matchIndexS;
+		hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
+
+		if ((offset_1 > 0) & (ZSTD_read32(ip + 1 - offset_1) == ZSTD_read32(ip + 1))) { /* note : by construction, offset_1 <= curr */
+			mLength = ZSTD_count(ip + 1 + 4, ip + 1 + 4 - offset_1, iend) + 4;
+			ip++;
+			ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH);
+		} else {
+			U32 offset;
+			if ((matchIndexL > lowestIndex) && (ZSTD_read64(matchLong) == ZSTD_read64(ip))) {
+				mLength = ZSTD_count(ip + 8, matchLong + 8, iend) + 8;
+				offset = (U32)(ip - matchLong);
+				while (((ip > anchor) & (matchLong > lowest)) && (ip[-1] == matchLong[-1])) {
+					ip--;
+					matchLong--;
+					mLength++;
+				} /* catch up */
+			} else if ((matchIndexS > lowestIndex) && (ZSTD_read32(match) == ZSTD_read32(ip))) {
+				size_t const h3 = ZSTD_hashPtr(ip + 1, hBitsL, 8);
+				U32 const matchIndex3 = hashLong[h3];
+				const BYTE *match3 = base + matchIndex3;
+				hashLong[h3] = curr + 1;
+				if ((matchIndex3 > lowestIndex) && (ZSTD_read64(match3) == ZSTD_read64(ip + 1))) {
+					mLength = ZSTD_count(ip + 9, match3 + 8, iend) + 8;
+					ip++;
+					offset = (U32)(ip - match3);
+					while (((ip > anchor) & (match3 > lowest)) && (ip[-1] == match3[-1])) {
+						ip--;
+						match3--;
+						mLength++;
+					} /* catch up */
+				} else {
+					mLength = ZSTD_count(ip + 4, match + 4, iend) + 4;
+					offset = (U32)(ip - match);
+					while (((ip > anchor) & (match > lowest)) && (ip[-1] == match[-1])) {
+						ip--;
+						match--;
+						mLength++;
+					} /* catch up */
+				}
+			} else {
+				ip += ((ip - anchor) >> g_searchStrength) + 1;
+				continue;
+			}
+
+			offset_2 = offset_1;
+			offset_1 = offset;
+
+			ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH);
+		}
+
+		/* match found */
+		ip += mLength;
+		anchor = ip;
+
+		if (ip <= ilimit) {
+			/* Fill Table */
+			hashLong[ZSTD_hashPtr(base + curr + 2, hBitsL, 8)] = hashSmall[ZSTD_hashPtr(base + curr + 2, hBitsS, mls)] =
+			    curr + 2; /* here because curr+2 could be > iend-8 */
+			hashLong[ZSTD_hashPtr(ip - 2, hBitsL, 8)] = hashSmall[ZSTD_hashPtr(ip - 2, hBitsS, mls)] = (U32)(ip - 2 - base);
+
+			/* check immediate repcode */
+			while ((ip <= ilimit) && ((offset_2 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_2)))) {
+				/* store sequence */
+				size_t const rLength = ZSTD_count(ip + 4, ip + 4 - offset_2, iend) + 4;
+				{
+					U32 const tmpOff = offset_2;
+					offset_2 = offset_1;
+					offset_1 = tmpOff;
+				} /* swap offset_2 <=> offset_1 */
+				hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base);
+				hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base);
+				ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength - MINMATCH);
+				ip += rLength;
+				anchor = ip;
+				continue; /* faster when present ... (?) */
+			}
+		}
+	}
+
+	/* save reps for next block */
+	cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
+	cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
+
+	/* Last Literals */
+	{
+		size_t const lastLLSize = iend - anchor;
+		memcpy(seqStorePtr->lit, anchor, lastLLSize);
+		seqStorePtr->lit += lastLLSize;
+	}
+}
+
+static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
+{
+	const U32 mls = ctx->params.cParams.searchLength;
+	switch (mls) {
+	default: /* includes case 3 */
+	case 4: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return;
+	case 5: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); return;
+	case 6: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); return;
+	case 7: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); return;
+	}
+}
+
+static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 mls)
+{
+	U32 *const hashLong = ctx->hashTable;
+	U32 const hBitsL = ctx->params.cParams.hashLog;
+	U32 *const hashSmall = ctx->chainTable;
+	U32 const hBitsS = ctx->params.cParams.chainLog;
+	seqStore_t *seqStorePtr = &(ctx->seqStore);
+	const BYTE *const base = ctx->base;
+	const BYTE *const dictBase = ctx->dictBase;
+	const BYTE *const istart = (const BYTE *)src;
+	const BYTE *ip = istart;
+	const BYTE *anchor = istart;
+	const U32 lowestIndex = ctx->lowLimit;
+	const BYTE *const dictStart = dictBase + lowestIndex;
+	const U32 dictLimit = ctx->dictLimit;
+	const BYTE *const lowPrefixPtr = base + dictLimit;
+	const BYTE *const dictEnd = dictBase + dictLimit;
+	const BYTE *const iend = istart + srcSize;
+	const BYTE *const ilimit = iend - 8;
+	U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
+
+	/* Search Loop */
+	while (ip < ilimit) { /* < instead of <=, because (ip+1) */
+		const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
+		const U32 matchIndex = hashSmall[hSmall];
+		const BYTE *matchBase = matchIndex < dictLimit ? dictBase : base;
+		const BYTE *match = matchBase + matchIndex;
+
+		const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
+		const U32 matchLongIndex = hashLong[hLong];
+		const BYTE *matchLongBase = matchLongIndex < dictLimit ? dictBase : base;
+		const BYTE *matchLong = matchLongBase + matchLongIndex;
+
+		const U32 curr = (U32)(ip - base);
+		const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
+		const BYTE *repBase = repIndex < dictLimit ? dictBase : base;
+		const BYTE *repMatch = repBase + repIndex;
+		size_t mLength;
+		hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
+
+		if ((((U32)((dictLimit - 1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) &&
+		    (ZSTD_read32(repMatch) == ZSTD_read32(ip + 1))) {
+			const BYTE *repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
+			mLength = ZSTD_count_2segments(ip + 1 + 4, repMatch + 4, iend, repMatchEnd, lowPrefixPtr) + 4;
+			ip++;
+			ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH);
+		} else {
+			if ((matchLongIndex > lowestIndex) && (ZSTD_read64(matchLong) == ZSTD_read64(ip))) {
+				const BYTE *matchEnd = matchLongIndex < dictLimit ? dictEnd : iend;
+				const BYTE *lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr;
+				U32 offset;
+				mLength = ZSTD_count_2segments(ip + 8, matchLong + 8, iend, matchEnd, lowPrefixPtr) + 8;
+				offset = curr - matchLongIndex;
+				while (((ip > anchor) & (matchLong > lowMatchPtr)) && (ip[-1] == matchLong[-1])) {
+					ip--;
+					matchLong--;
+					mLength++;
+				} /* catch up */
+				offset_2 = offset_1;
+				offset_1 = offset;
+				ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH);
+
+			} else if ((matchIndex > lowestIndex) && (ZSTD_read32(match) == ZSTD_read32(ip))) {
+				size_t const h3 = ZSTD_hashPtr(ip + 1, hBitsL, 8);
+				U32 const matchIndex3 = hashLong[h3];
+				const BYTE *const match3Base = matchIndex3 < dictLimit ? dictBase : base;
+				const BYTE *match3 = match3Base + matchIndex3;
+				U32 offset;
+				hashLong[h3] = curr + 1;
+				if ((matchIndex3 > lowestIndex) && (ZSTD_read64(match3) == ZSTD_read64(ip + 1))) {
+					const BYTE *matchEnd = matchIndex3 < dictLimit ? dictEnd : iend;
+					const BYTE *lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr;
+					mLength = ZSTD_count_2segments(ip + 9, match3 + 8, iend, matchEnd, lowPrefixPtr) + 8;
+					ip++;
+					offset = curr + 1 - matchIndex3;
+					while (((ip > anchor) & (match3 > lowMatchPtr)) && (ip[-1] == match3[-1])) {
+						ip--;
+						match3--;
+						mLength++;
+					} /* catch up */
+				} else {
+					const BYTE *matchEnd = matchIndex < dictLimit ? dictEnd : iend;
+					const BYTE *lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
+					mLength = ZSTD_count_2segments(ip + 4, match + 4, iend, matchEnd, lowPrefixPtr) + 4;
+					offset = curr - matchIndex;
+					while (((ip > anchor) & (match > lowMatchPtr)) && (ip[-1] == match[-1])) {
+						ip--;
+						match--;
+						mLength++;
+					} /* catch up */
+				}
+				offset_2 = offset_1;
+				offset_1 = offset;
+				ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH);
+
+			} else {
+				ip += ((ip - anchor) >> g_searchStrength) + 1;
+				continue;
+			}
+		}
+
+		/* found a match : store it */
+		ip += mLength;
+		anchor = ip;
+
+		if (ip <= ilimit) {
+			/* Fill Table */
+			hashSmall[ZSTD_hashPtr(base + curr + 2, hBitsS, mls)] = curr + 2;
+			hashLong[ZSTD_hashPtr(base + curr + 2, hBitsL, 8)] = curr + 2;
+			hashSmall[ZSTD_hashPtr(ip - 2, hBitsS, mls)] = (U32)(ip - 2 - base);
+			hashLong[ZSTD_hashPtr(ip - 2, hBitsL, 8)] = (U32)(ip - 2 - base);
+			/* check immediate repcode */
+			while (ip <= ilimit) {
+				U32 const curr2 = (U32)(ip - base);
+				U32 const repIndex2 = curr2 - offset_2;
+				const BYTE *repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
+				if ((((U32)((dictLimit - 1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
+				    && (ZSTD_read32(repMatch2) == ZSTD_read32(ip))) {
+					const BYTE *const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
+					size_t const repLength2 =
+					    ZSTD_count_2segments(ip + EQUAL_READ32, repMatch2 + EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
+					U32 tmpOffset = offset_2;
+					offset_2 = offset_1;
+					offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
+					ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2 - MINMATCH);
+					hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = curr2;
+					hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = curr2;
+					ip += repLength2;
+					anchor = ip;
+					continue;
+				}
+				break;
+			}
+		}
+	}
+
+	/* save reps for next block */
+	ctx->repToConfirm[0] = offset_1;
+	ctx->repToConfirm[1] = offset_2;
+
+	/* Last Literals */
+	{
+		size_t const lastLLSize = iend - anchor;
+		memcpy(seqStorePtr->lit, anchor, lastLLSize);
+		seqStorePtr->lit += lastLLSize;
+	}
+}
+
+static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
+{
+	U32 const mls = ctx->params.cParams.searchLength;
+	switch (mls) {
+	default: /* includes case 3 */
+	case 4: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return;
+	case 5: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); return;
+	case 6: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); return;
+	case 7: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); return;
+	}
+}
+
+/*-*************************************
+*  Binary Tree search
+***************************************/
+/** ZSTD_insertBt1() : add one or multiple positions to tree.
+*   ip : assumed <= iend-8 .
+*   @return : nb of positions added */
+static U32 ZSTD_insertBt1(ZSTD_CCtx *zc, const BYTE *const ip, const U32 mls, const BYTE *const iend, U32 nbCompares, U32 extDict)
+{
+	U32 *const hashTable = zc->hashTable;
+	U32 const hashLog = zc->params.cParams.hashLog;
+	size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
+	U32 *const bt = zc->chainTable;
+	U32 const btLog = zc->params.cParams.chainLog - 1;
+	U32 const btMask = (1 << btLog) - 1;
+	U32 matchIndex = hashTable[h];
+	size_t commonLengthSmaller = 0, commonLengthLarger = 0;
+	const BYTE *const base = zc->base;
+	const BYTE *const dictBase = zc->dictBase;
+	const U32 dictLimit = zc->dictLimit;
+	const BYTE *const dictEnd = dictBase + dictLimit;
+	const BYTE *const prefixStart = base + dictLimit;
+	const BYTE *match;
+	const U32 curr = (U32)(ip - base);
+	const U32 btLow = btMask >= curr ? 0 : curr - btMask;
+	U32 *smallerPtr = bt + 2 * (curr & btMask);
+	U32 *largerPtr = smallerPtr + 1;
+	U32 dummy32; /* to be nullified at the end */
+	U32 const windowLow = zc->lowLimit;
+	U32 matchEndIdx = curr + 8;
+	size_t bestLength = 8;
+
+	hashTable[h] = curr; /* Update Hash Table */
+
+	while (nbCompares-- && (matchIndex > windowLow)) {
+		U32 *const nextPtr = bt + 2 * (matchIndex & btMask);
+		size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
+
+		if ((!extDict) || (matchIndex + matchLength >= dictLimit)) {
+			match = base + matchIndex;
+			if (match[matchLength] == ip[matchLength])
+				matchLength += ZSTD_count(ip + matchLength + 1, match + matchLength + 1, iend) + 1;
+		} else {
+			match = dictBase + matchIndex;
+			matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iend, dictEnd, prefixStart);
+			if (matchIndex + matchLength >= dictLimit)
+				match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
+		}
+
+		if (matchLength > bestLength) {
+			bestLength = matchLength;
+			if (matchLength > matchEndIdx - matchIndex)
+				matchEndIdx = matchIndex + (U32)matchLength;
+		}
+
+		if (ip + matchLength == iend) /* equal : no way to know if inf or sup */
+			break;		      /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
+
+		if (match[matchLength] < ip[matchLength]) { /* necessarily within correct buffer */
+			/* match is smaller than curr */
+			*smallerPtr = matchIndex;	  /* update smaller idx */
+			commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
+			if (matchIndex <= btLow) {
+				smallerPtr = &dummy32;
+				break;
+			}			  /* beyond tree size, stop the search */
+			smallerPtr = nextPtr + 1; /* new "smaller" => larger of match */
+			matchIndex = nextPtr[1];  /* new matchIndex larger than previous (closer to curr) */
+		} else {
+			/* match is larger than curr */
+			*largerPtr = matchIndex;
+			commonLengthLarger = matchLength;
+			if (matchIndex <= btLow) {
+				largerPtr = &dummy32;
+				break;
+			} /* beyond tree size, stop the search */
+			largerPtr = nextPtr;
+			matchIndex = nextPtr[0];
+		}
+	}
+
+	*smallerPtr = *largerPtr = 0;
+	if (bestLength > 384)
+		return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
+	if (matchEndIdx > curr + 8)
+		return matchEndIdx - curr - 8;
+	return 1;
+}
+
+static size_t ZSTD_insertBtAndFindBestMatch(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iend, size_t *offsetPtr, U32 nbCompares, const U32 mls,
+					    U32 extDict)
+{
+	U32 *const hashTable = zc->hashTable;
+	U32 const hashLog = zc->params.cParams.hashLog;
+	size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
+	U32 *const bt = zc->chainTable;
+	U32 const btLog = zc->params.cParams.chainLog - 1;
+	U32 const btMask = (1 << btLog) - 1;
+	U32 matchIndex = hashTable[h];
+	size_t commonLengthSmaller = 0, commonLengthLarger = 0;
+	const BYTE *const base = zc->base;
+	const BYTE *const dictBase = zc->dictBase;
+	const U32 dictLimit = zc->dictLimit;
+	const BYTE *const dictEnd = dictBase + dictLimit;
+	const BYTE *const prefixStart = base + dictLimit;
+	const U32 curr = (U32)(ip - base);
+	const U32 btLow = btMask >= curr ? 0 : curr - btMask;
+	const U32 windowLow = zc->lowLimit;
+	U32 *smallerPtr = bt + 2 * (curr & btMask);
+	U32 *largerPtr = bt + 2 * (curr & btMask) + 1;
+	U32 matchEndIdx = curr + 8;
+	U32 dummy32; /* to be nullified at the end */
+	size_t bestLength = 0;
+
+	hashTable[h] = curr; /* Update Hash Table */
+
+	while (nbCompares-- && (matchIndex > windowLow)) {
+		U32 *const nextPtr = bt + 2 * (matchIndex & btMask);
+		size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
+		const BYTE *match;
+
+		if ((!extDict) || (matchIndex + matchLength >= dictLimit)) {
+			match = base + matchIndex;
+			if (match[matchLength] == ip[matchLength])
+				matchLength += ZSTD_count(ip + matchLength + 1, match + matchLength + 1, iend) + 1;
+		} else {
+			match = dictBase + matchIndex;
+			matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iend, dictEnd, prefixStart);
+			if (matchIndex + matchLength >= dictLimit)
+				match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
+		}
+
+		if (matchLength > bestLength) {
+			if (matchLength > matchEndIdx - matchIndex)
+				matchEndIdx = matchIndex + (U32)matchLength;
+			if ((4 * (int)(matchLength - bestLength)) > (int)(ZSTD_highbit32(curr - matchIndex + 1) - ZSTD_highbit32((U32)offsetPtr[0] + 1)))
+				bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
+			if (ip + matchLength == iend) /* equal : no way to know if inf or sup */
+				break;		      /* drop, to guarantee consistency (miss a little bit of compression) */
+		}
+
+		if (match[matchLength] < ip[matchLength]) {
+			/* match is smaller than curr */
+			*smallerPtr = matchIndex;	  /* update smaller idx */
+			commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
+			if (matchIndex <= btLow) {
+				smallerPtr = &dummy32;
+				break;
+			}			  /* beyond tree size, stop the search */
+			smallerPtr = nextPtr + 1; /* new "smaller" => larger of match */
+			matchIndex = nextPtr[1];  /* new matchIndex larger than previous (closer to curr) */
+		} else {
+			/* match is larger than curr */
+			*largerPtr = matchIndex;
+			commonLengthLarger = matchLength;
+			if (matchIndex <= btLow) {
+				largerPtr = &dummy32;
+				break;
+			} /* beyond tree size, stop the search */
+			largerPtr = nextPtr;
+			matchIndex = nextPtr[0];
+		}
+	}
+
+	*smallerPtr = *largerPtr = 0;
+
+	zc->nextToUpdate = (matchEndIdx > curr + 8) ? matchEndIdx - 8 : curr + 1;
+	return bestLength;
+}
+
+static void ZSTD_updateTree(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iend, const U32 nbCompares, const U32 mls)
+{
+	const BYTE *const base = zc->base;
+	const U32 target = (U32)(ip - base);
+	U32 idx = zc->nextToUpdate;
+
+	while (idx < target)
+		idx += ZSTD_insertBt1(zc, base + idx, mls, iend, nbCompares, 0);
+}
+
+/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
+static size_t ZSTD_BtFindBestMatch(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, const U32 mls)
+{
+	if (ip < zc->base + zc->nextToUpdate)
+		return 0; /* skipped area */
+	ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
+	return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
+}
+
+static size_t ZSTD_BtFindBestMatch_selectMLS(ZSTD_CCtx *zc, /* Index table will be updated */
+					     const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, const U32 matchLengthSearch)
+{
+	switch (matchLengthSearch) {
+	default: /* includes case 3 */
+	case 4: return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
+	case 5: return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
+	case 7:
+	case 6: return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
+	}
+}
+
+static void ZSTD_updateTree_extDict(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iend, const U32 nbCompares, const U32 mls)
+{
+	const BYTE *const base = zc->base;
+	const U32 target = (U32)(ip - base);
+	U32 idx = zc->nextToUpdate;
+
+	while (idx < target)
+		idx += ZSTD_insertBt1(zc, base + idx, mls, iend, nbCompares, 1);
+}
+
+/** Tree updater, providing best match */
+static size_t ZSTD_BtFindBestMatch_extDict(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts,
+					   const U32 mls)
+{
+	if (ip < zc->base + zc->nextToUpdate)
+		return 0; /* skipped area */
+	ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
+	return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
+}
+
+static size_t ZSTD_BtFindBestMatch_selectMLS_extDict(ZSTD_CCtx *zc, /* Index table will be updated */
+						     const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts,
+						     const U32 matchLengthSearch)
+{
+	switch (matchLengthSearch) {
+	default: /* includes case 3 */
+	case 4: return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
+	case 5: return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
+	case 7:
+	case 6: return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
+	}
+}
+
+/* *********************************
+*  Hash Chain
+***********************************/
+#define NEXT_IN_CHAIN(d, mask) chainTable[(d)&mask]
+
+/* Update chains up to ip (excluded)
+   Assumption : always within prefix (i.e. not within extDict) */
+FORCE_INLINE
+U32 ZSTD_insertAndFindFirstIndex(ZSTD_CCtx *zc, const BYTE *ip, U32 mls)
+{
+	U32 *const hashTable = zc->hashTable;
+	const U32 hashLog = zc->params.cParams.hashLog;
+	U32 *const chainTable = zc->chainTable;
+	const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1;
+	const BYTE *const base = zc->base;
+	const U32 target = (U32)(ip - base);
+	U32 idx = zc->nextToUpdate;
+
+	while (idx < target) { /* catch up */
+		size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls);
+		NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
+		hashTable[h] = idx;
+		idx++;
+	}
+
+	zc->nextToUpdate = target;
+	return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
+}
+
+/* inlining is important to hardwire a hot branch (template emulation) */
+FORCE_INLINE
+size_t ZSTD_HcFindBestMatch_generic(ZSTD_CCtx *zc, /* Index table will be updated */
+				    const BYTE *const ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, const U32 mls,
+				    const U32 extDict)
+{
+	U32 *const chainTable = zc->chainTable;
+	const U32 chainSize = (1 << zc->params.cParams.chainLog);
+	const U32 chainMask = chainSize - 1;
+	const BYTE *const base = zc->base;
+	const BYTE *const dictBase = zc->dictBase;
+	const U32 dictLimit = zc->dictLimit;
+	const BYTE *const prefixStart = base + dictLimit;
+	const BYTE *const dictEnd = dictBase + dictLimit;
+	const U32 lowLimit = zc->lowLimit;
+	const U32 curr = (U32)(ip - base);
+	const U32 minChain = curr > chainSize ? curr - chainSize : 0;
+	int nbAttempts = maxNbAttempts;
+	size_t ml = EQUAL_READ32 - 1;
+
+	/* HC4 match finder */
+	U32 matchIndex = ZSTD_insertAndFindFirstIndex(zc, ip, mls);
+
+	for (; (matchIndex > lowLimit) & (nbAttempts > 0); nbAttempts--) {
+		const BYTE *match;
+		size_t currMl = 0;
+		if ((!extDict) || matchIndex >= dictLimit) {
+			match = base + matchIndex;
+			if (match[ml] == ip[ml]) /* potentially better */
+				currMl = ZSTD_count(ip, match, iLimit);
+		} else {
+			match = dictBase + matchIndex;
+			if (ZSTD_read32(match) == ZSTD_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+				currMl = ZSTD_count_2segments(ip + EQUAL_READ32, match + EQUAL_READ32, iLimit, dictEnd, prefixStart) + EQUAL_READ32;
+		}
+
+		/* save best solution */
+		if (currMl > ml) {
+			ml = currMl;
+			*offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
+			if (ip + currMl == iLimit)
+				break; /* best possible, and avoid read overflow*/
+		}
+
+		if (matchIndex <= minChain)
+			break;
+		matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
+	}
+
+	return ml;
+}
+
+FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS(ZSTD_CCtx *zc, const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts,
+						   const U32 matchLengthSearch)
+{
+	switch (matchLengthSearch) {
+	default: /* includes case 3 */
+	case 4: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
+	case 5: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
+	case 7:
+	case 6: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
+	}
+}
+
+FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS(ZSTD_CCtx *zc, const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts,
+							   const U32 matchLengthSearch)
+{
+	switch (matchLengthSearch) {
+	default: /* includes case 3 */
+	case 4: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
+	case 5: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
+	case 7:
+	case 6: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
+	}
+}
+
+/* *******************************
+*  Common parser - lazy strategy
+*********************************/
+FORCE_INLINE
+void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 searchMethod, const U32 depth)
+{
+	seqStore_t *seqStorePtr = &(ctx->seqStore);
+	const BYTE *const istart = (const BYTE *)src;
+	const BYTE *ip = istart;
+	const BYTE *anchor = istart;
+	const BYTE *const iend = istart + srcSize;
+	const BYTE *const ilimit = iend - 8;
+	const BYTE *const base = ctx->base + ctx->dictLimit;
+
+	U32 const maxSearches = 1 << ctx->params.cParams.searchLog;
+	U32 const mls = ctx->params.cParams.searchLength;
+
+	typedef size_t (*searchMax_f)(ZSTD_CCtx * zc, const BYTE *ip, const BYTE *iLimit, size_t *offsetPtr, U32 maxNbAttempts, U32 matchLengthSearch);
+	searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
+	U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1], savedOffset = 0;
+
+	/* init */
+	ip += (ip == base);
+	ctx->nextToUpdate3 = ctx->nextToUpdate;
+	{
+		U32 const maxRep = (U32)(ip - base);
+		if (offset_2 > maxRep)
+			savedOffset = offset_2, offset_2 = 0;
+		if (offset_1 > maxRep)
+			savedOffset = offset_1, offset_1 = 0;
+	}
+
+	/* Match Loop */
+	while (ip < ilimit) {
+		size_t matchLength = 0;
+		size_t offset = 0;
+		const BYTE *start = ip + 1;
+
+		/* check repCode */
+		if ((offset_1 > 0) & (ZSTD_read32(ip + 1) == ZSTD_read32(ip + 1 - offset_1))) {
+			/* repcode : we take it */
+			matchLength = ZSTD_count(ip + 1 + EQUAL_READ32, ip + 1 + EQUAL_READ32 - offset_1, iend) + EQUAL_READ32;
+			if (depth == 0)
+				goto _storeSequence;
+		}
+
+		/* first search (depth 0) */
+		{
+			size_t offsetFound = 99999999;
+			size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
+			if (ml2 > matchLength)
+				matchLength = ml2, start = ip, offset = offsetFound;
+		}
+
+		if (matchLength < EQUAL_READ32) {
+			ip += ((ip - anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
+			continue;
+		}
+
+		/* let's try to find a better solution */
+		if (depth >= 1)
+			while (ip < ilimit) {
+				ip++;
+				if ((offset) && ((offset_1 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_1)))) {
+					size_t const mlRep = ZSTD_count(ip + EQUAL_READ32, ip + EQUAL_READ32 - offset_1, iend) + EQUAL_READ32;
+					int const gain2 = (int)(mlRep * 3);
+					int const gain1 = (int)(matchLength * 3 - ZSTD_highbit32((U32)offset + 1) + 1);
+					if ((mlRep >= EQUAL_READ32) && (gain2 > gain1))
+						matchLength = mlRep, offset = 0, start = ip;
+				}
+				{
+					size_t offset2 = 99999999;
+					size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+					int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */
+					int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 4);
+					if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
+						matchLength = ml2, offset = offset2, start = ip;
+						continue; /* search a better one */
+					}
+				}
+
+				/* let's find an even better one */
+				if ((depth == 2) && (ip < ilimit)) {
+					ip++;
+					if ((offset) && ((offset_1 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_1)))) {
+						size_t const ml2 = ZSTD_count(ip + EQUAL_READ32, ip + EQUAL_READ32 - offset_1, iend) + EQUAL_READ32;
+						int const gain2 = (int)(ml2 * 4);
+						int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 1);
+						if ((ml2 >= EQUAL_READ32) && (gain2 > gain1))
+							matchLength = ml2, offset = 0, start = ip;
+					}
+					{
+						size_t offset2 = 99999999;
+						size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+						int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */
+						int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 7);
+						if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
+							matchLength = ml2, offset = offset2, start = ip;
+							continue;
+						}
+					}
+				}
+				break; /* nothing found : store previous solution */
+			}
+
+		/* NOTE:
+		 * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
+		 * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
+		 * overflows the pointer, which is undefined behavior.
+		 */
+		/* catch up */
+		if (offset) {
+			while ((start > anchor) && (start > base + offset - ZSTD_REP_MOVE) &&
+			       (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1])) /* only search for offset within prefix */
+			{
+				start--;
+				matchLength++;
+			}
+			offset_2 = offset_1;
+			offset_1 = (U32)(offset - ZSTD_REP_MOVE);
+		}
+
+	/* store sequence */
+_storeSequence:
+		{
+			size_t const litLength = start - anchor;
+			ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength - MINMATCH);
+			anchor = ip = start + matchLength;
+		}
+
+		/* check immediate repcode */
+		while ((ip <= ilimit) && ((offset_2 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_2)))) {
+			/* store sequence */
+			matchLength = ZSTD_count(ip + EQUAL_READ32, ip + EQUAL_READ32 - offset_2, iend) + EQUAL_READ32;
+			offset = offset_2;
+			offset_2 = offset_1;
+			offset_1 = (U32)offset; /* swap repcodes */
+			ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength - MINMATCH);
+			ip += matchLength;
+			anchor = ip;
+			continue; /* faster when present ... (?) */
+		}
+	}
+
+	/* Save reps for next block */
+	ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
+	ctx->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
+
+	/* Last Literals */
+	{
+		size_t const lastLLSize = iend - anchor;
+		memcpy(seqStorePtr->lit, anchor, lastLLSize);
+		seqStorePtr->lit += lastLLSize;
+	}
+}
+
+static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); }
+
+static void ZSTD_compressBlock_lazy2(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); }
+
+static void ZSTD_compressBlock_lazy(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); }
+
+static void ZSTD_compressBlock_greedy(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); }
+
+FORCE_INLINE
+void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 searchMethod, const U32 depth)
+{
+	seqStore_t *seqStorePtr = &(ctx->seqStore);
+	const BYTE *const istart = (const BYTE *)src;
+	const BYTE *ip = istart;
+	const BYTE *anchor = istart;
+	const BYTE *const iend = istart + srcSize;
+	const BYTE *const ilimit = iend - 8;
+	const BYTE *const base = ctx->base;
+	const U32 dictLimit = ctx->dictLimit;
+	const U32 lowestIndex = ctx->lowLimit;
+	const BYTE *const prefixStart = base + dictLimit;
+	const BYTE *const dictBase = ctx->dictBase;
+	const BYTE *const dictEnd = dictBase + dictLimit;
+	const BYTE *const dictStart = dictBase + ctx->lowLimit;
+
+	const U32 maxSearches = 1 << ctx->params.cParams.searchLog;
+	const U32 mls = ctx->params.cParams.searchLength;
+
+	typedef size_t (*searchMax_f)(ZSTD_CCtx * zc, const BYTE *ip, const BYTE *iLimit, size_t *offsetPtr, U32 maxNbAttempts, U32 matchLengthSearch);
+	searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
+
+	U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
+
+	/* init */
+	ctx->nextToUpdate3 = ctx->nextToUpdate;
+	ip += (ip == prefixStart);
+
+	/* Match Loop */
+	while (ip < ilimit) {
+		size_t matchLength = 0;
+		size_t offset = 0;
+		const BYTE *start = ip + 1;
+		U32 curr = (U32)(ip - base);
+
+		/* check repCode */
+		{
+			const U32 repIndex = (U32)(curr + 1 - offset_1);
+			const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
+			const BYTE *const repMatch = repBase + repIndex;
+			if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
+				if (ZSTD_read32(ip + 1) == ZSTD_read32(repMatch)) {
+					/* repcode detected we should take it */
+					const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
+					matchLength =
+					    ZSTD_count_2segments(ip + 1 + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
+					if (depth == 0)
+						goto _storeSequence;
+				}
+		}
+
+		/* first search (depth 0) */
+		{
+			size_t offsetFound = 99999999;
+			size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
+			if (ml2 > matchLength)
+				matchLength = ml2, start = ip, offset = offsetFound;
+		}
+
+		if (matchLength < EQUAL_READ32) {
+			ip += ((ip - anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
+			continue;
+		}
+
+		/* let's try to find a better solution */
+		if (depth >= 1)
+			while (ip < ilimit) {
+				ip++;
+				curr++;
+				/* check repCode */
+				if (offset) {
+					const U32 repIndex = (U32)(curr - offset_1);
+					const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
+					const BYTE *const repMatch = repBase + repIndex;
+					if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
+						if (ZSTD_read32(ip) == ZSTD_read32(repMatch)) {
+							/* repcode detected */
+							const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
+							size_t const repLength =
+							    ZSTD_count_2segments(ip + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repEnd, prefixStart) +
+							    EQUAL_READ32;
+							int const gain2 = (int)(repLength * 3);
+							int const gain1 = (int)(matchLength * 3 - ZSTD_highbit32((U32)offset + 1) + 1);
+							if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
+								matchLength = repLength, offset = 0, start = ip;
+						}
+				}
+
+				/* search match, depth 1 */
+				{
+					size_t offset2 = 99999999;
+					size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+					int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */
+					int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 4);
+					if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
+						matchLength = ml2, offset = offset2, start = ip;
+						continue; /* search a better one */
+					}
+				}
+
+				/* let's find an even better one */
+				if ((depth == 2) && (ip < ilimit)) {
+					ip++;
+					curr++;
+					/* check repCode */
+					if (offset) {
+						const U32 repIndex = (U32)(curr - offset_1);
+						const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
+						const BYTE *const repMatch = repBase + repIndex;
+						if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
+							if (ZSTD_read32(ip) == ZSTD_read32(repMatch)) {
+								/* repcode detected */
+								const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
+								size_t repLength = ZSTD_count_2segments(ip + EQUAL_READ32, repMatch + EQUAL_READ32, iend,
+													repEnd, prefixStart) +
+										   EQUAL_READ32;
+								int gain2 = (int)(repLength * 4);
+								int gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 1);
+								if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
+									matchLength = repLength, offset = 0, start = ip;
+							}
+					}
+
+					/* search match, depth 2 */
+					{
+						size_t offset2 = 99999999;
+						size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+						int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */
+						int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 7);
+						if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
+							matchLength = ml2, offset = offset2, start = ip;
+							continue;
+						}
+					}
+				}
+				break; /* nothing found : store previous solution */
+			}
+
+		/* catch up */
+		if (offset) {
+			U32 const matchIndex = (U32)((start - base) - (offset - ZSTD_REP_MOVE));
+			const BYTE *match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
+			const BYTE *const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
+			while ((start > anchor) && (match > mStart) && (start[-1] == match[-1])) {
+				start--;
+				match--;
+				matchLength++;
+			} /* catch up */
+			offset_2 = offset_1;
+			offset_1 = (U32)(offset - ZSTD_REP_MOVE);
+		}
+
+	/* store sequence */
+	_storeSequence : {
+		size_t const litLength = start - anchor;
+		ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength - MINMATCH);
+		anchor = ip = start + matchLength;
+	}
+
+		/* check immediate repcode */
+		while (ip <= ilimit) {
+			const U32 repIndex = (U32)((ip - base) - offset_2);
+			const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
+			const BYTE *const repMatch = repBase + repIndex;
+			if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
+				if (ZSTD_read32(ip) == ZSTD_read32(repMatch)) {
+					/* repcode detected we should take it */
+					const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
+					matchLength =
+					    ZSTD_count_2segments(ip + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
+					offset = offset_2;
+					offset_2 = offset_1;
+					offset_1 = (U32)offset; /* swap offset history */
+					ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength - MINMATCH);
+					ip += matchLength;
+					anchor = ip;
+					continue; /* faster when present ... (?) */
+				}
+			break;
+		}
+	}
+
+	/* Save reps for next block */
+	ctx->repToConfirm[0] = offset_1;
+	ctx->repToConfirm[1] = offset_2;
+
+	/* Last Literals */
+	{
+		size_t const lastLLSize = iend - anchor;
+		memcpy(seqStorePtr->lit, anchor, lastLLSize);
+		seqStorePtr->lit += lastLLSize;
+	}
+}
+
+void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); }
+
+static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
+{
+	ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1);
+}
+
+static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
+{
+	ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2);
+}
+
+static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
+{
+	ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2);
+}
+
+/* The optimal parser */
+#include "zstd_opt.h"
+
+static void ZSTD_compressBlock_btopt(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
+{
+#ifdef ZSTD_OPT_H_91842398743
+	ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0);
+#else
+	(void)ctx;
+	(void)src;
+	(void)srcSize;
+	return;
+#endif
+}
+
+static void ZSTD_compressBlock_btopt2(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
+{
+#ifdef ZSTD_OPT_H_91842398743
+	ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1);
+#else
+	(void)ctx;
+	(void)src;
+	(void)srcSize;
+	return;
+#endif
+}
+
+static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
+{
+#ifdef ZSTD_OPT_H_91842398743
+	ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0);
+#else
+	(void)ctx;
+	(void)src;
+	(void)srcSize;
+	return;
+#endif
+}
+
+static void ZSTD_compressBlock_btopt2_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
+{
+#ifdef ZSTD_OPT_H_91842398743
+	ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1);
+#else
+	(void)ctx;
+	(void)src;
+	(void)srcSize;
+	return;
+#endif
+}
+
+typedef void (*ZSTD_blockCompressor)(ZSTD_CCtx *ctx, const void *src, size_t srcSize);
+
+static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
+{
+	static const ZSTD_blockCompressor blockCompressor[2][8] = {
+	    {ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2,
+	     ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt, ZSTD_compressBlock_btopt2},
+	    {ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,
+	     ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btopt2_extDict}};
+
+	return blockCompressor[extDict][(U32)strat];
+}
+
+static size_t ZSTD_compressBlock_internal(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
+{
+	ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit);
+	const BYTE *const base = zc->base;
+	const BYTE *const istart = (const BYTE *)src;
+	const U32 curr = (U32)(istart - base);
+	if (srcSize < MIN_CBLOCK_SIZE + ZSTD_blockHeaderSize + 1)
+		return 0; /* don't even attempt compression below a certain srcSize */
+	ZSTD_resetSeqStore(&(zc->seqStore));
+	if (curr > zc->nextToUpdate + 384)
+		zc->nextToUpdate = curr - MIN(192, (U32)(curr - zc->nextToUpdate - 384)); /* update tree not updated after finding very long rep matches */
+	blockCompressor(zc, src, srcSize);
+	return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize);
+}
+
+/*! ZSTD_compress_generic() :
+*   Compress a chunk of data into one or multiple blocks.
+*   All blocks will be terminated, all input will be consumed.
+*   Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
+*   Frame is supposed already started (header already produced)
+*   @return : compressed size, or an error code
+*/
+static size_t ZSTD_compress_generic(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, U32 lastFrameChunk)
+{
+	size_t blockSize = cctx->blockSize;
+	size_t remaining = srcSize;
+	const BYTE *ip = (const BYTE *)src;
+	BYTE *const ostart = (BYTE *)dst;
+	BYTE *op = ostart;
+	U32 const maxDist = 1 << cctx->params.cParams.windowLog;
+
+	if (cctx->params.fParams.checksumFlag && srcSize)
+		xxh64_update(&cctx->xxhState, src, srcSize);
+
+	while (remaining) {
+		U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
+		size_t cSize;
+
+		if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE)
+			return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */
+		if (remaining < blockSize)
+			blockSize = remaining;
+
+		/* preemptive overflow correction */
+		if (cctx->lowLimit > (3U << 29)) {
+			U32 const cycleMask = (1 << ZSTD_cycleLog(cctx->params.cParams.hashLog, cctx->params.cParams.strategy)) - 1;
+			U32 const curr = (U32)(ip - cctx->base);
+			U32 const newCurr = (curr & cycleMask) + (1 << cctx->params.cParams.windowLog);
+			U32 const correction = curr - newCurr;
+			ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_64 <= 30);
+			ZSTD_reduceIndex(cctx, correction);
+			cctx->base += correction;
+			cctx->dictBase += correction;
+			cctx->lowLimit -= correction;
+			cctx->dictLimit -= correction;
+			if (cctx->nextToUpdate < correction)
+				cctx->nextToUpdate = 0;
+			else
+				cctx->nextToUpdate -= correction;
+		}
+
+		if ((U32)(ip + blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) {
+			/* enforce maxDist */
+			U32 const newLowLimit = (U32)(ip + blockSize - cctx->base) - maxDist;
+			if (cctx->lowLimit < newLowLimit)
+				cctx->lowLimit = newLowLimit;
+			if (cctx->dictLimit < cctx->lowLimit)
+				cctx->dictLimit = cctx->lowLimit;
+		}
+
+		cSize = ZSTD_compressBlock_internal(cctx, op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, ip, blockSize);
+		if (ZSTD_isError(cSize))
+			return cSize;
+
+		if (cSize == 0) { /* block is not compressible */
+			U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw) << 1) + (U32)(blockSize << 3);
+			if (blockSize + ZSTD_blockHeaderSize > dstCapacity)
+				return ERROR(dstSize_tooSmall);
+			ZSTD_writeLE32(op, cBlockHeader24); /* no pb, 4th byte will be overwritten */
+			memcpy(op + ZSTD_blockHeaderSize, ip, blockSize);
+			cSize = ZSTD_blockHeaderSize + blockSize;
+		} else {
+			U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed) << 1) + (U32)(cSize << 3);
+			ZSTD_writeLE24(op, cBlockHeader24);
+			cSize += ZSTD_blockHeaderSize;
+		}
+
+		remaining -= blockSize;
+		dstCapacity -= cSize;
+		ip += blockSize;
+		op += cSize;
+	}
+
+	if (lastFrameChunk && (op > ostart))
+		cctx->stage = ZSTDcs_ending;
+	return op - ostart;
+}
+
+static size_t ZSTD_writeFrameHeader(void *dst, size_t dstCapacity, ZSTD_parameters params, U64 pledgedSrcSize, U32 dictID)
+{
+	BYTE *const op = (BYTE *)dst;
+	U32 const dictIDSizeCode = (dictID > 0) + (dictID >= 256) + (dictID >= 65536); /* 0-3 */
+	U32 const checksumFlag = params.fParams.checksumFlag > 0;
+	U32 const windowSize = 1U << params.cParams.windowLog;
+	U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
+	BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
+	U32 const fcsCode =
+	    params.fParams.contentSizeFlag ? (pledgedSrcSize >= 256) + (pledgedSrcSize >= 65536 + 256) + (pledgedSrcSize >= 0xFFFFFFFFU) : 0; /* 0-3 */
+	BYTE const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag << 2) + (singleSegment << 5) + (fcsCode << 6));
+	size_t pos;
+
+	if (dstCapacity < ZSTD_frameHeaderSize_max)
+		return ERROR(dstSize_tooSmall);
+
+	ZSTD_writeLE32(dst, ZSTD_MAGICNUMBER);
+	op[4] = frameHeaderDecriptionByte;
+	pos = 5;
+	if (!singleSegment)
+		op[pos++] = windowLogByte;
+	switch (dictIDSizeCode) {
+	default: /* impossible */
+	case 0: break;
+	case 1:
+		op[pos] = (BYTE)(dictID);
+		pos++;
+		break;
+	case 2:
+		ZSTD_writeLE16(op + pos, (U16)dictID);
+		pos += 2;
+		break;
+	case 3:
+		ZSTD_writeLE32(op + pos, dictID);
+		pos += 4;
+		break;
+	}
+	switch (fcsCode) {
+	default: /* impossible */
+	case 0:
+		if (singleSegment)
+			op[pos++] = (BYTE)(pledgedSrcSize);
+		break;
+	case 1:
+		ZSTD_writeLE16(op + pos, (U16)(pledgedSrcSize - 256));
+		pos += 2;
+		break;
+	case 2:
+		ZSTD_writeLE32(op + pos, (U32)(pledgedSrcSize));
+		pos += 4;
+		break;
+	case 3:
+		ZSTD_writeLE64(op + pos, (U64)(pledgedSrcSize));
+		pos += 8;
+		break;
+	}
+	return pos;
+}
+
+static size_t ZSTD_compressContinue_internal(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, U32 frame, U32 lastFrameChunk)
+{
+	const BYTE *const ip = (const BYTE *)src;
+	size_t fhSize = 0;
+
+	if (cctx->stage == ZSTDcs_created)
+		return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */
+
+	if (frame && (cctx->stage == ZSTDcs_init)) {
+		fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, cctx->frameContentSize, cctx->dictID);
+		if (ZSTD_isError(fhSize))
+			return fhSize;
+		dstCapacity -= fhSize;
+		dst = (char *)dst + fhSize;
+		cctx->stage = ZSTDcs_ongoing;
+	}
+
+	/* Check if blocks follow each other */
+	if (src != cctx->nextSrc) {
+		/* not contiguous */
+		ptrdiff_t const delta = cctx->nextSrc - ip;
+		cctx->lowLimit = cctx->dictLimit;
+		cctx->dictLimit = (U32)(cctx->nextSrc - cctx->base);
+		cctx->dictBase = cctx->base;
+		cctx->base -= delta;
+		cctx->nextToUpdate = cctx->dictLimit;
+		if (cctx->dictLimit - cctx->lowLimit < HASH_READ_SIZE)
+			cctx->lowLimit = cctx->dictLimit; /* too small extDict */
+	}
+
+	/* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
+	if ((ip + srcSize > cctx->dictBase + cctx->lowLimit) & (ip < cctx->dictBase + cctx->dictLimit)) {
+		ptrdiff_t const highInputIdx = (ip + srcSize) - cctx->dictBase;
+		U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)cctx->dictLimit) ? cctx->dictLimit : (U32)highInputIdx;
+		cctx->lowLimit = lowLimitMax;
+	}
+
+	cctx->nextSrc = ip + srcSize;
+
+	if (srcSize) {
+		size_t const cSize = frame ? ZSTD_compress_generic(cctx, dst, dstCapacity, src, srcSize, lastFrameChunk)
+					   : ZSTD_compressBlock_internal(cctx, dst, dstCapacity, src, srcSize);
+		if (ZSTD_isError(cSize))
+			return cSize;
+		return cSize + fhSize;
+	} else
+		return fhSize;
+}
+
+size_t ZSTD_compressContinue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
+{
+	return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 0);
+}
+
+size_t ZSTD_getBlockSizeMax(ZSTD_CCtx *cctx) { return MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << cctx->params.cParams.windowLog); }
+
+size_t ZSTD_compressBlock(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
+{
+	size_t const blockSizeMax = ZSTD_getBlockSizeMax(cctx);
+	if (srcSize > blockSizeMax)
+		return ERROR(srcSize_wrong);
+	return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0, 0);
+}
+
+/*! ZSTD_loadDictionaryContent() :
+ *  @return : 0, or an error code
+ */
+static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx *zc, const void *src, size_t srcSize)
+{
+	const BYTE *const ip = (const BYTE *)src;
+	const BYTE *const iend = ip + srcSize;
+
+	/* input becomes curr prefix */
+	zc->lowLimit = zc->dictLimit;
+	zc->dictLimit = (U32)(zc->nextSrc - zc->base);
+	zc->dictBase = zc->base;
+	zc->base += ip - zc->nextSrc;
+	zc->nextToUpdate = zc->dictLimit;
+	zc->loadedDictEnd = zc->forceWindow ? 0 : (U32)(iend - zc->base);
+
+	zc->nextSrc = iend;
+	if (srcSize <= HASH_READ_SIZE)
+		return 0;
+
+	switch (zc->params.cParams.strategy) {
+	case ZSTD_fast: ZSTD_fillHashTable(zc, iend, zc->params.cParams.searchLength); break;
+
+	case ZSTD_dfast: ZSTD_fillDoubleHashTable(zc, iend, zc->params.cParams.searchLength); break;
+
+	case ZSTD_greedy:
+	case ZSTD_lazy:
+	case ZSTD_lazy2:
+		if (srcSize >= HASH_READ_SIZE)
+			ZSTD_insertAndFindFirstIndex(zc, iend - HASH_READ_SIZE, zc->params.cParams.searchLength);
+		break;
+
+	case ZSTD_btlazy2:
+	case ZSTD_btopt:
+	case ZSTD_btopt2:
+		if (srcSize >= HASH_READ_SIZE)
+			ZSTD_updateTree(zc, iend - HASH_READ_SIZE, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength);
+		break;
+
+	default:
+		return ERROR(GENERIC); /* strategy doesn't exist; impossible */
+	}
+
+	zc->nextToUpdate = (U32)(iend - zc->base);
+	return 0;
+}
+
+/* Dictionaries that assign zero probability to symbols that show up causes problems
+   when FSE encoding.  Refuse dictionaries that assign zero probability to symbols
+   that we may encounter during compression.
+   NOTE: This behavior is not standard and could be improved in the future. */
+static size_t ZSTD_checkDictNCount(short *normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)
+{
+	U32 s;
+	if (dictMaxSymbolValue < maxSymbolValue)
+		return ERROR(dictionary_corrupted);
+	for (s = 0; s <= maxSymbolValue; ++s) {
+		if (normalizedCounter[s] == 0)
+			return ERROR(dictionary_corrupted);
+	}
+	return 0;
+}
+
+/* Dictionary format :
+ * See :
+ * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
+ */
+/*! ZSTD_loadZstdDictionary() :
+ * @return : 0, or an error code
+ *  assumptions : magic number supposed already checked
+ *                dictSize supposed > 8
+ */
+static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx *cctx, const void *dict, size_t dictSize)
+{
+	const BYTE *dictPtr = (const BYTE *)dict;
+	const BYTE *const dictEnd = dictPtr + dictSize;
+	short offcodeNCount[MaxOff + 1];
+	unsigned offcodeMaxValue = MaxOff;
+
+	dictPtr += 4; /* skip magic number */
+	cctx->dictID = cctx->params.fParams.noDictIDFlag ? 0 : ZSTD_readLE32(dictPtr);
+	dictPtr += 4;
+
+	{
+		size_t const hufHeaderSize = HUF_readCTable_wksp(cctx->hufTable, 255, dictPtr, dictEnd - dictPtr, cctx->tmpCounters, sizeof(cctx->tmpCounters));
+		if (HUF_isError(hufHeaderSize))
+			return ERROR(dictionary_corrupted);
+		dictPtr += hufHeaderSize;
+	}
+
+	{
+		unsigned offcodeLog;
+		size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd - dictPtr);
+		if (FSE_isError(offcodeHeaderSize))
+			return ERROR(dictionary_corrupted);
+		if (offcodeLog > OffFSELog)
+			return ERROR(dictionary_corrupted);
+		/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
+		CHECK_E(FSE_buildCTable_wksp(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, cctx->tmpCounters, sizeof(cctx->tmpCounters)),
+			dictionary_corrupted);
+		dictPtr += offcodeHeaderSize;
+	}
+
+	{
+		short matchlengthNCount[MaxML + 1];
+		unsigned matchlengthMaxValue = MaxML, matchlengthLog;
+		size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd - dictPtr);
+		if (FSE_isError(matchlengthHeaderSize))
+			return ERROR(dictionary_corrupted);
+		if (matchlengthLog > MLFSELog)
+			return ERROR(dictionary_corrupted);
+		/* Every match length code must have non-zero probability */
+		CHECK_F(ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
+		CHECK_E(
+		    FSE_buildCTable_wksp(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, cctx->tmpCounters, sizeof(cctx->tmpCounters)),
+		    dictionary_corrupted);
+		dictPtr += matchlengthHeaderSize;
+	}
+
+	{
+		short litlengthNCount[MaxLL + 1];
+		unsigned litlengthMaxValue = MaxLL, litlengthLog;
+		size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd - dictPtr);
+		if (FSE_isError(litlengthHeaderSize))
+			return ERROR(dictionary_corrupted);
+		if (litlengthLog > LLFSELog)
+			return ERROR(dictionary_corrupted);
+		/* Every literal length code must have non-zero probability */
+		CHECK_F(ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
+		CHECK_E(FSE_buildCTable_wksp(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, cctx->tmpCounters, sizeof(cctx->tmpCounters)),
+			dictionary_corrupted);
+		dictPtr += litlengthHeaderSize;
+	}
+
+	if (dictPtr + 12 > dictEnd)
+		return ERROR(dictionary_corrupted);
+	cctx->rep[0] = ZSTD_readLE32(dictPtr + 0);
+	cctx->rep[1] = ZSTD_readLE32(dictPtr + 4);
+	cctx->rep[2] = ZSTD_readLE32(dictPtr + 8);
+	dictPtr += 12;
+
+	{
+		size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
+		U32 offcodeMax = MaxOff;
+		if (dictContentSize <= ((U32)-1) - 128 KB) {
+			U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
+			offcodeMax = ZSTD_highbit32(maxOffset);		     /* Calculate minimum offset code required to represent maxOffset */
+		}
+		/* All offset values <= dictContentSize + 128 KB must be representable */
+		CHECK_F(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
+		/* All repCodes must be <= dictContentSize and != 0*/
+		{
+			U32 u;
+			for (u = 0; u < 3; u++) {
+				if (cctx->rep[u] == 0)
+					return ERROR(dictionary_corrupted);
+				if (cctx->rep[u] > dictContentSize)
+					return ERROR(dictionary_corrupted);
+			}
+		}
+
+		cctx->flagStaticTables = 1;
+		cctx->flagStaticHufTable = HUF_repeat_valid;
+		return ZSTD_loadDictionaryContent(cctx, dictPtr, dictContentSize);
+	}
+}
+
+/** ZSTD_compress_insertDictionary() :
+*   @return : 0, or an error code */
+static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx *cctx, const void *dict, size_t dictSize)
+{
+	if ((dict == NULL) || (dictSize <= 8))
+		return 0;
+
+	/* dict as pure content */
+	if ((ZSTD_readLE32(dict) != ZSTD_DICT_MAGIC) || (cctx->forceRawDict))
+		return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
+
+	/* dict as zstd dictionary */
+	return ZSTD_loadZstdDictionary(cctx, dict, dictSize);
+}
+
+/*! ZSTD_compressBegin_internal() :
+*   @return : 0, or an error code */
+static size_t ZSTD_compressBegin_internal(ZSTD_CCtx *cctx, const void *dict, size_t dictSize, ZSTD_parameters params, U64 pledgedSrcSize)
+{
+	ZSTD_compResetPolicy_e const crp = dictSize ? ZSTDcrp_fullReset : ZSTDcrp_continue;
+	CHECK_F(ZSTD_resetCCtx_advanced(cctx, params, pledgedSrcSize, crp));
+	return ZSTD_compress_insertDictionary(cctx, dict, dictSize);
+}
+
+/*! ZSTD_compressBegin_advanced() :
+*   @return : 0, or an error code */
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx *cctx, const void *dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{
+	/* compression parameters verification and optimization */
+	CHECK_F(ZSTD_checkCParams(params.cParams));
+	return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, pledgedSrcSize);
+}
+
+size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx *cctx, const void *dict, size_t dictSize, int compressionLevel)
+{
+	ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
+	return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, 0);
+}
+
+size_t ZSTD_compressBegin(ZSTD_CCtx *cctx, int compressionLevel) { return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); }
+
+/*! ZSTD_writeEpilogue() :
+*   Ends a frame.
+*   @return : nb of bytes written into dst (or an error code) */
+static size_t ZSTD_writeEpilogue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity)
+{
+	BYTE *const ostart = (BYTE *)dst;
+	BYTE *op = ostart;
+	size_t fhSize = 0;
+
+	if (cctx->stage == ZSTDcs_created)
+		return ERROR(stage_wrong); /* init missing */
+
+	/* special case : empty frame */
+	if (cctx->stage == ZSTDcs_init) {
+		fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0);
+		if (ZSTD_isError(fhSize))
+			return fhSize;
+		dstCapacity -= fhSize;
+		op += fhSize;
+		cctx->stage = ZSTDcs_ongoing;
+	}
+
+	if (cctx->stage != ZSTDcs_ending) {
+		/* write one last empty block, make it the "last" block */
+		U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw) << 1) + 0;
+		if (dstCapacity < 4)
+			return ERROR(dstSize_tooSmall);
+		ZSTD_writeLE32(op, cBlockHeader24);
+		op += ZSTD_blockHeaderSize;
+		dstCapacity -= ZSTD_blockHeaderSize;
+	}
+
+	if (cctx->params.fParams.checksumFlag) {
+		U32 const checksum = (U32)xxh64_digest(&cctx->xxhState);
+		if (dstCapacity < 4)
+			return ERROR(dstSize_tooSmall);
+		ZSTD_writeLE32(op, checksum);
+		op += 4;
+	}
+
+	cctx->stage = ZSTDcs_created; /* return to "created but no init" status */
+	return op - ostart;
+}
+
+size_t ZSTD_compressEnd(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
+{
+	size_t endResult;
+	size_t const cSize = ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 1);
+	if (ZSTD_isError(cSize))
+		return cSize;
+	endResult = ZSTD_writeEpilogue(cctx, (char *)dst + cSize, dstCapacity - cSize);
+	if (ZSTD_isError(endResult))
+		return endResult;
+	return cSize + endResult;
+}
+
+static size_t ZSTD_compress_internal(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize,
+				     ZSTD_parameters params)
+{
+	CHECK_F(ZSTD_compressBegin_internal(cctx, dict, dictSize, params, srcSize));
+	return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+}
+
+size_t ZSTD_compress_usingDict(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize,
+			       ZSTD_parameters params)
+{
+	return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
+}
+
+size_t ZSTD_compressCCtx(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, ZSTD_parameters params)
+{
+	return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, NULL, 0, params);
+}
+
+/* =====  Dictionary API  ===== */
+
+struct ZSTD_CDict_s {
+	void *dictBuffer;
+	const void *dictContent;
+	size_t dictContentSize;
+	ZSTD_CCtx *refContext;
+}; /* typedef'd tp ZSTD_CDict within "zstd.h" */
+
+size_t ZSTD_CDictWorkspaceBound(ZSTD_compressionParameters cParams) { return ZSTD_CCtxWorkspaceBound(cParams) + ZSTD_ALIGN(sizeof(ZSTD_CDict)); }
+
+static ZSTD_CDict *ZSTD_createCDict_advanced(const void *dictBuffer, size_t dictSize, unsigned byReference, ZSTD_parameters params, ZSTD_customMem customMem)
+{
+	if (!customMem.customAlloc || !customMem.customFree)
+		return NULL;
+
+	{
+		ZSTD_CDict *const cdict = (ZSTD_CDict *)ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
+		ZSTD_CCtx *const cctx = ZSTD_createCCtx_advanced(customMem);
+
+		if (!cdict || !cctx) {
+			ZSTD_free(cdict, customMem);
+			ZSTD_freeCCtx(cctx);
+			return NULL;
+		}
+
+		if ((byReference) || (!dictBuffer) || (!dictSize)) {
+			cdict->dictBuffer = NULL;
+			cdict->dictContent = dictBuffer;
+		} else {
+			void *const internalBuffer = ZSTD_malloc(dictSize, customMem);
+			if (!internalBuffer) {
+				ZSTD_free(cctx, customMem);
+				ZSTD_free(cdict, customMem);
+				return NULL;
+			}
+			memcpy(internalBuffer, dictBuffer, dictSize);
+			cdict->dictBuffer = internalBuffer;
+			cdict->dictContent = internalBuffer;
+		}
+
+		{
+			size_t const errorCode = ZSTD_compressBegin_advanced(cctx, cdict->dictContent, dictSize, params, 0);
+			if (ZSTD_isError(errorCode)) {
+				ZSTD_free(cdict->dictBuffer, customMem);
+				ZSTD_free(cdict, customMem);
+				ZSTD_freeCCtx(cctx);
+				return NULL;
+			}
+		}
+
+		cdict->refContext = cctx;
+		cdict->dictContentSize = dictSize;
+		return cdict;
+	}
+}
+
+ZSTD_CDict *ZSTD_initCDict(const void *dict, size_t dictSize, ZSTD_parameters params, void *workspace, size_t workspaceSize)
+{
+	ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
+	return ZSTD_createCDict_advanced(dict, dictSize, 1, params, stackMem);
+}
+
+size_t ZSTD_freeCDict(ZSTD_CDict *cdict)
+{
+	if (cdict == NULL)
+		return 0; /* support free on NULL */
+	{
+		ZSTD_customMem const cMem = cdict->refContext->customMem;
+		ZSTD_freeCCtx(cdict->refContext);
+		ZSTD_free(cdict->dictBuffer, cMem);
+		ZSTD_free(cdict, cMem);
+		return 0;
+	}
+}
+
+static ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict *cdict) { return ZSTD_getParamsFromCCtx(cdict->refContext); }
+
+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict, unsigned long long pledgedSrcSize)
+{
+	if (cdict->dictContentSize)
+		CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize))
+	else {
+		ZSTD_parameters params = cdict->refContext->params;
+		params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
+		CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, params, pledgedSrcSize));
+	}
+	return 0;
+}
+
+/*! ZSTD_compress_usingCDict() :
+*   Compression using a digested Dictionary.
+*   Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
+*   Note that compression level is decided during dictionary creation */
+size_t ZSTD_compress_usingCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const ZSTD_CDict *cdict)
+{
+	CHECK_F(ZSTD_compressBegin_usingCDict(cctx, cdict, srcSize));
+
+	if (cdict->refContext->params.fParams.contentSizeFlag == 1) {
+		cctx->params.fParams.contentSizeFlag = 1;
+		cctx->frameContentSize = srcSize;
+	} else {
+		cctx->params.fParams.contentSizeFlag = 0;
+	}
+
+	return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+}
+
+/* ******************************************************************
+*  Streaming
+********************************************************************/
+
+typedef enum { zcss_init, zcss_load, zcss_flush, zcss_final } ZSTD_cStreamStage;
+
+struct ZSTD_CStream_s {
+	ZSTD_CCtx *cctx;
+	ZSTD_CDict *cdictLocal;
+	const ZSTD_CDict *cdict;
+	char *inBuff;
+	size_t inBuffSize;
+	size_t inToCompress;
+	size_t inBuffPos;
+	size_t inBuffTarget;
+	size_t blockSize;
+	char *outBuff;
+	size_t outBuffSize;
+	size_t outBuffContentSize;
+	size_t outBuffFlushedSize;
+	ZSTD_cStreamStage stage;
+	U32 checksum;
+	U32 frameEnded;
+	U64 pledgedSrcSize;
+	U64 inputProcessed;
+	ZSTD_parameters params;
+	ZSTD_customMem customMem;
+}; /* typedef'd to ZSTD_CStream within "zstd.h" */
+
+size_t ZSTD_CStreamWorkspaceBound(ZSTD_compressionParameters cParams)
+{
+	size_t const inBuffSize = (size_t)1 << cParams.windowLog;
+	size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, inBuffSize);
+	size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1;
+
+	return ZSTD_CCtxWorkspaceBound(cParams) + ZSTD_ALIGN(sizeof(ZSTD_CStream)) + ZSTD_ALIGN(inBuffSize) + ZSTD_ALIGN(outBuffSize);
+}
+
+ZSTD_CStream *ZSTD_createCStream_advanced(ZSTD_customMem customMem)
+{
+	ZSTD_CStream *zcs;
+
+	if (!customMem.customAlloc || !customMem.customFree)
+		return NULL;
+
+	zcs = (ZSTD_CStream *)ZSTD_malloc(sizeof(ZSTD_CStream), customMem);
+	if (zcs == NULL)
+		return NULL;
+	memset(zcs, 0, sizeof(ZSTD_CStream));
+	memcpy(&zcs->customMem, &customMem, sizeof(ZSTD_customMem));
+	zcs->cctx = ZSTD_createCCtx_advanced(customMem);
+	if (zcs->cctx == NULL) {
+		ZSTD_freeCStream(zcs);
+		return NULL;
+	}
+	return zcs;
+}
+
+size_t ZSTD_freeCStream(ZSTD_CStream *zcs)
+{
+	if (zcs == NULL)
+		return 0; /* support free on NULL */
+	{
+		ZSTD_customMem const cMem = zcs->customMem;
+		ZSTD_freeCCtx(zcs->cctx);
+		zcs->cctx = NULL;
+		ZSTD_freeCDict(zcs->cdictLocal);
+		zcs->cdictLocal = NULL;
+		ZSTD_free(zcs->inBuff, cMem);
+		zcs->inBuff = NULL;
+		ZSTD_free(zcs->outBuff, cMem);
+		zcs->outBuff = NULL;
+		ZSTD_free(zcs, cMem);
+		return 0;
+	}
+}
+
+/*======   Initialization   ======*/
+
+size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
+size_t ZSTD_CStreamOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */; }
+
+static size_t ZSTD_resetCStream_internal(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize)
+{
+	if (zcs->inBuffSize == 0)
+		return ERROR(stage_wrong); /* zcs has not been init at least once => can't reset */
+
+	if (zcs->cdict)
+		CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize))
+	else
+		CHECK_F(ZSTD_compressBegin_advanced(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize));
+
+	zcs->inToCompress = 0;
+	zcs->inBuffPos = 0;
+	zcs->inBuffTarget = zcs->blockSize;
+	zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
+	zcs->stage = zcss_load;
+	zcs->frameEnded = 0;
+	zcs->pledgedSrcSize = pledgedSrcSize;
+	zcs->inputProcessed = 0;
+	return 0; /* ready to go */
+}
+
+size_t ZSTD_resetCStream(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize)
+{
+
+	zcs->params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
+
+	return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
+}
+
+static size_t ZSTD_initCStream_advanced(ZSTD_CStream *zcs, const void *dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{
+	/* allocate buffers */
+	{
+		size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog;
+		if (zcs->inBuffSize < neededInBuffSize) {
+			zcs->inBuffSize = neededInBuffSize;
+			ZSTD_free(zcs->inBuff, zcs->customMem);
+			zcs->inBuff = (char *)ZSTD_malloc(neededInBuffSize, zcs->customMem);
+			if (zcs->inBuff == NULL)
+				return ERROR(memory_allocation);
+		}
+		zcs->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize);
+	}
+	if (zcs->outBuffSize < ZSTD_compressBound(zcs->blockSize) + 1) {
+		zcs->outBuffSize = ZSTD_compressBound(zcs->blockSize) + 1;
+		ZSTD_free(zcs->outBuff, zcs->customMem);
+		zcs->outBuff = (char *)ZSTD_malloc(zcs->outBuffSize, zcs->customMem);
+		if (zcs->outBuff == NULL)
+			return ERROR(memory_allocation);
+	}
+
+	if (dict && dictSize >= 8) {
+		ZSTD_freeCDict(zcs->cdictLocal);
+		zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0, params, zcs->customMem);
+		if (zcs->cdictLocal == NULL)
+			return ERROR(memory_allocation);
+		zcs->cdict = zcs->cdictLocal;
+	} else
+		zcs->cdict = NULL;
+
+	zcs->checksum = params.fParams.checksumFlag > 0;
+	zcs->params = params;
+
+	return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
+}
+
+ZSTD_CStream *ZSTD_initCStream(ZSTD_parameters params, unsigned long long pledgedSrcSize, void *workspace, size_t workspaceSize)
+{
+	ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
+	ZSTD_CStream *const zcs = ZSTD_createCStream_advanced(stackMem);
+	if (zcs) {
+		size_t const code = ZSTD_initCStream_advanced(zcs, NULL, 0, params, pledgedSrcSize);
+		if (ZSTD_isError(code)) {
+			return NULL;
+		}
+	}
+	return zcs;
+}
+
+ZSTD_CStream *ZSTD_initCStream_usingCDict(const ZSTD_CDict *cdict, unsigned long long pledgedSrcSize, void *workspace, size_t workspaceSize)
+{
+	ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict);
+	ZSTD_CStream *const zcs = ZSTD_initCStream(params, pledgedSrcSize, workspace, workspaceSize);
+	if (zcs) {
+		zcs->cdict = cdict;
+		if (ZSTD_isError(ZSTD_resetCStream_internal(zcs, pledgedSrcSize))) {
+			return NULL;
+		}
+	}
+	return zcs;
+}
+
+/*======   Compression   ======*/
+
+typedef enum { zsf_gather, zsf_flush, zsf_end } ZSTD_flush_e;
+
+ZSTD_STATIC size_t ZSTD_limitCopy(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
+{
+	size_t const length = MIN(dstCapacity, srcSize);
+	memcpy(dst, src, length);
+	return length;
+}
+
+static size_t ZSTD_compressStream_generic(ZSTD_CStream *zcs, void *dst, size_t *dstCapacityPtr, const void *src, size_t *srcSizePtr, ZSTD_flush_e const flush)
+{
+	U32 someMoreWork = 1;
+	const char *const istart = (const char *)src;
+	const char *const iend = istart + *srcSizePtr;
+	const char *ip = istart;
+	char *const ostart = (char *)dst;
+	char *const oend = ostart + *dstCapacityPtr;
+	char *op = ostart;
+
+	while (someMoreWork) {
+		switch (zcs->stage) {
+		case zcss_init:
+			return ERROR(init_missing); /* call ZBUFF_compressInit() first ! */
+
+		case zcss_load:
+			/* complete inBuffer */
+			{
+				size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
+				size_t const loaded = ZSTD_limitCopy(zcs->inBuff + zcs->inBuffPos, toLoad, ip, iend - ip);
+				zcs->inBuffPos += loaded;
+				ip += loaded;
+				if ((zcs->inBuffPos == zcs->inToCompress) || (!flush && (toLoad != loaded))) {
+					someMoreWork = 0;
+					break; /* not enough input to get a full block : stop there, wait for more */
+				}
+			}
+			/* compress curr block (note : this stage cannot be stopped in the middle) */
+			{
+				void *cDst;
+				size_t cSize;
+				size_t const iSize = zcs->inBuffPos - zcs->inToCompress;
+				size_t oSize = oend - op;
+				if (oSize >= ZSTD_compressBound(iSize))
+					cDst = op; /* compress directly into output buffer (avoid flush stage) */
+				else
+					cDst = zcs->outBuff, oSize = zcs->outBuffSize;
+				cSize = (flush == zsf_end) ? ZSTD_compressEnd(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize)
+							   : ZSTD_compressContinue(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize);
+				if (ZSTD_isError(cSize))
+					return cSize;
+				if (flush == zsf_end)
+					zcs->frameEnded = 1;
+				/* prepare next block */
+				zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
+				if (zcs->inBuffTarget > zcs->inBuffSize)
+					zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; /* note : inBuffSize >= blockSize */
+				zcs->inToCompress = zcs->inBuffPos;
+				if (cDst == op) {
+					op += cSize;
+					break;
+				} /* no need to flush */
+				zcs->outBuffContentSize = cSize;
+				zcs->outBuffFlushedSize = 0;
+				zcs->stage = zcss_flush; /* pass-through to flush stage */
+			}
+
+		case zcss_flush: {
+			size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
+			size_t const flushed = ZSTD_limitCopy(op, oend - op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
+			op += flushed;
+			zcs->outBuffFlushedSize += flushed;
+			if (toFlush != flushed) {
+				someMoreWork = 0;
+				break;
+			} /* dst too small to store flushed data : stop there */
+			zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
+			zcs->stage = zcss_load;
+			break;
+		}
+
+		case zcss_final:
+			someMoreWork = 0; /* do nothing */
+			break;
+
+		default:
+			return ERROR(GENERIC); /* impossible */
+		}
+	}
+
+	*srcSizePtr = ip - istart;
+	*dstCapacityPtr = op - ostart;
+	zcs->inputProcessed += *srcSizePtr;
+	if (zcs->frameEnded)
+		return 0;
+	{
+		size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos;
+		if (hintInSize == 0)
+			hintInSize = zcs->blockSize;
+		return hintInSize;
+	}
+}
+
+size_t ZSTD_compressStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output, ZSTD_inBuffer *input)
+{
+	size_t sizeRead = input->size - input->pos;
+	size_t sizeWritten = output->size - output->pos;
+	size_t const result =
+	    ZSTD_compressStream_generic(zcs, (char *)(output->dst) + output->pos, &sizeWritten, (const char *)(input->src) + input->pos, &sizeRead, zsf_gather);
+	input->pos += sizeRead;
+	output->pos += sizeWritten;
+	return result;
+}
+
+/*======   Finalize   ======*/
+
+/*! ZSTD_flushStream() :
+*   @return : amount of data remaining to flush */
+size_t ZSTD_flushStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output)
+{
+	size_t srcSize = 0;
+	size_t sizeWritten = output->size - output->pos;
+	size_t const result = ZSTD_compressStream_generic(zcs, (char *)(output->dst) + output->pos, &sizeWritten, &srcSize,
+							  &srcSize, /* use a valid src address instead of NULL */
+							  zsf_flush);
+	output->pos += sizeWritten;
+	if (ZSTD_isError(result))
+		return result;
+	return zcs->outBuffContentSize - zcs->outBuffFlushedSize; /* remaining to flush */
+}
+
+size_t ZSTD_endStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output)
+{
+	BYTE *const ostart = (BYTE *)(output->dst) + output->pos;
+	BYTE *const oend = (BYTE *)(output->dst) + output->size;
+	BYTE *op = ostart;
+
+	if ((zcs->pledgedSrcSize) && (zcs->inputProcessed != zcs->pledgedSrcSize))
+		return ERROR(srcSize_wrong); /* pledgedSrcSize not respected */
+
+	if (zcs->stage != zcss_final) {
+		/* flush whatever remains */
+		size_t srcSize = 0;
+		size_t sizeWritten = output->size - output->pos;
+		size_t const notEnded =
+		    ZSTD_compressStream_generic(zcs, ostart, &sizeWritten, &srcSize, &srcSize, zsf_end); /* use a valid src address instead of NULL */
+		size_t const remainingToFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
+		op += sizeWritten;
+		if (remainingToFlush) {
+			output->pos += sizeWritten;
+			return remainingToFlush + ZSTD_BLOCKHEADERSIZE /* final empty block */ + (zcs->checksum * 4);
+		}
+		/* create epilogue */
+		zcs->stage = zcss_final;
+		zcs->outBuffContentSize = !notEnded ? 0 : ZSTD_compressEnd(zcs->cctx, zcs->outBuff, zcs->outBuffSize, NULL,
+									   0); /* write epilogue, including final empty block, into outBuff */
+	}
+
+	/* flush epilogue */
+	{
+		size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
+		size_t const flushed = ZSTD_limitCopy(op, oend - op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
+		op += flushed;
+		zcs->outBuffFlushedSize += flushed;
+		output->pos += op - ostart;
+		if (toFlush == flushed)
+			zcs->stage = zcss_init; /* end reached */
+		return toFlush - flushed;
+	}
+}
+
+/*-=====  Pre-defined compression levels  =====-*/
+
+#define ZSTD_DEFAULT_CLEVEL 1
+#define ZSTD_MAX_CLEVEL 22
+int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
+
+static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL + 1] = {
+    {
+	/* "default" */
+	/* W,  C,  H,  S,  L, TL, strat */
+	{18, 12, 12, 1, 7, 16, ZSTD_fast},    /* level  0 - never used */
+	{19, 13, 14, 1, 7, 16, ZSTD_fast},    /* level  1 */
+	{19, 15, 16, 1, 6, 16, ZSTD_fast},    /* level  2 */
+	{20, 16, 17, 1, 5, 16, ZSTD_dfast},   /* level  3.*/
+	{20, 18, 18, 1, 5, 16, ZSTD_dfast},   /* level  4.*/
+	{20, 15, 18, 3, 5, 16, ZSTD_greedy},  /* level  5 */
+	{21, 16, 19, 2, 5, 16, ZSTD_lazy},    /* level  6 */
+	{21, 17, 20, 3, 5, 16, ZSTD_lazy},    /* level  7 */
+	{21, 18, 20, 3, 5, 16, ZSTD_lazy2},   /* level  8 */
+	{21, 20, 20, 3, 5, 16, ZSTD_lazy2},   /* level  9 */
+	{21, 19, 21, 4, 5, 16, ZSTD_lazy2},   /* level 10 */
+	{22, 20, 22, 4, 5, 16, ZSTD_lazy2},   /* level 11 */
+	{22, 20, 22, 5, 5, 16, ZSTD_lazy2},   /* level 12 */
+	{22, 21, 22, 5, 5, 16, ZSTD_lazy2},   /* level 13 */
+	{22, 21, 22, 6, 5, 16, ZSTD_lazy2},   /* level 14 */
+	{22, 21, 21, 5, 5, 16, ZSTD_btlazy2}, /* level 15 */
+	{23, 22, 22, 5, 5, 16, ZSTD_btlazy2}, /* level 16 */
+	{23, 21, 22, 4, 5, 24, ZSTD_btopt},   /* level 17 */
+	{23, 23, 22, 6, 5, 32, ZSTD_btopt},   /* level 18 */
+	{23, 23, 22, 6, 3, 48, ZSTD_btopt},   /* level 19 */
+	{25, 25, 23, 7, 3, 64, ZSTD_btopt2},  /* level 20 */
+	{26, 26, 23, 7, 3, 256, ZSTD_btopt2}, /* level 21 */
+	{27, 27, 25, 9, 3, 512, ZSTD_btopt2}, /* level 22 */
+    },
+    {
+	/* for srcSize <= 256 KB */
+	/* W,  C,  H,  S,  L,  T, strat */
+	{0, 0, 0, 0, 0, 0, ZSTD_fast},	 /* level  0 - not used */
+	{18, 13, 14, 1, 6, 8, ZSTD_fast},      /* level  1 */
+	{18, 14, 13, 1, 5, 8, ZSTD_dfast},     /* level  2 */
+	{18, 16, 15, 1, 5, 8, ZSTD_dfast},     /* level  3 */
+	{18, 15, 17, 1, 5, 8, ZSTD_greedy},    /* level  4.*/
+	{18, 16, 17, 4, 5, 8, ZSTD_greedy},    /* level  5.*/
+	{18, 16, 17, 3, 5, 8, ZSTD_lazy},      /* level  6.*/
+	{18, 17, 17, 4, 4, 8, ZSTD_lazy},      /* level  7 */
+	{18, 17, 17, 4, 4, 8, ZSTD_lazy2},     /* level  8 */
+	{18, 17, 17, 5, 4, 8, ZSTD_lazy2},     /* level  9 */
+	{18, 17, 17, 6, 4, 8, ZSTD_lazy2},     /* level 10 */
+	{18, 18, 17, 6, 4, 8, ZSTD_lazy2},     /* level 11.*/
+	{18, 18, 17, 7, 4, 8, ZSTD_lazy2},     /* level 12.*/
+	{18, 19, 17, 6, 4, 8, ZSTD_btlazy2},   /* level 13 */
+	{18, 18, 18, 4, 4, 16, ZSTD_btopt},    /* level 14.*/
+	{18, 18, 18, 4, 3, 16, ZSTD_btopt},    /* level 15.*/
+	{18, 19, 18, 6, 3, 32, ZSTD_btopt},    /* level 16.*/
+	{18, 19, 18, 8, 3, 64, ZSTD_btopt},    /* level 17.*/
+	{18, 19, 18, 9, 3, 128, ZSTD_btopt},   /* level 18.*/
+	{18, 19, 18, 10, 3, 256, ZSTD_btopt},  /* level 19.*/
+	{18, 19, 18, 11, 3, 512, ZSTD_btopt2}, /* level 20.*/
+	{18, 19, 18, 12, 3, 512, ZSTD_btopt2}, /* level 21.*/
+	{18, 19, 18, 13, 3, 512, ZSTD_btopt2}, /* level 22.*/
+    },
+    {
+	/* for srcSize <= 128 KB */
+	/* W,  C,  H,  S,  L,  T, strat */
+	{17, 12, 12, 1, 7, 8, ZSTD_fast},      /* level  0 - not used */
+	{17, 12, 13, 1, 6, 8, ZSTD_fast},      /* level  1 */
+	{17, 13, 16, 1, 5, 8, ZSTD_fast},      /* level  2 */
+	{17, 16, 16, 2, 5, 8, ZSTD_dfast},     /* level  3 */
+	{17, 13, 15, 3, 4, 8, ZSTD_greedy},    /* level  4 */
+	{17, 15, 17, 4, 4, 8, ZSTD_greedy},    /* level  5 */
+	{17, 16, 17, 3, 4, 8, ZSTD_lazy},      /* level  6 */
+	{17, 15, 17, 4, 4, 8, ZSTD_lazy2},     /* level  7 */
+	{17, 17, 17, 4, 4, 8, ZSTD_lazy2},     /* level  8 */
+	{17, 17, 17, 5, 4, 8, ZSTD_lazy2},     /* level  9 */
+	{17, 17, 17, 6, 4, 8, ZSTD_lazy2},     /* level 10 */
+	{17, 17, 17, 7, 4, 8, ZSTD_lazy2},     /* level 11 */
+	{17, 17, 17, 8, 4, 8, ZSTD_lazy2},     /* level 12 */
+	{17, 18, 17, 6, 4, 8, ZSTD_btlazy2},   /* level 13.*/
+	{17, 17, 17, 7, 3, 8, ZSTD_btopt},     /* level 14.*/
+	{17, 17, 17, 7, 3, 16, ZSTD_btopt},    /* level 15.*/
+	{17, 18, 17, 7, 3, 32, ZSTD_btopt},    /* level 16.*/
+	{17, 18, 17, 7, 3, 64, ZSTD_btopt},    /* level 17.*/
+	{17, 18, 17, 7, 3, 256, ZSTD_btopt},   /* level 18.*/
+	{17, 18, 17, 8, 3, 256, ZSTD_btopt},   /* level 19.*/
+	{17, 18, 17, 9, 3, 256, ZSTD_btopt2},  /* level 20.*/
+	{17, 18, 17, 10, 3, 256, ZSTD_btopt2}, /* level 21.*/
+	{17, 18, 17, 11, 3, 512, ZSTD_btopt2}, /* level 22.*/
+    },
+    {
+	/* for srcSize <= 16 KB */
+	/* W,  C,  H,  S,  L,  T, strat */
+	{14, 12, 12, 1, 7, 6, ZSTD_fast},      /* level  0 - not used */
+	{14, 14, 14, 1, 6, 6, ZSTD_fast},      /* level  1 */
+	{14, 14, 14, 1, 4, 6, ZSTD_fast},      /* level  2 */
+	{14, 14, 14, 1, 4, 6, ZSTD_dfast},     /* level  3.*/
+	{14, 14, 14, 4, 4, 6, ZSTD_greedy},    /* level  4.*/
+	{14, 14, 14, 3, 4, 6, ZSTD_lazy},      /* level  5.*/
+	{14, 14, 14, 4, 4, 6, ZSTD_lazy2},     /* level  6 */
+	{14, 14, 14, 5, 4, 6, ZSTD_lazy2},     /* level  7 */
+	{14, 14, 14, 6, 4, 6, ZSTD_lazy2},     /* level  8.*/
+	{14, 15, 14, 6, 4, 6, ZSTD_btlazy2},   /* level  9.*/
+	{14, 15, 14, 3, 3, 6, ZSTD_btopt},     /* level 10.*/
+	{14, 15, 14, 6, 3, 8, ZSTD_btopt},     /* level 11.*/
+	{14, 15, 14, 6, 3, 16, ZSTD_btopt},    /* level 12.*/
+	{14, 15, 14, 6, 3, 24, ZSTD_btopt},    /* level 13.*/
+	{14, 15, 15, 6, 3, 48, ZSTD_btopt},    /* level 14.*/
+	{14, 15, 15, 6, 3, 64, ZSTD_btopt},    /* level 15.*/
+	{14, 15, 15, 6, 3, 96, ZSTD_btopt},    /* level 16.*/
+	{14, 15, 15, 6, 3, 128, ZSTD_btopt},   /* level 17.*/
+	{14, 15, 15, 6, 3, 256, ZSTD_btopt},   /* level 18.*/
+	{14, 15, 15, 7, 3, 256, ZSTD_btopt},   /* level 19.*/
+	{14, 15, 15, 8, 3, 256, ZSTD_btopt2},  /* level 20.*/
+	{14, 15, 15, 9, 3, 256, ZSTD_btopt2},  /* level 21.*/
+	{14, 15, 15, 10, 3, 256, ZSTD_btopt2}, /* level 22.*/
+    },
+};
+
+/*! ZSTD_getCParams() :
+*   @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`.
+*   Size values are optional, provide 0 if not known or unused */
+ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize)
+{
+	ZSTD_compressionParameters cp;
+	size_t const addedSize = srcSize ? 0 : 500;
+	U64 const rSize = srcSize + dictSize ? srcSize + dictSize + addedSize : (U64)-1;
+	U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */
+	if (compressionLevel <= 0)
+		compressionLevel = ZSTD_DEFAULT_CLEVEL; /* 0 == default; no negative compressionLevel yet */
+	if (compressionLevel > ZSTD_MAX_CLEVEL)
+		compressionLevel = ZSTD_MAX_CLEVEL;
+	cp = ZSTD_defaultCParameters[tableID][compressionLevel];
+	if (ZSTD_32bits()) { /* auto-correction, for 32-bits mode */
+		if (cp.windowLog > ZSTD_WINDOWLOG_MAX)
+			cp.windowLog = ZSTD_WINDOWLOG_MAX;
+		if (cp.chainLog > ZSTD_CHAINLOG_MAX)
+			cp.chainLog = ZSTD_CHAINLOG_MAX;
+		if (cp.hashLog > ZSTD_HASHLOG_MAX)
+			cp.hashLog = ZSTD_HASHLOG_MAX;
+	}
+	cp = ZSTD_adjustCParams(cp, srcSize, dictSize);
+	return cp;
+}
+
+/*! ZSTD_getParams() :
+*   same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`).
+*   All fields of `ZSTD_frameParameters` are set to default (0) */
+ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize)
+{
+	ZSTD_parameters params;
+	ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize);
+	memset(&params, 0, sizeof(params));
+	params.cParams = cParams;
+	return params;
+}
+
+EXPORT_SYMBOL(ZSTD_maxCLevel);
+EXPORT_SYMBOL(ZSTD_compressBound);
+
+EXPORT_SYMBOL(ZSTD_CCtxWorkspaceBound);
+EXPORT_SYMBOL(ZSTD_initCCtx);
+EXPORT_SYMBOL(ZSTD_compressCCtx);
+EXPORT_SYMBOL(ZSTD_compress_usingDict);
+
+EXPORT_SYMBOL(ZSTD_CDictWorkspaceBound);
+EXPORT_SYMBOL(ZSTD_initCDict);
+EXPORT_SYMBOL(ZSTD_compress_usingCDict);
+
+EXPORT_SYMBOL(ZSTD_CStreamWorkspaceBound);
+EXPORT_SYMBOL(ZSTD_initCStream);
+EXPORT_SYMBOL(ZSTD_initCStream_usingCDict);
+EXPORT_SYMBOL(ZSTD_resetCStream);
+EXPORT_SYMBOL(ZSTD_compressStream);
+EXPORT_SYMBOL(ZSTD_flushStream);
+EXPORT_SYMBOL(ZSTD_endStream);
+EXPORT_SYMBOL(ZSTD_CStreamInSize);
+EXPORT_SYMBOL(ZSTD_CStreamOutSize);
+
+EXPORT_SYMBOL(ZSTD_getCParams);
+EXPORT_SYMBOL(ZSTD_getParams);
+EXPORT_SYMBOL(ZSTD_checkCParams);
+EXPORT_SYMBOL(ZSTD_adjustCParams);
+
+EXPORT_SYMBOL(ZSTD_compressBegin);
+EXPORT_SYMBOL(ZSTD_compressBegin_usingDict);
+EXPORT_SYMBOL(ZSTD_compressBegin_advanced);
+EXPORT_SYMBOL(ZSTD_copyCCtx);
+EXPORT_SYMBOL(ZSTD_compressBegin_usingCDict);
+EXPORT_SYMBOL(ZSTD_compressContinue);
+EXPORT_SYMBOL(ZSTD_compressEnd);
+
+EXPORT_SYMBOL(ZSTD_getBlockSizeMax);
+EXPORT_SYMBOL(ZSTD_compressBlock);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Zstd Compressor");

diff --git a/lib/zstd/decompress.c b/lib/zstd/decompress.c
new file mode 100644
index 0000000..b178467
--- /dev/null
+++ b/lib/zstd/decompress.c

@@ -0,0 +1,2528 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of https://github.com/facebook/zstd.
+ * An additional grant of patent rights can be found in the PATENTS file in the
+ * same directory.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ */
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+*  MAXWINDOWSIZE_DEFAULT :
+*  maximum window size accepted by DStream, by default.
+*  Frames requiring more memory will be rejected.
+*/
+#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
+#define ZSTD_MAXWINDOWSIZE_DEFAULT ((1 << ZSTD_WINDOWLOG_MAX) + 1) /* defined within zstd.h */
+#endif
+
+/*-*******************************************************
+*  Dependencies
+*********************************************************/
+#include "fse.h"
+#include "huf.h"
+#include "mem.h" /* low level memory routines */
+#include "zstd_internal.h"
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h> /* memcpy, memmove, memset */
+
+#define ZSTD_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0)
+
+/*-*************************************
+*  Macros
+***************************************/
+#define ZSTD_isError ERR_isError /* for inlining */
+#define FSE_isError ERR_isError
+#define HUF_isError ERR_isError
+
+/*_*******************************************************
+*  Memory operations
+**********************************************************/
+static void ZSTD_copy4(void *dst, const void *src) { memcpy(dst, src, 4); }
+
+/*-*************************************************************
+*   Context management
+***************************************************************/
+typedef enum {
+	ZSTDds_getFrameHeaderSize,
+	ZSTDds_decodeFrameHeader,
+	ZSTDds_decodeBlockHeader,
+	ZSTDds_decompressBlock,
+	ZSTDds_decompressLastBlock,
+	ZSTDds_checkChecksum,
+	ZSTDds_decodeSkippableHeader,
+	ZSTDds_skipFrame
+} ZSTD_dStage;
+
+typedef struct {
+	FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
+	FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
+	FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
+	HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
+	U64 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32 / 2];
+	U32 rep[ZSTD_REP_NUM];
+} ZSTD_entropyTables_t;
+
+struct ZSTD_DCtx_s {
+	const FSE_DTable *LLTptr;
+	const FSE_DTable *MLTptr;
+	const FSE_DTable *OFTptr;
+	const HUF_DTable *HUFptr;
+	ZSTD_entropyTables_t entropy;
+	const void *previousDstEnd; /* detect continuity */
+	const void *base;	   /* start of curr segment */
+	const void *vBase;	  /* virtual start of previous segment if it was just before curr one */
+	const void *dictEnd;	/* end of previous segment */
+	size_t expected;
+	ZSTD_frameParams fParams;
+	blockType_e bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
+	ZSTD_dStage stage;
+	U32 litEntropy;
+	U32 fseEntropy;
+	struct xxh64_state xxhState;
+	size_t headerSize;
+	U32 dictID;
+	const BYTE *litPtr;
+	ZSTD_customMem customMem;
+	size_t litSize;
+	size_t rleSize;
+	BYTE litBuffer[ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH];
+	BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
+}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
+
+size_t ZSTD_DCtxWorkspaceBound(void) { return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_DCtx)); }
+
+size_t ZSTD_decompressBegin(ZSTD_DCtx *dctx)
+{
+	dctx->expected = ZSTD_frameHeaderSize_prefix;
+	dctx->stage = ZSTDds_getFrameHeaderSize;
+	dctx->previousDstEnd = NULL;
+	dctx->base = NULL;
+	dctx->vBase = NULL;
+	dctx->dictEnd = NULL;
+	dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
+	dctx->litEntropy = dctx->fseEntropy = 0;
+	dctx->dictID = 0;
+	ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
+	memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */
+	dctx->LLTptr = dctx->entropy.LLTable;
+	dctx->MLTptr = dctx->entropy.MLTable;
+	dctx->OFTptr = dctx->entropy.OFTable;
+	dctx->HUFptr = dctx->entropy.hufTable;
+	return 0;
+}
+
+ZSTD_DCtx *ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
+{
+	ZSTD_DCtx *dctx;
+
+	if (!customMem.customAlloc || !customMem.customFree)
+		return NULL;
+
+	dctx = (ZSTD_DCtx *)ZSTD_malloc(sizeof(ZSTD_DCtx), customMem);
+	if (!dctx)
+		return NULL;
+	memcpy(&dctx->customMem, &customMem, sizeof(customMem));
+	ZSTD_decompressBegin(dctx);
+	return dctx;
+}
+
+ZSTD_DCtx *ZSTD_initDCtx(void *workspace, size_t workspaceSize)
+{
+	ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
+	return ZSTD_createDCtx_advanced(stackMem);
+}
+
+size_t ZSTD_freeDCtx(ZSTD_DCtx *dctx)
+{
+	if (dctx == NULL)
+		return 0; /* support free on NULL */
+	ZSTD_free(dctx, dctx->customMem);
+	return 0; /* reserved as a potential error code in the future */
+}
+
+void ZSTD_copyDCtx(ZSTD_DCtx *dstDCtx, const ZSTD_DCtx *srcDCtx)
+{
+	size_t const workSpaceSize = (ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH) + ZSTD_frameHeaderSize_max;
+	memcpy(dstDCtx, srcDCtx, sizeof(ZSTD_DCtx) - workSpaceSize); /* no need to copy workspace */
+}
+
+static void ZSTD_refDDict(ZSTD_DCtx *dstDCtx, const ZSTD_DDict *ddict);
+
+/*-*************************************************************
+*   Decompression section
+***************************************************************/
+
+/*! ZSTD_isFrame() :
+ *  Tells if the content of `buffer` starts with a valid Frame Identifier.
+ *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
+ *  Note 3 : Skippable Frame Identifiers are considered valid. */
+unsigned ZSTD_isFrame(const void *buffer, size_t size)
+{
+	if (size < 4)
+		return 0;
+	{
+		U32 const magic = ZSTD_readLE32(buffer);
+		if (magic == ZSTD_MAGICNUMBER)
+			return 1;
+		if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START)
+			return 1;
+	}
+	return 0;
+}
+
+/** ZSTD_frameHeaderSize() :
+*   srcSize must be >= ZSTD_frameHeaderSize_prefix.
+*   @return : size of the Frame Header */
+static size_t ZSTD_frameHeaderSize(const void *src, size_t srcSize)
+{
+	if (srcSize < ZSTD_frameHeaderSize_prefix)
+		return ERROR(srcSize_wrong);
+	{
+		BYTE const fhd = ((const BYTE *)src)[4];
+		U32 const dictID = fhd & 3;
+		U32 const singleSegment = (fhd >> 5) & 1;
+		U32 const fcsId = fhd >> 6;
+		return ZSTD_frameHeaderSize_prefix + !singleSegment + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] + (singleSegment && !fcsId);
+	}
+}
+
+/** ZSTD_getFrameParams() :
+*   decode Frame Header, or require larger `srcSize`.
+*   @return : 0, `fparamsPtr` is correctly filled,
+*            >0, `srcSize` is too small, result is expected `srcSize`,
+*             or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src, size_t srcSize)
+{
+	const BYTE *ip = (const BYTE *)src;
+
+	if (srcSize < ZSTD_frameHeaderSize_prefix)
+		return ZSTD_frameHeaderSize_prefix;
+	if (ZSTD_readLE32(src) != ZSTD_MAGICNUMBER) {
+		if ((ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
+			if (srcSize < ZSTD_skippableHeaderSize)
+				return ZSTD_skippableHeaderSize; /* magic number + skippable frame length */
+			memset(fparamsPtr, 0, sizeof(*fparamsPtr));
+			fparamsPtr->frameContentSize = ZSTD_readLE32((const char *)src + 4);
+			fparamsPtr->windowSize = 0; /* windowSize==0 means a frame is skippable */
+			return 0;
+		}
+		return ERROR(prefix_unknown);
+	}
+
+	/* ensure there is enough `srcSize` to fully read/decode frame header */
+	{
+		size_t const fhsize = ZSTD_frameHeaderSize(src, srcSize);
+		if (srcSize < fhsize)
+			return fhsize;
+	}
+
+	{
+		BYTE const fhdByte = ip[4];
+		size_t pos = 5;
+		U32 const dictIDSizeCode = fhdByte & 3;
+		U32 const checksumFlag = (fhdByte >> 2) & 1;
+		U32 const singleSegment = (fhdByte >> 5) & 1;
+		U32 const fcsID = fhdByte >> 6;
+		U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX;
+		U32 windowSize = 0;
+		U32 dictID = 0;
+		U64 frameContentSize = 0;
+		if ((fhdByte & 0x08) != 0)
+			return ERROR(frameParameter_unsupported); /* reserved bits, which must be zero */
+		if (!singleSegment) {
+			BYTE const wlByte = ip[pos++];
+			U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
+			if (windowLog > ZSTD_WINDOWLOG_MAX)
+				return ERROR(frameParameter_windowTooLarge); /* avoids issue with 1 << windowLog */
+			windowSize = (1U << windowLog);
+			windowSize += (windowSize >> 3) * (wlByte & 7);
+		}
+
+		switch (dictIDSizeCode) {
+		default: /* impossible */
+		case 0: break;
+		case 1:
+			dictID = ip[pos];
+			pos++;
+			break;
+		case 2:
+			dictID = ZSTD_readLE16(ip + pos);
+			pos += 2;
+			break;
+		case 3:
+			dictID = ZSTD_readLE32(ip + pos);
+			pos += 4;
+			break;
+		}
+		switch (fcsID) {
+		default: /* impossible */
+		case 0:
+			if (singleSegment)
+				frameContentSize = ip[pos];
+			break;
+		case 1: frameContentSize = ZSTD_readLE16(ip + pos) + 256; break;
+		case 2: frameContentSize = ZSTD_readLE32(ip + pos); break;
+		case 3: frameContentSize = ZSTD_readLE64(ip + pos); break;
+		}
+		if (!windowSize)
+			windowSize = (U32)frameContentSize;
+		if (windowSize > windowSizeMax)
+			return ERROR(frameParameter_windowTooLarge);
+		fparamsPtr->frameContentSize = frameContentSize;
+		fparamsPtr->windowSize = windowSize;
+		fparamsPtr->dictID = dictID;
+		fparamsPtr->checksumFlag = checksumFlag;
+	}
+	return 0;
+}
+
+/** ZSTD_getFrameContentSize() :
+*   compatible with legacy mode
+*   @return : decompressed size of the single frame pointed to be `src` if known, otherwise
+*             - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
+*             - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */
+unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
+{
+	{
+		ZSTD_frameParams fParams;
+		if (ZSTD_getFrameParams(&fParams, src, srcSize) != 0)
+			return ZSTD_CONTENTSIZE_ERROR;
+		if (fParams.windowSize == 0) {
+			/* Either skippable or empty frame, size == 0 either way */
+			return 0;
+		} else if (fParams.frameContentSize != 0) {
+			return fParams.frameContentSize;
+		} else {
+			return ZSTD_CONTENTSIZE_UNKNOWN;
+		}
+	}
+}
+
+/** ZSTD_findDecompressedSize() :
+ *  compatible with legacy mode
+ *  `srcSize` must be the exact length of some number of ZSTD compressed and/or
+ *      skippable frames
+ *  @return : decompressed size of the frames contained */
+unsigned long long ZSTD_findDecompressedSize(const void *src, size_t srcSize)
+{
+	{
+		unsigned long long totalDstSize = 0;
+		while (srcSize >= ZSTD_frameHeaderSize_prefix) {
+			const U32 magicNumber = ZSTD_readLE32(src);
+
+			if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
+				size_t skippableSize;
+				if (srcSize < ZSTD_skippableHeaderSize)
+					return ERROR(srcSize_wrong);
+				skippableSize = ZSTD_readLE32((const BYTE *)src + 4) + ZSTD_skippableHeaderSize;
+				if (srcSize < skippableSize) {
+					return ZSTD_CONTENTSIZE_ERROR;
+				}
+
+				src = (const BYTE *)src + skippableSize;
+				srcSize -= skippableSize;
+				continue;
+			}
+
+			{
+				unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
+				if (ret >= ZSTD_CONTENTSIZE_ERROR)
+					return ret;
+
+				/* check for overflow */
+				if (totalDstSize + ret < totalDstSize)
+					return ZSTD_CONTENTSIZE_ERROR;
+				totalDstSize += ret;
+			}
+			{
+				size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
+				if (ZSTD_isError(frameSrcSize)) {
+					return ZSTD_CONTENTSIZE_ERROR;
+				}
+
+				src = (const BYTE *)src + frameSrcSize;
+				srcSize -= frameSrcSize;
+			}
+		}
+
+		if (srcSize) {
+			return ZSTD_CONTENTSIZE_ERROR;
+		}
+
+		return totalDstSize;
+	}
+}
+
+/** ZSTD_decodeFrameHeader() :
+*   `headerSize` must be the size provided by ZSTD_frameHeaderSize().
+*   @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
+static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx *dctx, const void *src, size_t headerSize)
+{
+	size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, headerSize);
+	if (ZSTD_isError(result))
+		return result; /* invalid header */
+	if (result > 0)
+		return ERROR(srcSize_wrong); /* headerSize too small */
+	if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID))
+		return ERROR(dictionary_wrong);
+	if (dctx->fParams.checksumFlag)
+		xxh64_reset(&dctx->xxhState, 0);
+	return 0;
+}
+
+typedef struct {
+	blockType_e blockType;
+	U32 lastBlock;
+	U32 origSize;
+} blockProperties_t;
+
+/*! ZSTD_getcBlockSize() :
+*   Provides the size of compressed block from block header `src` */
+size_t ZSTD_getcBlockSize(const void *src, size_t srcSize, blockProperties_t *bpPtr)
+{
+	if (srcSize < ZSTD_blockHeaderSize)
+		return ERROR(srcSize_wrong);
+	{
+		U32 const cBlockHeader = ZSTD_readLE24(src);
+		U32 const cSize = cBlockHeader >> 3;
+		bpPtr->lastBlock = cBlockHeader & 1;
+		bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
+		bpPtr->origSize = cSize; /* only useful for RLE */
+		if (bpPtr->blockType == bt_rle)
+			return 1;
+		if (bpPtr->blockType == bt_reserved)
+			return ERROR(corruption_detected);
+		return cSize;
+	}
+}
+
+static size_t ZSTD_copyRawBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
+{
+	if (srcSize > dstCapacity)
+		return ERROR(dstSize_tooSmall);
+	memcpy(dst, src, srcSize);
+	return srcSize;
+}
+
+static size_t ZSTD_setRleBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize, size_t regenSize)
+{
+	if (srcSize != 1)
+		return ERROR(srcSize_wrong);
+	if (regenSize > dstCapacity)
+		return ERROR(dstSize_tooSmall);
+	memset(dst, *(const BYTE *)src, regenSize);
+	return regenSize;
+}
+
+/*! ZSTD_decodeLiteralsBlock() :
+	@return : nb of bytes read from src (< srcSize ) */
+size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx *dctx, const void *src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
+{
+	if (srcSize < MIN_CBLOCK_SIZE)
+		return ERROR(corruption_detected);
+
+	{
+		const BYTE *const istart = (const BYTE *)src;
+		symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
+
+		switch (litEncType) {
+		case set_repeat:
+			if (dctx->litEntropy == 0)
+				return ERROR(dictionary_corrupted);
+		/* fall-through */
+		case set_compressed:
+			if (srcSize < 5)
+				return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
+			{
+				size_t lhSize, litSize, litCSize;
+				U32 singleStream = 0;
+				U32 const lhlCode = (istart[0] >> 2) & 3;
+				U32 const lhc = ZSTD_readLE32(istart);
+				switch (lhlCode) {
+				case 0:
+				case 1:
+				default: /* note : default is impossible, since lhlCode into [0..3] */
+					/* 2 - 2 - 10 - 10 */
+					singleStream = !lhlCode;
+					lhSize = 3;
+					litSize = (lhc >> 4) & 0x3FF;
+					litCSize = (lhc >> 14) & 0x3FF;
+					break;
+				case 2:
+					/* 2 - 2 - 14 - 14 */
+					lhSize = 4;
+					litSize = (lhc >> 4) & 0x3FFF;
+					litCSize = lhc >> 18;
+					break;
+				case 3:
+					/* 2 - 2 - 18 - 18 */
+					lhSize = 5;
+					litSize = (lhc >> 4) & 0x3FFFF;
+					litCSize = (lhc >> 22) + (istart[4] << 10);
+					break;
+				}
+				if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX)
+					return ERROR(corruption_detected);
+				if (litCSize + lhSize > srcSize)
+					return ERROR(corruption_detected);
+
+				if (HUF_isError(
+					(litEncType == set_repeat)
+					    ? (singleStream ? HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart + lhSize, litCSize, dctx->HUFptr)
+							    : HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart + lhSize, litCSize, dctx->HUFptr))
+					    : (singleStream
+						   ? HUF_decompress1X2_DCtx_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart + lhSize, litCSize,
+										 dctx->entropy.workspace, sizeof(dctx->entropy.workspace))
+						   : HUF_decompress4X_hufOnly_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart + lhSize, litCSize,
+										   dctx->entropy.workspace, sizeof(dctx->entropy.workspace)))))
+					return ERROR(corruption_detected);
+
+				dctx->litPtr = dctx->litBuffer;
+				dctx->litSize = litSize;
+				dctx->litEntropy = 1;
+				if (litEncType == set_compressed)
+					dctx->HUFptr = dctx->entropy.hufTable;
+				memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+				return litCSize + lhSize;
+			}
+
+		case set_basic: {
+			size_t litSize, lhSize;
+			U32 const lhlCode = ((istart[0]) >> 2) & 3;
+			switch (lhlCode) {
+			case 0:
+			case 2:
+			default: /* note : default is impossible, since lhlCode into [0..3] */
+				lhSize = 1;
+				litSize = istart[0] >> 3;
+				break;
+			case 1:
+				lhSize = 2;
+				litSize = ZSTD_readLE16(istart) >> 4;
+				break;
+			case 3:
+				lhSize = 3;
+				litSize = ZSTD_readLE24(istart) >> 4;
+				break;
+			}
+
+			if (lhSize + litSize + WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
+				if (litSize + lhSize > srcSize)
+					return ERROR(corruption_detected);
+				memcpy(dctx->litBuffer, istart + lhSize, litSize);
+				dctx->litPtr = dctx->litBuffer;
+				dctx->litSize = litSize;
+				memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+				return lhSize + litSize;
+			}
+			/* direct reference into compressed stream */
+			dctx->litPtr = istart + lhSize;
+			dctx->litSize = litSize;
+			return lhSize + litSize;
+		}
+
+		case set_rle: {
+			U32 const lhlCode = ((istart[0]) >> 2) & 3;
+			size_t litSize, lhSize;
+			switch (lhlCode) {
+			case 0:
+			case 2:
+			default: /* note : default is impossible, since lhlCode into [0..3] */
+				lhSize = 1;
+				litSize = istart[0] >> 3;
+				break;
+			case 1:
+				lhSize = 2;
+				litSize = ZSTD_readLE16(istart) >> 4;
+				break;
+			case 3:
+				lhSize = 3;
+				litSize = ZSTD_readLE24(istart) >> 4;
+				if (srcSize < 4)
+					return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
+				break;
+			}
+			if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX)
+				return ERROR(corruption_detected);
+			memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
+			dctx->litPtr = dctx->litBuffer;
+			dctx->litSize = litSize;
+			return lhSize + 1;
+		}
+		default:
+			return ERROR(corruption_detected); /* impossible */
+		}
+	}
+}
+
+typedef union {
+	FSE_decode_t realData;
+	U32 alignedBy4;
+} FSE_decode_t4;
+
+static const FSE_decode_t4 LL_defaultDTable[(1 << LL_DEFAULTNORMLOG) + 1] = {
+    {{LL_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */
+    {{0, 0, 4}},		 /* 0 : base, symbol, bits */
+    {{16, 0, 4}},
+    {{32, 1, 5}},
+    {{0, 3, 5}},
+    {{0, 4, 5}},
+    {{0, 6, 5}},
+    {{0, 7, 5}},
+    {{0, 9, 5}},
+    {{0, 10, 5}},
+    {{0, 12, 5}},
+    {{0, 14, 6}},
+    {{0, 16, 5}},
+    {{0, 18, 5}},
+    {{0, 19, 5}},
+    {{0, 21, 5}},
+    {{0, 22, 5}},
+    {{0, 24, 5}},
+    {{32, 25, 5}},
+    {{0, 26, 5}},
+    {{0, 27, 6}},
+    {{0, 29, 6}},
+    {{0, 31, 6}},
+    {{32, 0, 4}},
+    {{0, 1, 4}},
+    {{0, 2, 5}},
+    {{32, 4, 5}},
+    {{0, 5, 5}},
+    {{32, 7, 5}},
+    {{0, 8, 5}},
+    {{32, 10, 5}},
+    {{0, 11, 5}},
+    {{0, 13, 6}},
+    {{32, 16, 5}},
+    {{0, 17, 5}},
+    {{32, 19, 5}},
+    {{0, 20, 5}},
+    {{32, 22, 5}},
+    {{0, 23, 5}},
+    {{0, 25, 4}},
+    {{16, 25, 4}},
+    {{32, 26, 5}},
+    {{0, 28, 6}},
+    {{0, 30, 6}},
+    {{48, 0, 4}},
+    {{16, 1, 4}},
+    {{32, 2, 5}},
+    {{32, 3, 5}},
+    {{32, 5, 5}},
+    {{32, 6, 5}},
+    {{32, 8, 5}},
+    {{32, 9, 5}},
+    {{32, 11, 5}},
+    {{32, 12, 5}},
+    {{0, 15, 6}},
+    {{32, 17, 5}},
+    {{32, 18, 5}},
+    {{32, 20, 5}},
+    {{32, 21, 5}},
+    {{32, 23, 5}},
+    {{32, 24, 5}},
+    {{0, 35, 6}},
+    {{0, 34, 6}},
+    {{0, 33, 6}},
+    {{0, 32, 6}},
+}; /* LL_defaultDTable */
+
+static const FSE_decode_t4 ML_defaultDTable[(1 << ML_DEFAULTNORMLOG) + 1] = {
+    {{ML_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */
+    {{0, 0, 6}},		 /* 0 : base, symbol, bits */
+    {{0, 1, 4}},
+    {{32, 2, 5}},
+    {{0, 3, 5}},
+    {{0, 5, 5}},
+    {{0, 6, 5}},
+    {{0, 8, 5}},
+    {{0, 10, 6}},
+    {{0, 13, 6}},
+    {{0, 16, 6}},
+    {{0, 19, 6}},
+    {{0, 22, 6}},
+    {{0, 25, 6}},
+    {{0, 28, 6}},
+    {{0, 31, 6}},
+    {{0, 33, 6}},
+    {{0, 35, 6}},
+    {{0, 37, 6}},
+    {{0, 39, 6}},
+    {{0, 41, 6}},
+    {{0, 43, 6}},
+    {{0, 45, 6}},
+    {{16, 1, 4}},
+    {{0, 2, 4}},
+    {{32, 3, 5}},
+    {{0, 4, 5}},
+    {{32, 6, 5}},
+    {{0, 7, 5}},
+    {{0, 9, 6}},
+    {{0, 12, 6}},
+    {{0, 15, 6}},
+    {{0, 18, 6}},
+    {{0, 21, 6}},
+    {{0, 24, 6}},
+    {{0, 27, 6}},
+    {{0, 30, 6}},
+    {{0, 32, 6}},
+    {{0, 34, 6}},
+    {{0, 36, 6}},
+    {{0, 38, 6}},
+    {{0, 40, 6}},
+    {{0, 42, 6}},
+    {{0, 44, 6}},
+    {{32, 1, 4}},
+    {{48, 1, 4}},
+    {{16, 2, 4}},
+    {{32, 4, 5}},
+    {{32, 5, 5}},
+    {{32, 7, 5}},
+    {{32, 8, 5}},
+    {{0, 11, 6}},
+    {{0, 14, 6}},
+    {{0, 17, 6}},
+    {{0, 20, 6}},
+    {{0, 23, 6}},
+    {{0, 26, 6}},
+    {{0, 29, 6}},
+    {{0, 52, 6}},
+    {{0, 51, 6}},
+    {{0, 50, 6}},
+    {{0, 49, 6}},
+    {{0, 48, 6}},
+    {{0, 47, 6}},
+    {{0, 46, 6}},
+}; /* ML_defaultDTable */
+
+static const FSE_decode_t4 OF_defaultDTable[(1 << OF_DEFAULTNORMLOG) + 1] = {
+    {{OF_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */
+    {{0, 0, 5}},		 /* 0 : base, symbol, bits */
+    {{0, 6, 4}},
+    {{0, 9, 5}},
+    {{0, 15, 5}},
+    {{0, 21, 5}},
+    {{0, 3, 5}},
+    {{0, 7, 4}},
+    {{0, 12, 5}},
+    {{0, 18, 5}},
+    {{0, 23, 5}},
+    {{0, 5, 5}},
+    {{0, 8, 4}},
+    {{0, 14, 5}},
+    {{0, 20, 5}},
+    {{0, 2, 5}},
+    {{16, 7, 4}},
+    {{0, 11, 5}},
+    {{0, 17, 5}},
+    {{0, 22, 5}},
+    {{0, 4, 5}},
+    {{16, 8, 4}},
+    {{0, 13, 5}},
+    {{0, 19, 5}},
+    {{0, 1, 5}},
+    {{16, 6, 4}},
+    {{0, 10, 5}},
+    {{0, 16, 5}},
+    {{0, 28, 5}},
+    {{0, 27, 5}},
+    {{0, 26, 5}},
+    {{0, 25, 5}},
+    {{0, 24, 5}},
+}; /* OF_defaultDTable */
+
+/*! ZSTD_buildSeqTable() :
+	@return : nb bytes read from src,
+			  or an error code if it fails, testable with ZSTD_isError()
+*/
+static size_t ZSTD_buildSeqTable(FSE_DTable *DTableSpace, const FSE_DTable **DTablePtr, symbolEncodingType_e type, U32 max, U32 maxLog, const void *src,
+				 size_t srcSize, const FSE_decode_t4 *defaultTable, U32 flagRepeatTable, void *workspace, size_t workspaceSize)
+{
+	const void *const tmpPtr = defaultTable; /* bypass strict aliasing */
+	switch (type) {
+	case set_rle:
+		if (!srcSize)
+			return ERROR(srcSize_wrong);
+		if ((*(const BYTE *)src) > max)
+			return ERROR(corruption_detected);
+		FSE_buildDTable_rle(DTableSpace, *(const BYTE *)src);
+		*DTablePtr = DTableSpace;
+		return 1;
+	case set_basic: *DTablePtr = (const FSE_DTable *)tmpPtr; return 0;
+	case set_repeat:
+		if (!flagRepeatTable)
+			return ERROR(corruption_detected);
+		return 0;
+	default: /* impossible */
+	case set_compressed: {
+		U32 tableLog;
+		S16 *norm = (S16 *)workspace;
+		size_t const spaceUsed32 = ALIGN(sizeof(S16) * (MaxSeq + 1), sizeof(U32)) >> 2;
+
+		if ((spaceUsed32 << 2) > workspaceSize)
+			return ERROR(GENERIC);
+		workspace = (U32 *)workspace + spaceUsed32;
+		workspaceSize -= (spaceUsed32 << 2);
+		{
+			size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
+			if (FSE_isError(headerSize))
+				return ERROR(corruption_detected);
+			if (tableLog > maxLog)
+				return ERROR(corruption_detected);
+			FSE_buildDTable_wksp(DTableSpace, norm, max, tableLog, workspace, workspaceSize);
+			*DTablePtr = DTableSpace;
+			return headerSize;
+		}
+	}
+	}
+}
+
+size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx *dctx, int *nbSeqPtr, const void *src, size_t srcSize)
+{
+	const BYTE *const istart = (const BYTE *const)src;
+	const BYTE *const iend = istart + srcSize;
+	const BYTE *ip = istart;
+
+	/* check */
+	if (srcSize < MIN_SEQUENCES_SIZE)
+		return ERROR(srcSize_wrong);
+
+	/* SeqHead */
+	{
+		int nbSeq = *ip++;
+		if (!nbSeq) {
+			*nbSeqPtr = 0;
+			return 1;
+		}
+		if (nbSeq > 0x7F) {
+			if (nbSeq == 0xFF) {
+				if (ip + 2 > iend)
+					return ERROR(srcSize_wrong);
+				nbSeq = ZSTD_readLE16(ip) + LONGNBSEQ, ip += 2;
+			} else {
+				if (ip >= iend)
+					return ERROR(srcSize_wrong);
+				nbSeq = ((nbSeq - 0x80) << 8) + *ip++;
+			}
+		}
+		*nbSeqPtr = nbSeq;
+	}
+
+	/* FSE table descriptors */
+	if (ip + 4 > iend)
+		return ERROR(srcSize_wrong); /* minimum possible size */
+	{
+		symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
+		symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
+		symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
+		ip++;
+
+		/* Build DTables */
+		{
+			size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, LLtype, MaxLL, LLFSELog, ip, iend - ip,
+								  LL_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace));
+			if (ZSTD_isError(llhSize))
+				return ERROR(corruption_detected);
+			ip += llhSize;
+		}
+		{
+			size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, OFtype, MaxOff, OffFSELog, ip, iend - ip,
+								  OF_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace));
+			if (ZSTD_isError(ofhSize))
+				return ERROR(corruption_detected);
+			ip += ofhSize;
+		}
+		{
+			size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, MLtype, MaxML, MLFSELog, ip, iend - ip,
+								  ML_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace));
+			if (ZSTD_isError(mlhSize))
+				return ERROR(corruption_detected);
+			ip += mlhSize;
+		}
+	}
+
+	return ip - istart;
+}
+
+typedef struct {
+	size_t litLength;
+	size_t matchLength;
+	size_t offset;
+	const BYTE *match;
+} seq_t;
+
+typedef struct {
+	BIT_DStream_t DStream;
+	FSE_DState_t stateLL;
+	FSE_DState_t stateOffb;
+	FSE_DState_t stateML;
+	size_t prevOffset[ZSTD_REP_NUM];
+	const BYTE *base;
+	size_t pos;
+	uPtrDiff gotoDict;
+} seqState_t;
+
+FORCE_NOINLINE
+size_t ZSTD_execSequenceLast7(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base,
+			      const BYTE *const vBase, const BYTE *const dictEnd)
+{
+	BYTE *const oLitEnd = op + sequence.litLength;
+	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+	BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
+	BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH;
+	const BYTE *const iLitEnd = *litPtr + sequence.litLength;
+	const BYTE *match = oLitEnd - sequence.offset;
+
+	/* check */
+	if (oMatchEnd > oend)
+		return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
+	if (iLitEnd > litLimit)
+		return ERROR(corruption_detected); /* over-read beyond lit buffer */
+	if (oLitEnd <= oend_w)
+		return ERROR(GENERIC); /* Precondition */
+
+	/* copy literals */
+	if (op < oend_w) {
+		ZSTD_wildcopy(op, *litPtr, oend_w - op);
+		*litPtr += oend_w - op;
+		op = oend_w;
+	}
+	while (op < oLitEnd)
+		*op++ = *(*litPtr)++;
+
+	/* copy Match */
+	if (sequence.offset > (size_t)(oLitEnd - base)) {
+		/* offset beyond prefix */
+		if (sequence.offset > (size_t)(oLitEnd - vBase))
+			return ERROR(corruption_detected);
+		match = dictEnd - (base - match);
+		if (match + sequence.matchLength <= dictEnd) {
+			memmove(oLitEnd, match, sequence.matchLength);
+			return sequenceLength;
+		}
+		/* span extDict & currPrefixSegment */
+		{
+			size_t const length1 = dictEnd - match;
+			memmove(oLitEnd, match, length1);
+			op = oLitEnd + length1;
+			sequence.matchLength -= length1;
+			match = base;
+		}
+	}
+	while (op < oMatchEnd)
+		*op++ = *match++;
+	return sequenceLength;
+}
+
+static seq_t ZSTD_decodeSequence(seqState_t *seqState)
+{
+	seq_t seq;
+
+	U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
+	U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
+	U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */
+
+	U32 const llBits = LL_bits[llCode];
+	U32 const mlBits = ML_bits[mlCode];
+	U32 const ofBits = ofCode;
+	U32 const totalBits = llBits + mlBits + ofBits;
+
+	static const U32 LL_base[MaxLL + 1] = {0,  1,  2,  3,  4,  5,  6,  7,  8,    9,     10,    11,    12,    13,     14,     15,     16,     18,
+					       20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000};
+
+	static const U32 ML_base[MaxML + 1] = {3,  4,  5,  6,  7,  8,  9,  10,   11,    12,    13,    14,    15,     16,     17,     18,     19,     20,
+					       21, 22, 23, 24, 25, 26, 27, 28,   29,    30,    31,    32,    33,     34,     35,     37,     39,     41,
+					       43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, 0x1003, 0x2003, 0x4003, 0x8003, 0x10003};
+
+	static const U32 OF_base[MaxOff + 1] = {0,       1,	1,	5,	0xD,      0x1D,      0x3D,      0x7D,      0xFD,     0x1FD,
+						0x3FD,   0x7FD,    0xFFD,    0x1FFD,   0x3FFD,   0x7FFD,    0xFFFD,    0x1FFFD,   0x3FFFD,  0x7FFFD,
+						0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD};
+
+	/* sequence */
+	{
+		size_t offset;
+		if (!ofCode)
+			offset = 0;
+		else {
+			offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
+			if (ZSTD_32bits())
+				BIT_reloadDStream(&seqState->DStream);
+		}
+
+		if (ofCode <= 1) {
+			offset += (llCode == 0);
+			if (offset) {
+				size_t temp = (offset == 3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+				temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
+				if (offset != 1)
+					seqState->prevOffset[2] = seqState->prevOffset[1];
+				seqState->prevOffset[1] = seqState->prevOffset[0];
+				seqState->prevOffset[0] = offset = temp;
+			} else {
+				offset = seqState->prevOffset[0];
+			}
+		} else {
+			seqState->prevOffset[2] = seqState->prevOffset[1];
+			seqState->prevOffset[1] = seqState->prevOffset[0];
+			seqState->prevOffset[0] = offset;
+		}
+		seq.offset = offset;
+	}
+
+	seq.matchLength = ML_base[mlCode] + ((mlCode > 31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <=  16 bits */
+	if (ZSTD_32bits() && (mlBits + llBits > 24))
+		BIT_reloadDStream(&seqState->DStream);
+
+	seq.litLength = LL_base[llCode] + ((llCode > 15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <=  16 bits */
+	if (ZSTD_32bits() || (totalBits > 64 - 7 - (LLFSELog + MLFSELog + OffFSELog)))
+		BIT_reloadDStream(&seqState->DStream);
+
+	/* ANS state update */
+	FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <=  9 bits */
+	FSE_updateState(&seqState->stateML, &seqState->DStream); /* <=  9 bits */
+	if (ZSTD_32bits())
+		BIT_reloadDStream(&seqState->DStream);		   /* <= 18 bits */
+	FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <=  8 bits */
+
+	seq.match = NULL;
+
+	return seq;
+}
+
+FORCE_INLINE
+size_t ZSTD_execSequence(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base,
+			 const BYTE *const vBase, const BYTE *const dictEnd)
+{
+	BYTE *const oLitEnd = op + sequence.litLength;
+	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+	BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
+	BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH;
+	const BYTE *const iLitEnd = *litPtr + sequence.litLength;
+	const BYTE *match = oLitEnd - sequence.offset;
+
+	/* check */
+	if (oMatchEnd > oend)
+		return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
+	if (iLitEnd > litLimit)
+		return ERROR(corruption_detected); /* over-read beyond lit buffer */
+	if (oLitEnd > oend_w)
+		return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd);
+
+	/* copy Literals */
+	ZSTD_copy8(op, *litPtr);
+	if (sequence.litLength > 8)
+		ZSTD_wildcopy(op + 8, (*litPtr) + 8,
+			      sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
+	op = oLitEnd;
+	*litPtr = iLitEnd; /* update for next sequence */
+
+	/* copy Match */
+	if (sequence.offset > (size_t)(oLitEnd - base)) {
+		/* offset beyond prefix */
+		if (sequence.offset > (size_t)(oLitEnd - vBase))
+			return ERROR(corruption_detected);
+		match = dictEnd + (match - base);
+		if (match + sequence.matchLength <= dictEnd) {
+			memmove(oLitEnd, match, sequence.matchLength);
+			return sequenceLength;
+		}
+		/* span extDict & currPrefixSegment */
+		{
+			size_t const length1 = dictEnd - match;
+			memmove(oLitEnd, match, length1);
+			op = oLitEnd + length1;
+			sequence.matchLength -= length1;
+			match = base;
+			if (op > oend_w || sequence.matchLength < MINMATCH) {
+				U32 i;
+				for (i = 0; i < sequence.matchLength; ++i)
+					op[i] = match[i];
+				return sequenceLength;
+			}
+		}
+	}
+	/* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
+
+	/* match within prefix */
+	if (sequence.offset < 8) {
+		/* close range match, overlap */
+		static const U32 dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
+		static const int dec64table[] = {8, 8, 8, 7, 8, 9, 10, 11}; /* subtracted */
+		int const sub2 = dec64table[sequence.offset];
+		op[0] = match[0];
+		op[1] = match[1];
+		op[2] = match[2];
+		op[3] = match[3];
+		match += dec32table[sequence.offset];
+		ZSTD_copy4(op + 4, match);
+		match -= sub2;
+	} else {
+		ZSTD_copy8(op, match);
+	}
+	op += 8;
+	match += 8;
+
+	if (oMatchEnd > oend - (16 - MINMATCH)) {
+		if (op < oend_w) {
+			ZSTD_wildcopy(op, match, oend_w - op);
+			match += oend_w - op;
+			op = oend_w;
+		}
+		while (op < oMatchEnd)
+			*op++ = *match++;
+	} else {
+		ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8); /* works even if matchLength < 8 */
+	}
+	return sequenceLength;
+}
+
+static size_t ZSTD_decompressSequences(ZSTD_DCtx *dctx, void *dst, size_t maxDstSize, const void *seqStart, size_t seqSize)
+{
+	const BYTE *ip = (const BYTE *)seqStart;
+	const BYTE *const iend = ip + seqSize;
+	BYTE *const ostart = (BYTE * const)dst;
+	BYTE *const oend = ostart + maxDstSize;
+	BYTE *op = ostart;
+	const BYTE *litPtr = dctx->litPtr;
+	const BYTE *const litEnd = litPtr + dctx->litSize;
+	const BYTE *const base = (const BYTE *)(dctx->base);
+	const BYTE *const vBase = (const BYTE *)(dctx->vBase);
+	const BYTE *const dictEnd = (const BYTE *)(dctx->dictEnd);
+	int nbSeq;
+
+	/* Build Decoding Tables */
+	{
+		size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
+		if (ZSTD_isError(seqHSize))
+			return seqHSize;
+		ip += seqHSize;
+	}
+
+	/* Regen sequences */
+	if (nbSeq) {
+		seqState_t seqState;
+		dctx->fseEntropy = 1;
+		{
+			U32 i;
+			for (i = 0; i < ZSTD_REP_NUM; i++)
+				seqState.prevOffset[i] = dctx->entropy.rep[i];
+		}
+		CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend - ip), corruption_detected);
+		FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+		FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+		FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+
+		for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq;) {
+			nbSeq--;
+			{
+				seq_t const sequence = ZSTD_decodeSequence(&seqState);
+				size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
+				if (ZSTD_isError(oneSeqSize))
+					return oneSeqSize;
+				op += oneSeqSize;
+			}
+		}
+
+		/* check if reached exact end */
+		if (nbSeq)
+			return ERROR(corruption_detected);
+		/* save reps for next block */
+		{
+			U32 i;
+			for (i = 0; i < ZSTD_REP_NUM; i++)
+				dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]);
+		}
+	}
+
+	/* last literal segment */
+	{
+		size_t const lastLLSize = litEnd - litPtr;
+		if (lastLLSize > (size_t)(oend - op))
+			return ERROR(dstSize_tooSmall);
+		memcpy(op, litPtr, lastLLSize);
+		op += lastLLSize;
+	}
+
+	return op - ostart;
+}
+
+FORCE_INLINE seq_t ZSTD_decodeSequenceLong_generic(seqState_t *seqState, int const longOffsets)
+{
+	seq_t seq;
+
+	U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
+	U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
+	U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */
+
+	U32 const llBits = LL_bits[llCode];
+	U32 const mlBits = ML_bits[mlCode];
+	U32 const ofBits = ofCode;
+	U32 const totalBits = llBits + mlBits + ofBits;
+
+	static const U32 LL_base[MaxLL + 1] = {0,  1,  2,  3,  4,  5,  6,  7,  8,    9,     10,    11,    12,    13,     14,     15,     16,     18,
+					       20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000};
+
+	static const U32 ML_base[MaxML + 1] = {3,  4,  5,  6,  7,  8,  9,  10,   11,    12,    13,    14,    15,     16,     17,     18,     19,     20,
+					       21, 22, 23, 24, 25, 26, 27, 28,   29,    30,    31,    32,    33,     34,     35,     37,     39,     41,
+					       43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, 0x1003, 0x2003, 0x4003, 0x8003, 0x10003};
+
+	static const U32 OF_base[MaxOff + 1] = {0,       1,	1,	5,	0xD,      0x1D,      0x3D,      0x7D,      0xFD,     0x1FD,
+						0x3FD,   0x7FD,    0xFFD,    0x1FFD,   0x3FFD,   0x7FFD,    0xFFFD,    0x1FFFD,   0x3FFFD,  0x7FFFD,
+						0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD};
+
+	/* sequence */
+	{
+		size_t offset;
+		if (!ofCode)
+			offset = 0;
+		else {
+			if (longOffsets) {
+				int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN);
+				offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
+				if (ZSTD_32bits() || extraBits)
+					BIT_reloadDStream(&seqState->DStream);
+				if (extraBits)
+					offset += BIT_readBitsFast(&seqState->DStream, extraBits);
+			} else {
+				offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
+				if (ZSTD_32bits())
+					BIT_reloadDStream(&seqState->DStream);
+			}
+		}
+
+		if (ofCode <= 1) {
+			offset += (llCode == 0);
+			if (offset) {
+				size_t temp = (offset == 3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+				temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
+				if (offset != 1)
+					seqState->prevOffset[2] = seqState->prevOffset[1];
+				seqState->prevOffset[1] = seqState->prevOffset[0];
+				seqState->prevOffset[0] = offset = temp;
+			} else {
+				offset = seqState->prevOffset[0];
+			}
+		} else {
+			seqState->prevOffset[2] = seqState->prevOffset[1];
+			seqState->prevOffset[1] = seqState->prevOffset[0];
+			seqState->prevOffset[0] = offset;
+		}
+		seq.offset = offset;
+	}
+
+	seq.matchLength = ML_base[mlCode] + ((mlCode > 31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <=  16 bits */
+	if (ZSTD_32bits() && (mlBits + llBits > 24))
+		BIT_reloadDStream(&seqState->DStream);
+
+	seq.litLength = LL_base[llCode] + ((llCode > 15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <=  16 bits */
+	if (ZSTD_32bits() || (totalBits > 64 - 7 - (LLFSELog + MLFSELog + OffFSELog)))
+		BIT_reloadDStream(&seqState->DStream);
+
+	{
+		size_t const pos = seqState->pos + seq.litLength;
+		seq.match = seqState->base + pos - seq.offset; /* single memory segment */
+		if (seq.offset > pos)
+			seq.match += seqState->gotoDict; /* separate memory segment */
+		seqState->pos = pos + seq.matchLength;
+	}
+
+	/* ANS state update */
+	FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <=  9 bits */
+	FSE_updateState(&seqState->stateML, &seqState->DStream); /* <=  9 bits */
+	if (ZSTD_32bits())
+		BIT_reloadDStream(&seqState->DStream);		   /* <= 18 bits */
+	FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <=  8 bits */
+
+	return seq;
+}
+
+static seq_t ZSTD_decodeSequenceLong(seqState_t *seqState, unsigned const windowSize)
+{
+	if (ZSTD_highbit32(windowSize) > STREAM_ACCUMULATOR_MIN) {
+		return ZSTD_decodeSequenceLong_generic(seqState, 1);
+	} else {
+		return ZSTD_decodeSequenceLong_generic(seqState, 0);
+	}
+}
+
+FORCE_INLINE
+size_t ZSTD_execSequenceLong(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base,
+			     const BYTE *const vBase, const BYTE *const dictEnd)
+{
+	BYTE *const oLitEnd = op + sequence.litLength;
+	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+	BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
+	BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH;
+	const BYTE *const iLitEnd = *litPtr + sequence.litLength;
+	const BYTE *match = sequence.match;
+
+	/* check */
+	if (oMatchEnd > oend)
+		return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
+	if (iLitEnd > litLimit)
+		return ERROR(corruption_detected); /* over-read beyond lit buffer */
+	if (oLitEnd > oend_w)
+		return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd);
+
+	/* copy Literals */
+	ZSTD_copy8(op, *litPtr);
+	if (sequence.litLength > 8)
+		ZSTD_wildcopy(op + 8, (*litPtr) + 8,
+			      sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
+	op = oLitEnd;
+	*litPtr = iLitEnd; /* update for next sequence */
+
+	/* copy Match */
+	if (sequence.offset > (size_t)(oLitEnd - base)) {
+		/* offset beyond prefix */
+		if (sequence.offset > (size_t)(oLitEnd - vBase))
+			return ERROR(corruption_detected);
+		if (match + sequence.matchLength <= dictEnd) {
+			memmove(oLitEnd, match, sequence.matchLength);
+			return sequenceLength;
+		}
+		/* span extDict & currPrefixSegment */
+		{
+			size_t const length1 = dictEnd - match;
+			memmove(oLitEnd, match, length1);
+			op = oLitEnd + length1;
+			sequence.matchLength -= length1;
+			match = base;
+			if (op > oend_w || sequence.matchLength < MINMATCH) {
+				U32 i;
+				for (i = 0; i < sequence.matchLength; ++i)
+					op[i] = match[i];
+				return sequenceLength;
+			}
+		}
+	}
+	/* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
+
+	/* match within prefix */
+	if (sequence.offset < 8) {
+		/* close range match, overlap */
+		static const U32 dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
+		static const int dec64table[] = {8, 8, 8, 7, 8, 9, 10, 11}; /* subtracted */
+		int const sub2 = dec64table[sequence.offset];
+		op[0] = match[0];
+		op[1] = match[1];
+		op[2] = match[2];
+		op[3] = match[3];
+		match += dec32table[sequence.offset];
+		ZSTD_copy4(op + 4, match);
+		match -= sub2;
+	} else {
+		ZSTD_copy8(op, match);
+	}
+	op += 8;
+	match += 8;
+
+	if (oMatchEnd > oend - (16 - MINMATCH)) {
+		if (op < oend_w) {
+			ZSTD_wildcopy(op, match, oend_w - op);
+			match += oend_w - op;
+			op = oend_w;
+		}
+		while (op < oMatchEnd)
+			*op++ = *match++;
+	} else {
+		ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8); /* works even if matchLength < 8 */
+	}
+	return sequenceLength;
+}
+
+static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx *dctx, void *dst, size_t maxDstSize, const void *seqStart, size_t seqSize)
+{
+	const BYTE *ip = (const BYTE *)seqStart;
+	const BYTE *const iend = ip + seqSize;
+	BYTE *const ostart = (BYTE * const)dst;
+	BYTE *const oend = ostart + maxDstSize;
+	BYTE *op = ostart;
+	const BYTE *litPtr = dctx->litPtr;
+	const BYTE *const litEnd = litPtr + dctx->litSize;
+	const BYTE *const base = (const BYTE *)(dctx->base);
+	const BYTE *const vBase = (const BYTE *)(dctx->vBase);
+	const BYTE *const dictEnd = (const BYTE *)(dctx->dictEnd);
+	unsigned const windowSize = dctx->fParams.windowSize;
+	int nbSeq;
+
+	/* Build Decoding Tables */
+	{
+		size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
+		if (ZSTD_isError(seqHSize))
+			return seqHSize;
+		ip += seqHSize;
+	}
+
+	/* Regen sequences */
+	if (nbSeq) {
+#define STORED_SEQS 4
+#define STOSEQ_MASK (STORED_SEQS - 1)
+#define ADVANCED_SEQS 4
+		seq_t *sequences = (seq_t *)dctx->entropy.workspace;
+		int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
+		seqState_t seqState;
+		int seqNb;
+		ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.workspace) >= sizeof(seq_t) * STORED_SEQS);
+		dctx->fseEntropy = 1;
+		{
+			U32 i;
+			for (i = 0; i < ZSTD_REP_NUM; i++)
+				seqState.prevOffset[i] = dctx->entropy.rep[i];
+		}
+		seqState.base = base;
+		seqState.pos = (size_t)(op - base);
+		seqState.gotoDict = (uPtrDiff)dictEnd - (uPtrDiff)base; /* cast to avoid undefined behaviour */
+		CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend - ip), corruption_detected);
+		FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+		FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+		FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+
+		/* prepare in advance */
+		for (seqNb = 0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb < seqAdvance; seqNb++) {
+			sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, windowSize);
+		}
+		if (seqNb < seqAdvance)
+			return ERROR(corruption_detected);
+
+		/* decode and decompress */
+		for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb < nbSeq; seqNb++) {
+			seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, windowSize);
+			size_t const oneSeqSize =
+			    ZSTD_execSequenceLong(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd);
+			if (ZSTD_isError(oneSeqSize))
+				return oneSeqSize;
+			ZSTD_PREFETCH(sequence.match);
+			sequences[seqNb & STOSEQ_MASK] = sequence;
+			op += oneSeqSize;
+		}
+		if (seqNb < nbSeq)
+			return ERROR(corruption_detected);
+
+		/* finish queue */
+		seqNb -= seqAdvance;
+		for (; seqNb < nbSeq; seqNb++) {
+			size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd);
+			if (ZSTD_isError(oneSeqSize))
+				return oneSeqSize;
+			op += oneSeqSize;
+		}
+
+		/* save reps for next block */
+		{
+			U32 i;
+			for (i = 0; i < ZSTD_REP_NUM; i++)
+				dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]);
+		}
+	}
+
+	/* last literal segment */
+	{
+		size_t const lastLLSize = litEnd - litPtr;
+		if (lastLLSize > (size_t)(oend - op))
+			return ERROR(dstSize_tooSmall);
+		memcpy(op, litPtr, lastLLSize);
+		op += lastLLSize;
+	}
+
+	return op - ostart;
+}
+
+static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
+{ /* blockType == blockCompressed */
+	const BYTE *ip = (const BYTE *)src;
+
+	if (srcSize >= ZSTD_BLOCKSIZE_ABSOLUTEMAX)
+		return ERROR(srcSize_wrong);
+
+	/* Decode literals section */
+	{
+		size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+		if (ZSTD_isError(litCSize))
+			return litCSize;
+		ip += litCSize;
+		srcSize -= litCSize;
+	}
+	if (sizeof(size_t) > 4) /* do not enable prefetching on 32-bits x86, as it's performance detrimental */
+				/* likely because of register pressure */
+				/* if that's the correct cause, then 32-bits ARM should be affected differently */
+				/* it would be good to test this on ARM real hardware, to see if prefetch version improves speed */
+		if (dctx->fParams.windowSize > (1 << 23))
+			return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize);
+	return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
+}
+
+static void ZSTD_checkContinuity(ZSTD_DCtx *dctx, const void *dst)
+{
+	if (dst != dctx->previousDstEnd) { /* not contiguous */
+		dctx->dictEnd = dctx->previousDstEnd;
+		dctx->vBase = (const char *)dst - ((const char *)(dctx->previousDstEnd) - (const char *)(dctx->base));
+		dctx->base = dst;
+		dctx->previousDstEnd = dst;
+	}
+}
+
+size_t ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
+{
+	size_t dSize;
+	ZSTD_checkContinuity(dctx, dst);
+	dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
+	dctx->previousDstEnd = (char *)dst + dSize;
+	return dSize;
+}
+
+/** ZSTD_insertBlock() :
+	insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
+size_t ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart, size_t blockSize)
+{
+	ZSTD_checkContinuity(dctx, blockStart);
+	dctx->previousDstEnd = (const char *)blockStart + blockSize;
+	return blockSize;
+}
+
+size_t ZSTD_generateNxBytes(void *dst, size_t dstCapacity, BYTE byte, size_t length)
+{
+	if (length > dstCapacity)
+		return ERROR(dstSize_tooSmall);
+	memset(dst, byte, length);
+	return length;
+}
+
+/** ZSTD_findFrameCompressedSize() :
+ *  compatible with legacy mode
+ *  `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
+ *  `srcSize` must be at least as large as the frame contained
+ *  @return : the compressed size of the frame starting at `src` */
+size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
+{
+	if (srcSize >= ZSTD_skippableHeaderSize && (ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
+		return ZSTD_skippableHeaderSize + ZSTD_readLE32((const BYTE *)src + 4);
+	} else {
+		const BYTE *ip = (const BYTE *)src;
+		const BYTE *const ipstart = ip;
+		size_t remainingSize = srcSize;
+		ZSTD_frameParams fParams;
+
+		size_t const headerSize = ZSTD_frameHeaderSize(ip, remainingSize);
+		if (ZSTD_isError(headerSize))
+			return headerSize;
+
+		/* Frame Header */
+		{
+			size_t const ret = ZSTD_getFrameParams(&fParams, ip, remainingSize);
+			if (ZSTD_isError(ret))
+				return ret;
+			if (ret > 0)
+				return ERROR(srcSize_wrong);
+		}
+
+		ip += headerSize;
+		remainingSize -= headerSize;
+
+		/* Loop on each block */
+		while (1) {
+			blockProperties_t blockProperties;
+			size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
+			if (ZSTD_isError(cBlockSize))
+				return cBlockSize;
+
+			if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)
+				return ERROR(srcSize_wrong);
+
+			ip += ZSTD_blockHeaderSize + cBlockSize;
+			remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
+
+			if (blockProperties.lastBlock)
+				break;
+		}
+
+		if (fParams.checksumFlag) { /* Frame content checksum */
+			if (remainingSize < 4)
+				return ERROR(srcSize_wrong);
+			ip += 4;
+			remainingSize -= 4;
+		}
+
+		return ip - ipstart;
+	}
+}
+
+/*! ZSTD_decompressFrame() :
+*   @dctx must be properly initialized */
+static size_t ZSTD_decompressFrame(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void **srcPtr, size_t *srcSizePtr)
+{
+	const BYTE *ip = (const BYTE *)(*srcPtr);
+	BYTE *const ostart = (BYTE * const)dst;
+	BYTE *const oend = ostart + dstCapacity;
+	BYTE *op = ostart;
+	size_t remainingSize = *srcSizePtr;
+
+	/* check */
+	if (remainingSize < ZSTD_frameHeaderSize_min + ZSTD_blockHeaderSize)
+		return ERROR(srcSize_wrong);
+
+	/* Frame Header */
+	{
+		size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_frameHeaderSize_prefix);
+		if (ZSTD_isError(frameHeaderSize))
+			return frameHeaderSize;
+		if (remainingSize < frameHeaderSize + ZSTD_blockHeaderSize)
+			return ERROR(srcSize_wrong);
+		CHECK_F(ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize));
+		ip += frameHeaderSize;
+		remainingSize -= frameHeaderSize;
+	}
+
+	/* Loop on each block */
+	while (1) {
+		size_t decodedSize;
+		blockProperties_t blockProperties;
+		size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
+		if (ZSTD_isError(cBlockSize))
+			return cBlockSize;
+
+		ip += ZSTD_blockHeaderSize;
+		remainingSize -= ZSTD_blockHeaderSize;
+		if (cBlockSize > remainingSize)
+			return ERROR(srcSize_wrong);
+
+		switch (blockProperties.blockType) {
+		case bt_compressed: decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend - op, ip, cBlockSize); break;
+		case bt_raw: decodedSize = ZSTD_copyRawBlock(op, oend - op, ip, cBlockSize); break;
+		case bt_rle: decodedSize = ZSTD_generateNxBytes(op, oend - op, *ip, blockProperties.origSize); break;
+		case bt_reserved:
+		default: return ERROR(corruption_detected);
+		}
+
+		if (ZSTD_isError(decodedSize))
+			return decodedSize;
+		if (dctx->fParams.checksumFlag)
+			xxh64_update(&dctx->xxhState, op, decodedSize);
+		op += decodedSize;
+		ip += cBlockSize;
+		remainingSize -= cBlockSize;
+		if (blockProperties.lastBlock)
+			break;
+	}
+
+	if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
+		U32 const checkCalc = (U32)xxh64_digest(&dctx->xxhState);
+		U32 checkRead;
+		if (remainingSize < 4)
+			return ERROR(checksum_wrong);
+		checkRead = ZSTD_readLE32(ip);
+		if (checkRead != checkCalc)
+			return ERROR(checksum_wrong);
+		ip += 4;
+		remainingSize -= 4;
+	}
+
+	/* Allow caller to get size read */
+	*srcPtr = ip;
+	*srcSizePtr = remainingSize;
+	return op - ostart;
+}
+
+static const void *ZSTD_DDictDictContent(const ZSTD_DDict *ddict);
+static size_t ZSTD_DDictDictSize(const ZSTD_DDict *ddict);
+
+static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize,
+					const ZSTD_DDict *ddict)
+{
+	void *const dststart = dst;
+
+	if (ddict) {
+		if (dict) {
+			/* programmer error, these two cases should be mutually exclusive */
+			return ERROR(GENERIC);
+		}
+
+		dict = ZSTD_DDictDictContent(ddict);
+		dictSize = ZSTD_DDictDictSize(ddict);
+	}
+
+	while (srcSize >= ZSTD_frameHeaderSize_prefix) {
+		U32 magicNumber;
+
+		magicNumber = ZSTD_readLE32(src);
+		if (magicNumber != ZSTD_MAGICNUMBER) {
+			if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
+				size_t skippableSize;
+				if (srcSize < ZSTD_skippableHeaderSize)
+					return ERROR(srcSize_wrong);
+				skippableSize = ZSTD_readLE32((const BYTE *)src + 4) + ZSTD_skippableHeaderSize;
+				if (srcSize < skippableSize) {
+					return ERROR(srcSize_wrong);
+				}
+
+				src = (const BYTE *)src + skippableSize;
+				srcSize -= skippableSize;
+				continue;
+			} else {
+				return ERROR(prefix_unknown);
+			}
+		}
+
+		if (ddict) {
+			/* we were called from ZSTD_decompress_usingDDict */
+			ZSTD_refDDict(dctx, ddict);
+		} else {
+			/* this will initialize correctly with no dict if dict == NULL, so
+			 * use this in all cases but ddict */
+			CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
+		}
+		ZSTD_checkContinuity(dctx, dst);
+
+		{
+			const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, &src, &srcSize);
+			if (ZSTD_isError(res))
+				return res;
+			/* don't need to bounds check this, ZSTD_decompressFrame will have
+			 * already */
+			dst = (BYTE *)dst + res;
+			dstCapacity -= res;
+		}
+	}
+
+	if (srcSize)
+		return ERROR(srcSize_wrong); /* input not entirely consumed */
+
+	return (BYTE *)dst - (BYTE *)dststart;
+}
+
+size_t ZSTD_decompress_usingDict(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize)
+{
+	return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL);
+}
+
+size_t ZSTD_decompressDCtx(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
+{
+	return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0);
+}
+
+/*-**************************************
+*   Advanced Streaming Decompression API
+*   Bufferless and synchronous
+****************************************/
+size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx) { return dctx->expected; }
+
+ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx *dctx)
+{
+	switch (dctx->stage) {
+	default: /* should not happen */
+	case ZSTDds_getFrameHeaderSize:
+	case ZSTDds_decodeFrameHeader: return ZSTDnit_frameHeader;
+	case ZSTDds_decodeBlockHeader: return ZSTDnit_blockHeader;
+	case ZSTDds_decompressBlock: return ZSTDnit_block;
+	case ZSTDds_decompressLastBlock: return ZSTDnit_lastBlock;
+	case ZSTDds_checkChecksum: return ZSTDnit_checksum;
+	case ZSTDds_decodeSkippableHeader:
+	case ZSTDds_skipFrame: return ZSTDnit_skippableFrame;
+	}
+}
+
+int ZSTD_isSkipFrame(ZSTD_DCtx *dctx) { return dctx->stage == ZSTDds_skipFrame; } /* for zbuff */
+
+/** ZSTD_decompressContinue() :
+*   @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity)
+*             or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
+{
+	/* Sanity check */
+	if (srcSize != dctx->expected)
+		return ERROR(srcSize_wrong);
+	if (dstCapacity)
+		ZSTD_checkContinuity(dctx, dst);
+
+	switch (dctx->stage) {
+	case ZSTDds_getFrameHeaderSize:
+		if (srcSize != ZSTD_frameHeaderSize_prefix)
+			return ERROR(srcSize_wrong);					/* impossible */
+		if ((ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
+			memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix);
+			dctx->expected = ZSTD_skippableHeaderSize - ZSTD_frameHeaderSize_prefix; /* magic number + skippable frame length */
+			dctx->stage = ZSTDds_decodeSkippableHeader;
+			return 0;
+		}
+		dctx->headerSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_prefix);
+		if (ZSTD_isError(dctx->headerSize))
+			return dctx->headerSize;
+		memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix);
+		if (dctx->headerSize > ZSTD_frameHeaderSize_prefix) {
+			dctx->expected = dctx->headerSize - ZSTD_frameHeaderSize_prefix;
+			dctx->stage = ZSTDds_decodeFrameHeader;
+			return 0;
+		}
+		dctx->expected = 0; /* not necessary to copy more */
+
+	case ZSTDds_decodeFrameHeader:
+		memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected);
+		CHECK_F(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize));
+		dctx->expected = ZSTD_blockHeaderSize;
+		dctx->stage = ZSTDds_decodeBlockHeader;
+		return 0;
+
+	case ZSTDds_decodeBlockHeader: {
+		blockProperties_t bp;
+		size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+		if (ZSTD_isError(cBlockSize))
+			return cBlockSize;
+		dctx->expected = cBlockSize;
+		dctx->bType = bp.blockType;
+		dctx->rleSize = bp.origSize;
+		if (cBlockSize) {
+			dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock;
+			return 0;
+		}
+		/* empty block */
+		if (bp.lastBlock) {
+			if (dctx->fParams.checksumFlag) {
+				dctx->expected = 4;
+				dctx->stage = ZSTDds_checkChecksum;
+			} else {
+				dctx->expected = 0; /* end of frame */
+				dctx->stage = ZSTDds_getFrameHeaderSize;
+			}
+		} else {
+			dctx->expected = 3; /* go directly to next header */
+			dctx->stage = ZSTDds_decodeBlockHeader;
+		}
+		return 0;
+	}
+	case ZSTDds_decompressLastBlock:
+	case ZSTDds_decompressBlock: {
+		size_t rSize;
+		switch (dctx->bType) {
+		case bt_compressed: rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize); break;
+		case bt_raw: rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); break;
+		case bt_rle: rSize = ZSTD_setRleBlock(dst, dstCapacity, src, srcSize, dctx->rleSize); break;
+		case bt_reserved: /* should never happen */
+		default: return ERROR(corruption_detected);
+		}
+		if (ZSTD_isError(rSize))
+			return rSize;
+		if (dctx->fParams.checksumFlag)
+			xxh64_update(&dctx->xxhState, dst, rSize);
+
+		if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */
+			if (dctx->fParams.checksumFlag) {	/* another round for frame checksum */
+				dctx->expected = 4;
+				dctx->stage = ZSTDds_checkChecksum;
+			} else {
+				dctx->expected = 0; /* ends here */
+				dctx->stage = ZSTDds_getFrameHeaderSize;
+			}
+		} else {
+			dctx->stage = ZSTDds_decodeBlockHeader;
+			dctx->expected = ZSTD_blockHeaderSize;
+			dctx->previousDstEnd = (char *)dst + rSize;
+		}
+		return rSize;
+	}
+	case ZSTDds_checkChecksum: {
+		U32 const h32 = (U32)xxh64_digest(&dctx->xxhState);
+		U32 const check32 = ZSTD_readLE32(src); /* srcSize == 4, guaranteed by dctx->expected */
+		if (check32 != h32)
+			return ERROR(checksum_wrong);
+		dctx->expected = 0;
+		dctx->stage = ZSTDds_getFrameHeaderSize;
+		return 0;
+	}
+	case ZSTDds_decodeSkippableHeader: {
+		memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected);
+		dctx->expected = ZSTD_readLE32(dctx->headerBuffer + 4);
+		dctx->stage = ZSTDds_skipFrame;
+		return 0;
+	}
+	case ZSTDds_skipFrame: {
+		dctx->expected = 0;
+		dctx->stage = ZSTDds_getFrameHeaderSize;
+		return 0;
+	}
+	default:
+		return ERROR(GENERIC); /* impossible */
+	}
+}
+
+static size_t ZSTD_refDictContent(ZSTD_DCtx *dctx, const void *dict, size_t dictSize)
+{
+	dctx->dictEnd = dctx->previousDstEnd;
+	dctx->vBase = (const char *)dict - ((const char *)(dctx->previousDstEnd) - (const char *)(dctx->base));
+	dctx->base = dict;
+	dctx->previousDstEnd = (const char *)dict + dictSize;
+	return 0;
+}
+
+/* ZSTD_loadEntropy() :
+ * dict : must point at beginning of a valid zstd dictionary
+ * @return : size of entropy tables read */
+static size_t ZSTD_loadEntropy(ZSTD_entropyTables_t *entropy, const void *const dict, size_t const dictSize)
+{
+	const BYTE *dictPtr = (const BYTE *)dict;
+	const BYTE *const dictEnd = dictPtr + dictSize;
+
+	if (dictSize <= 8)
+		return ERROR(dictionary_corrupted);
+	dictPtr += 8; /* skip header = magic + dictID */
+
+	{
+		size_t const hSize = HUF_readDTableX4_wksp(entropy->hufTable, dictPtr, dictEnd - dictPtr, entropy->workspace, sizeof(entropy->workspace));
+		if (HUF_isError(hSize))
+			return ERROR(dictionary_corrupted);
+		dictPtr += hSize;
+	}
+
+	{
+		short offcodeNCount[MaxOff + 1];
+		U32 offcodeMaxValue = MaxOff, offcodeLog;
+		size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd - dictPtr);
+		if (FSE_isError(offcodeHeaderSize))
+			return ERROR(dictionary_corrupted);
+		if (offcodeLog > OffFSELog)
+			return ERROR(dictionary_corrupted);
+		CHECK_E(FSE_buildDTable_wksp(entropy->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted);
+		dictPtr += offcodeHeaderSize;
+	}
+
+	{
+		short matchlengthNCount[MaxML + 1];
+		unsigned matchlengthMaxValue = MaxML, matchlengthLog;
+		size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd - dictPtr);
+		if (FSE_isError(matchlengthHeaderSize))
+			return ERROR(dictionary_corrupted);
+		if (matchlengthLog > MLFSELog)
+			return ERROR(dictionary_corrupted);
+		CHECK_E(FSE_buildDTable_wksp(entropy->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted);
+		dictPtr += matchlengthHeaderSize;
+	}
+
+	{
+		short litlengthNCount[MaxLL + 1];
+		unsigned litlengthMaxValue = MaxLL, litlengthLog;
+		size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd - dictPtr);
+		if (FSE_isError(litlengthHeaderSize))
+			return ERROR(dictionary_corrupted);
+		if (litlengthLog > LLFSELog)
+			return ERROR(dictionary_corrupted);
+		CHECK_E(FSE_buildDTable_wksp(entropy->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted);
+		dictPtr += litlengthHeaderSize;
+	}
+
+	if (dictPtr + 12 > dictEnd)
+		return ERROR(dictionary_corrupted);
+	{
+		int i;
+		size_t const dictContentSize = (size_t)(dictEnd - (dictPtr + 12));
+		for (i = 0; i < 3; i++) {
+			U32 const rep = ZSTD_readLE32(dictPtr);
+			dictPtr += 4;
+			if (rep == 0 || rep >= dictContentSize)
+				return ERROR(dictionary_corrupted);
+			entropy->rep[i] = rep;
+		}
+	}
+
+	return dictPtr - (const BYTE *)dict;
+}
+
+static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx *dctx, const void *dict, size_t dictSize)
+{
+	if (dictSize < 8)
+		return ZSTD_refDictContent(dctx, dict, dictSize);
+	{
+		U32 const magic = ZSTD_readLE32(dict);
+		if (magic != ZSTD_DICT_MAGIC) {
+			return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */
+		}
+	}
+	dctx->dictID = ZSTD_readLE32((const char *)dict + 4);
+
+	/* load entropy tables */
+	{
+		size_t const eSize = ZSTD_loadEntropy(&dctx->entropy, dict, dictSize);
+		if (ZSTD_isError(eSize))
+			return ERROR(dictionary_corrupted);
+		dict = (const char *)dict + eSize;
+		dictSize -= eSize;
+	}
+	dctx->litEntropy = dctx->fseEntropy = 1;
+
+	/* reference dictionary content */
+	return ZSTD_refDictContent(dctx, dict, dictSize);
+}
+
+size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict, size_t dictSize)
+{
+	CHECK_F(ZSTD_decompressBegin(dctx));
+	if (dict && dictSize)
+		CHECK_E(ZSTD_decompress_insertDictionary(dctx, dict, dictSize), dictionary_corrupted);
+	return 0;
+}
+
+/* ======   ZSTD_DDict   ====== */
+
+struct ZSTD_DDict_s {
+	void *dictBuffer;
+	const void *dictContent;
+	size_t dictSize;
+	ZSTD_entropyTables_t entropy;
+	U32 dictID;
+	U32 entropyPresent;
+	ZSTD_customMem cMem;
+}; /* typedef'd to ZSTD_DDict within "zstd.h" */
+
+size_t ZSTD_DDictWorkspaceBound(void) { return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_DDict)); }
+
+static const void *ZSTD_DDictDictContent(const ZSTD_DDict *ddict) { return ddict->dictContent; }
+
+static size_t ZSTD_DDictDictSize(const ZSTD_DDict *ddict) { return ddict->dictSize; }
+
+static void ZSTD_refDDict(ZSTD_DCtx *dstDCtx, const ZSTD_DDict *ddict)
+{
+	ZSTD_decompressBegin(dstDCtx); /* init */
+	if (ddict) {		       /* support refDDict on NULL */
+		dstDCtx->dictID = ddict->dictID;
+		dstDCtx->base = ddict->dictContent;
+		dstDCtx->vBase = ddict->dictContent;
+		dstDCtx->dictEnd = (const BYTE *)ddict->dictContent + ddict->dictSize;
+		dstDCtx->previousDstEnd = dstDCtx->dictEnd;
+		if (ddict->entropyPresent) {
+			dstDCtx->litEntropy = 1;
+			dstDCtx->fseEntropy = 1;
+			dstDCtx->LLTptr = ddict->entropy.LLTable;
+			dstDCtx->MLTptr = ddict->entropy.MLTable;
+			dstDCtx->OFTptr = ddict->entropy.OFTable;
+			dstDCtx->HUFptr = ddict->entropy.hufTable;
+			dstDCtx->entropy.rep[0] = ddict->entropy.rep[0];
+			dstDCtx->entropy.rep[1] = ddict->entropy.rep[1];
+			dstDCtx->entropy.rep[2] = ddict->entropy.rep[2];
+		} else {
+			dstDCtx->litEntropy = 0;
+			dstDCtx->fseEntropy = 0;
+		}
+	}
+}
+
+static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict *ddict)
+{
+	ddict->dictID = 0;
+	ddict->entropyPresent = 0;
+	if (ddict->dictSize < 8)
+		return 0;
+	{
+		U32 const magic = ZSTD_readLE32(ddict->dictContent);
+		if (magic != ZSTD_DICT_MAGIC)
+			return 0; /* pure content mode */
+	}
+	ddict->dictID = ZSTD_readLE32((const char *)ddict->dictContent + 4);
+
+	/* load entropy tables */
+	CHECK_E(ZSTD_loadEntropy(&ddict->entropy, ddict->dictContent, ddict->dictSize), dictionary_corrupted);
+	ddict->entropyPresent = 1;
+	return 0;
+}
+
+static ZSTD_DDict *ZSTD_createDDict_advanced(const void *dict, size_t dictSize, unsigned byReference, ZSTD_customMem customMem)
+{
+	if (!customMem.customAlloc || !customMem.customFree)
+		return NULL;
+
+	{
+		ZSTD_DDict *const ddict = (ZSTD_DDict *)ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
+		if (!ddict)
+			return NULL;
+		ddict->cMem = customMem;
+
+		if ((byReference) || (!dict) || (!dictSize)) {
+			ddict->dictBuffer = NULL;
+			ddict->dictContent = dict;
+		} else {
+			void *const internalBuffer = ZSTD_malloc(dictSize, customMem);
+			if (!internalBuffer) {
+				ZSTD_freeDDict(ddict);
+				return NULL;
+			}
+			memcpy(internalBuffer, dict, dictSize);
+			ddict->dictBuffer = internalBuffer;
+			ddict->dictContent = internalBuffer;
+		}
+		ddict->dictSize = dictSize;
+		ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
+		/* parse dictionary content */
+		{
+			size_t const errorCode = ZSTD_loadEntropy_inDDict(ddict);
+			if (ZSTD_isError(errorCode)) {
+				ZSTD_freeDDict(ddict);
+				return NULL;
+			}
+		}
+
+		return ddict;
+	}
+}
+
+/*! ZSTD_initDDict() :
+*   Create a digested dictionary, to start decompression without startup delay.
+*   `dict` content is copied inside DDict.
+*   Consequently, `dict` can be released after `ZSTD_DDict` creation */
+ZSTD_DDict *ZSTD_initDDict(const void *dict, size_t dictSize, void *workspace, size_t workspaceSize)
+{
+	ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
+	return ZSTD_createDDict_advanced(dict, dictSize, 1, stackMem);
+}
+
+size_t ZSTD_freeDDict(ZSTD_DDict *ddict)
+{
+	if (ddict == NULL)
+		return 0; /* support free on NULL */
+	{
+		ZSTD_customMem const cMem = ddict->cMem;
+		ZSTD_free(ddict->dictBuffer, cMem);
+		ZSTD_free(ddict, cMem);
+		return 0;
+	}
+}
+
+/*! ZSTD_getDictID_fromDict() :
+ *  Provides the dictID stored within dictionary.
+ *  if @return == 0, the dictionary is not conformant with Zstandard specification.
+ *  It can still be loaded, but as a content-only dictionary. */
+unsigned ZSTD_getDictID_fromDict(const void *dict, size_t dictSize)
+{
+	if (dictSize < 8)
+		return 0;
+	if (ZSTD_readLE32(dict) != ZSTD_DICT_MAGIC)
+		return 0;
+	return ZSTD_readLE32((const char *)dict + 4);
+}
+
+/*! ZSTD_getDictID_fromDDict() :
+ *  Provides the dictID of the dictionary loaded into `ddict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict)
+{
+	if (ddict == NULL)
+		return 0;
+	return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
+}
+
+/*! ZSTD_getDictID_fromFrame() :
+ *  Provides the dictID required to decompressed the frame stored within `src`.
+ *  If @return == 0, the dictID could not be decoded.
+ *  This could for one of the following reasons :
+ *  - The frame does not require a dictionary to be decoded (most common case).
+ *  - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information.
+ *    Note : this use case also happens when using a non-conformant dictionary.
+ *  - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
+ *  - This is not a Zstandard frame.
+ *  When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code. */
+unsigned ZSTD_getDictID_fromFrame(const void *src, size_t srcSize)
+{
+	ZSTD_frameParams zfp = {0, 0, 0, 0};
+	size_t const hError = ZSTD_getFrameParams(&zfp, src, srcSize);
+	if (ZSTD_isError(hError))
+		return 0;
+	return zfp.dictID;
+}
+
+/*! ZSTD_decompress_usingDDict() :
+*   Decompression using a pre-digested Dictionary
+*   Use dictionary without significant overhead. */
+size_t ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const ZSTD_DDict *ddict)
+{
+	/* pass content and size in case legacy frames are encountered */
+	return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, NULL, 0, ddict);
+}
+
+/*=====================================
+*   Streaming decompression
+*====================================*/
+
+typedef enum { zdss_init, zdss_loadHeader, zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
+
+/* *** Resource management *** */
+struct ZSTD_DStream_s {
+	ZSTD_DCtx *dctx;
+	ZSTD_DDict *ddictLocal;
+	const ZSTD_DDict *ddict;
+	ZSTD_frameParams fParams;
+	ZSTD_dStreamStage stage;
+	char *inBuff;
+	size_t inBuffSize;
+	size_t inPos;
+	size_t maxWindowSize;
+	char *outBuff;
+	size_t outBuffSize;
+	size_t outStart;
+	size_t outEnd;
+	size_t blockSize;
+	BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; /* tmp buffer to store frame header */
+	size_t lhSize;
+	ZSTD_customMem customMem;
+	void *legacyContext;
+	U32 previousLegacyVersion;
+	U32 legacyVersion;
+	U32 hostageByte;
+}; /* typedef'd to ZSTD_DStream within "zstd.h" */
+
+size_t ZSTD_DStreamWorkspaceBound(size_t maxWindowSize)
+{
+	size_t const blockSize = MIN(maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX);
+	size_t const inBuffSize = blockSize;
+	size_t const outBuffSize = maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2;
+	return ZSTD_DCtxWorkspaceBound() + ZSTD_ALIGN(sizeof(ZSTD_DStream)) + ZSTD_ALIGN(inBuffSize) + ZSTD_ALIGN(outBuffSize);
+}
+
+static ZSTD_DStream *ZSTD_createDStream_advanced(ZSTD_customMem customMem)
+{
+	ZSTD_DStream *zds;
+
+	if (!customMem.customAlloc || !customMem.customFree)
+		return NULL;
+
+	zds = (ZSTD_DStream *)ZSTD_malloc(sizeof(ZSTD_DStream), customMem);
+	if (zds == NULL)
+		return NULL;
+	memset(zds, 0, sizeof(ZSTD_DStream));
+	memcpy(&zds->customMem, &customMem, sizeof(ZSTD_customMem));
+	zds->dctx = ZSTD_createDCtx_advanced(customMem);
+	if (zds->dctx == NULL) {
+		ZSTD_freeDStream(zds);
+		return NULL;
+	}
+	zds->stage = zdss_init;
+	zds->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
+	return zds;
+}
+
+ZSTD_DStream *ZSTD_initDStream(size_t maxWindowSize, void *workspace, size_t workspaceSize)
+{
+	ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
+	ZSTD_DStream *zds = ZSTD_createDStream_advanced(stackMem);
+	if (!zds) {
+		return NULL;
+	}
+
+	zds->maxWindowSize = maxWindowSize;
+	zds->stage = zdss_loadHeader;
+	zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
+	ZSTD_freeDDict(zds->ddictLocal);
+	zds->ddictLocal = NULL;
+	zds->ddict = zds->ddictLocal;
+	zds->legacyVersion = 0;
+	zds->hostageByte = 0;
+
+	{
+		size_t const blockSize = MIN(zds->maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX);
+		size_t const neededOutSize = zds->maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2;
+
+		zds->inBuff = (char *)ZSTD_malloc(blockSize, zds->customMem);
+		zds->inBuffSize = blockSize;
+		zds->outBuff = (char *)ZSTD_malloc(neededOutSize, zds->customMem);
+		zds->outBuffSize = neededOutSize;
+		if (zds->inBuff == NULL || zds->outBuff == NULL) {
+			ZSTD_freeDStream(zds);
+			return NULL;
+		}
+	}
+	return zds;
+}
+
+ZSTD_DStream *ZSTD_initDStream_usingDDict(size_t maxWindowSize, const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize)
+{
+	ZSTD_DStream *zds = ZSTD_initDStream(maxWindowSize, workspace, workspaceSize);
+	if (zds) {
+		zds->ddict = ddict;
+	}
+	return zds;
+}
+
+size_t ZSTD_freeDStream(ZSTD_DStream *zds)
+{
+	if (zds == NULL)
+		return 0; /* support free on null */
+	{
+		ZSTD_customMem const cMem = zds->customMem;
+		ZSTD_freeDCtx(zds->dctx);
+		zds->dctx = NULL;
+		ZSTD_freeDDict(zds->ddictLocal);
+		zds->ddictLocal = NULL;
+		ZSTD_free(zds->inBuff, cMem);
+		zds->inBuff = NULL;
+		ZSTD_free(zds->outBuff, cMem);
+		zds->outBuff = NULL;
+		ZSTD_free(zds, cMem);
+		return 0;
+	}
+}
+
+/* *** Initialization *** */
+
+size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX + ZSTD_blockHeaderSize; }
+size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
+
+size_t ZSTD_resetDStream(ZSTD_DStream *zds)
+{
+	zds->stage = zdss_loadHeader;
+	zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
+	zds->legacyVersion = 0;
+	zds->hostageByte = 0;
+	return ZSTD_frameHeaderSize_prefix;
+}
+
+/* *****   Decompression   ***** */
+
+ZSTD_STATIC size_t ZSTD_limitCopy(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
+{
+	size_t const length = MIN(dstCapacity, srcSize);
+	memcpy(dst, src, length);
+	return length;
+}
+
+size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inBuffer *input)
+{
+	const char *const istart = (const char *)(input->src) + input->pos;
+	const char *const iend = (const char *)(input->src) + input->size;
+	const char *ip = istart;
+	char *const ostart = (char *)(output->dst) + output->pos;
+	char *const oend = (char *)(output->dst) + output->size;
+	char *op = ostart;
+	U32 someMoreWork = 1;
+
+	while (someMoreWork) {
+		switch (zds->stage) {
+		case zdss_init:
+			ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */
+						/* fall-through */
+
+		case zdss_loadHeader: {
+			size_t const hSize = ZSTD_getFrameParams(&zds->fParams, zds->headerBuffer, zds->lhSize);
+			if (ZSTD_isError(hSize))
+				return hSize;
+			if (hSize != 0) {				   /* need more input */
+				size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */
+				if (toLoad > (size_t)(iend - ip)) {	/* not enough input to load full header */
+					memcpy(zds->headerBuffer + zds->lhSize, ip, iend - ip);
+					zds->lhSize += iend - ip;
+					input->pos = input->size;
+					return (MAX(ZSTD_frameHeaderSize_min, hSize) - zds->lhSize) +
+					       ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
+				}
+				memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad);
+				zds->lhSize = hSize;
+				ip += toLoad;
+				break;
+			}
+
+			/* check for single-pass mode opportunity */
+			if (zds->fParams.frameContentSize && zds->fParams.windowSize /* skippable frame if == 0 */
+			    && (U64)(size_t)(oend - op) >= zds->fParams.frameContentSize) {
+				size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend - istart);
+				if (cSize <= (size_t)(iend - istart)) {
+					size_t const decompressedSize = ZSTD_decompress_usingDDict(zds->dctx, op, oend - op, istart, cSize, zds->ddict);
+					if (ZSTD_isError(decompressedSize))
+						return decompressedSize;
+					ip = istart + cSize;
+					op += decompressedSize;
+					zds->dctx->expected = 0;
+					zds->stage = zdss_init;
+					someMoreWork = 0;
+					break;
+				}
+			}
+
+			/* Consume header */
+			ZSTD_refDDict(zds->dctx, zds->ddict);
+			{
+				size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zds->dctx); /* == ZSTD_frameHeaderSize_prefix */
+				CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer, h1Size));
+				{
+					size_t const h2Size = ZSTD_nextSrcSizeToDecompress(zds->dctx);
+					CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer + h1Size, h2Size));
+				}
+			}
+
+			zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
+			if (zds->fParams.windowSize > zds->maxWindowSize)
+				return ERROR(frameParameter_windowTooLarge);
+
+			/* Buffers are preallocated, but double check */
+			{
+				size_t const blockSize = MIN(zds->maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX);
+				size_t const neededOutSize = zds->maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2;
+				if (zds->inBuffSize < blockSize) {
+					return ERROR(GENERIC);
+				}
+				if (zds->outBuffSize < neededOutSize) {
+					return ERROR(GENERIC);
+				}
+				zds->blockSize = blockSize;
+			}
+			zds->stage = zdss_read;
+		}
+		/* pass-through */
+
+		case zdss_read: {
+			size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx);
+			if (neededInSize == 0) { /* end of frame */
+				zds->stage = zdss_init;
+				someMoreWork = 0;
+				break;
+			}
+			if ((size_t)(iend - ip) >= neededInSize) { /* decode directly from src */
+				const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx);
+				size_t const decodedSize = ZSTD_decompressContinue(zds->dctx, zds->outBuff + zds->outStart,
+										   (isSkipFrame ? 0 : zds->outBuffSize - zds->outStart), ip, neededInSize);
+				if (ZSTD_isError(decodedSize))
+					return decodedSize;
+				ip += neededInSize;
+				if (!decodedSize && !isSkipFrame)
+					break; /* this was just a header */
+				zds->outEnd = zds->outStart + decodedSize;
+				zds->stage = zdss_flush;
+				break;
+			}
+			if (ip == iend) {
+				someMoreWork = 0;
+				break;
+			} /* no more input */
+			zds->stage = zdss_load;
+			/* pass-through */
+		}
+
+		case zdss_load: {
+			size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx);
+			size_t const toLoad = neededInSize - zds->inPos; /* should always be <= remaining space within inBuff */
+			size_t loadedSize;
+			if (toLoad > zds->inBuffSize - zds->inPos)
+				return ERROR(corruption_detected); /* should never happen */
+			loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend - ip);
+			ip += loadedSize;
+			zds->inPos += loadedSize;
+			if (loadedSize < toLoad) {
+				someMoreWork = 0;
+				break;
+			} /* not enough input, wait for more */
+
+			/* decode loaded input */
+			{
+				const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx);
+				size_t const decodedSize = ZSTD_decompressContinue(zds->dctx, zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart,
+										   zds->inBuff, neededInSize);
+				if (ZSTD_isError(decodedSize))
+					return decodedSize;
+				zds->inPos = 0; /* input is consumed */
+				if (!decodedSize && !isSkipFrame) {
+					zds->stage = zdss_read;
+					break;
+				} /* this was just a header */
+				zds->outEnd = zds->outStart + decodedSize;
+				zds->stage = zdss_flush;
+				/* pass-through */
+			}
+		}
+
+		case zdss_flush: {
+			size_t const toFlushSize = zds->outEnd - zds->outStart;
+			size_t const flushedSize = ZSTD_limitCopy(op, oend - op, zds->outBuff + zds->outStart, toFlushSize);
+			op += flushedSize;
+			zds->outStart += flushedSize;
+			if (flushedSize == toFlushSize) { /* flush completed */
+				zds->stage = zdss_read;
+				if (zds->outStart + zds->blockSize > zds->outBuffSize)
+					zds->outStart = zds->outEnd = 0;
+				break;
+			}
+			/* cannot complete flush */
+			someMoreWork = 0;
+			break;
+		}
+		default:
+			return ERROR(GENERIC); /* impossible */
+		}
+	}
+
+	/* result */
+	input->pos += (size_t)(ip - istart);
+	output->pos += (size_t)(op - ostart);
+	{
+		size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds->dctx);
+		if (!nextSrcSizeHint) {			    /* frame fully decoded */
+			if (zds->outEnd == zds->outStart) { /* output fully flushed */
+				if (zds->hostageByte) {
+					if (input->pos >= input->size) {
+						zds->stage = zdss_read;
+						return 1;
+					}	     /* can't release hostage (not present) */
+					input->pos++; /* release hostage */
+				}
+				return 0;
+			}
+			if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */
+				input->pos--;    /* note : pos > 0, otherwise, impossible to finish reading last block */
+				zds->hostageByte = 1;
+			}
+			return 1;
+		}
+		nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds->dctx) == ZSTDnit_block); /* preload header of next block */
+		if (zds->inPos > nextSrcSizeHint)
+			return ERROR(GENERIC); /* should never happen */
+		nextSrcSizeHint -= zds->inPos; /* already loaded*/
+		return nextSrcSizeHint;
+	}
+}
+
+EXPORT_SYMBOL(ZSTD_DCtxWorkspaceBound);
+EXPORT_SYMBOL(ZSTD_initDCtx);
+EXPORT_SYMBOL(ZSTD_decompressDCtx);
+EXPORT_SYMBOL(ZSTD_decompress_usingDict);
+
+EXPORT_SYMBOL(ZSTD_DDictWorkspaceBound);
+EXPORT_SYMBOL(ZSTD_initDDict);
+EXPORT_SYMBOL(ZSTD_decompress_usingDDict);
+
+EXPORT_SYMBOL(ZSTD_DStreamWorkspaceBound);
+EXPORT_SYMBOL(ZSTD_initDStream);
+EXPORT_SYMBOL(ZSTD_initDStream_usingDDict);
+EXPORT_SYMBOL(ZSTD_resetDStream);
+EXPORT_SYMBOL(ZSTD_decompressStream);
+EXPORT_SYMBOL(ZSTD_DStreamInSize);
+EXPORT_SYMBOL(ZSTD_DStreamOutSize);
+
+EXPORT_SYMBOL(ZSTD_findFrameCompressedSize);
+EXPORT_SYMBOL(ZSTD_getFrameContentSize);
+EXPORT_SYMBOL(ZSTD_findDecompressedSize);
+
+EXPORT_SYMBOL(ZSTD_isFrame);
+EXPORT_SYMBOL(ZSTD_getDictID_fromDict);
+EXPORT_SYMBOL(ZSTD_getDictID_fromDDict);
+EXPORT_SYMBOL(ZSTD_getDictID_fromFrame);
+
+EXPORT_SYMBOL(ZSTD_getFrameParams);
+EXPORT_SYMBOL(ZSTD_decompressBegin);
+EXPORT_SYMBOL(ZSTD_decompressBegin_usingDict);
+EXPORT_SYMBOL(ZSTD_copyDCtx);
+EXPORT_SYMBOL(ZSTD_nextSrcSizeToDecompress);
+EXPORT_SYMBOL(ZSTD_decompressContinue);
+EXPORT_SYMBOL(ZSTD_nextInputType);
+
+EXPORT_SYMBOL(ZSTD_decompressBlock);
+EXPORT_SYMBOL(ZSTD_insertBlock);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Zstd Decompressor");

diff --git a/lib/zstd/entropy_common.c b/lib/zstd/entropy_common.c
new file mode 100644
index 0000000..2b0a643
--- /dev/null
+++ b/lib/zstd/entropy_common.c

@@ -0,0 +1,243 @@
+/*
+ * Common functions of New Generation Entropy library
+ * Copyright (C) 2016, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ */
+
+/* *************************************
+*  Dependencies
+***************************************/
+#include "error_private.h" /* ERR_*, ERROR */
+#include "fse.h"
+#include "huf.h"
+#include "mem.h"
+
+/*===   Version   ===*/
+unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
+
+/*===   Error Management   ===*/
+unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+
+unsigned HUF_isError(size_t code) { return ERR_isError(code); }
+
+/*-**************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+size_t FSE_readNCount(short *normalizedCounter, unsigned *maxSVPtr, unsigned *tableLogPtr, const void *headerBuffer, size_t hbSize)
+{
+	const BYTE *const istart = (const BYTE *)headerBuffer;
+	const BYTE *const iend = istart + hbSize;
+	const BYTE *ip = istart;
+	int nbBits;
+	int remaining;
+	int threshold;
+	U32 bitStream;
+	int bitCount;
+	unsigned charnum = 0;
+	int previous0 = 0;
+
+	if (hbSize < 4)
+		return ERROR(srcSize_wrong);
+	bitStream = ZSTD_readLE32(ip);
+	nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
+	if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX)
+		return ERROR(tableLog_tooLarge);
+	bitStream >>= 4;
+	bitCount = 4;
+	*tableLogPtr = nbBits;
+	remaining = (1 << nbBits) + 1;
+	threshold = 1 << nbBits;
+	nbBits++;
+
+	while ((remaining > 1) & (charnum <= *maxSVPtr)) {
+		if (previous0) {
+			unsigned n0 = charnum;
+			while ((bitStream & 0xFFFF) == 0xFFFF) {
+				n0 += 24;
+				if (ip < iend - 5) {
+					ip += 2;
+					bitStream = ZSTD_readLE32(ip) >> bitCount;
+				} else {
+					bitStream >>= 16;
+					bitCount += 16;
+				}
+			}
+			while ((bitStream & 3) == 3) {
+				n0 += 3;
+				bitStream >>= 2;
+				bitCount += 2;
+			}
+			n0 += bitStream & 3;
+			bitCount += 2;
+			if (n0 > *maxSVPtr)
+				return ERROR(maxSymbolValue_tooSmall);
+			while (charnum < n0)
+				normalizedCounter[charnum++] = 0;
+			if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) {
+				ip += bitCount >> 3;
+				bitCount &= 7;
+				bitStream = ZSTD_readLE32(ip) >> bitCount;
+			} else {
+				bitStream >>= 2;
+			}
+		}
+		{
+			int const max = (2 * threshold - 1) - remaining;
+			int count;
+
+			if ((bitStream & (threshold - 1)) < (U32)max) {
+				count = bitStream & (threshold - 1);
+				bitCount += nbBits - 1;
+			} else {
+				count = bitStream & (2 * threshold - 1);
+				if (count >= threshold)
+					count -= max;
+				bitCount += nbBits;
+			}
+
+			count--;				 /* extra accuracy */
+			remaining -= count < 0 ? -count : count; /* -1 means +1 */
+			normalizedCounter[charnum++] = (short)count;
+			previous0 = !count;
+			while (remaining < threshold) {
+				nbBits--;
+				threshold >>= 1;
+			}
+
+			if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) {
+				ip += bitCount >> 3;
+				bitCount &= 7;
+			} else {
+				bitCount -= (int)(8 * (iend - 4 - ip));
+				ip = iend - 4;
+			}
+			bitStream = ZSTD_readLE32(ip) >> (bitCount & 31);
+		}
+	} /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
+	if (remaining != 1)
+		return ERROR(corruption_detected);
+	if (bitCount > 32)
+		return ERROR(corruption_detected);
+	*maxSVPtr = charnum - 1;
+
+	ip += (bitCount + 7) >> 3;
+	return ip - istart;
+}
+
+/*! HUF_readStats() :
+	Read compact Huffman tree, saved by HUF_writeCTable().
+	`huffWeight` is destination buffer.
+	`rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
+	@return : size read from `src` , or an error Code .
+	Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
+*/
+size_t HUF_readStats_wksp(BYTE *huffWeight, size_t hwSize, U32 *rankStats, U32 *nbSymbolsPtr, U32 *tableLogPtr, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
+{
+	U32 weightTotal;
+	const BYTE *ip = (const BYTE *)src;
+	size_t iSize;
+	size_t oSize;
+
+	if (!srcSize)
+		return ERROR(srcSize_wrong);
+	iSize = ip[0];
+	/* memset(huffWeight, 0, hwSize);   */ /* is not necessary, even though some analyzer complain ... */
+
+	if (iSize >= 128) { /* special header */
+		oSize = iSize - 127;
+		iSize = ((oSize + 1) / 2);
+		if (iSize + 1 > srcSize)
+			return ERROR(srcSize_wrong);
+		if (oSize >= hwSize)
+			return ERROR(corruption_detected);
+		ip += 1;
+		{
+			U32 n;
+			for (n = 0; n < oSize; n += 2) {
+				huffWeight[n] = ip[n / 2] >> 4;
+				huffWeight[n + 1] = ip[n / 2] & 15;
+			}
+		}
+	} else {						 /* header compressed with FSE (normal case) */
+		if (iSize + 1 > srcSize)
+			return ERROR(srcSize_wrong);
+		oSize = FSE_decompress_wksp(huffWeight, hwSize - 1, ip + 1, iSize, 6, workspace, workspaceSize); /* max (hwSize-1) values decoded, as last one is implied */
+		if (FSE_isError(oSize))
+			return oSize;
+	}
+
+	/* collect weight stats */
+	memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
+	weightTotal = 0;
+	{
+		U32 n;
+		for (n = 0; n < oSize; n++) {
+			if (huffWeight[n] >= HUF_TABLELOG_MAX)
+				return ERROR(corruption_detected);
+			rankStats[huffWeight[n]]++;
+			weightTotal += (1 << huffWeight[n]) >> 1;
+		}
+	}
+	if (weightTotal == 0)
+		return ERROR(corruption_detected);
+
+	/* get last non-null symbol weight (implied, total must be 2^n) */
+	{
+		U32 const tableLog = BIT_highbit32(weightTotal) + 1;
+		if (tableLog > HUF_TABLELOG_MAX)
+			return ERROR(corruption_detected);
+		*tableLogPtr = tableLog;
+		/* determine last weight */
+		{
+			U32 const total = 1 << tableLog;
+			U32 const rest = total - weightTotal;
+			U32 const verif = 1 << BIT_highbit32(rest);
+			U32 const lastWeight = BIT_highbit32(rest) + 1;
+			if (verif != rest)
+				return ERROR(corruption_detected); /* last value must be a clean power of 2 */
+			huffWeight[oSize] = (BYTE)lastWeight;
+			rankStats[lastWeight]++;
+		}
+	}
+
+	/* check tree construction validity */
+	if ((rankStats[1] < 2) || (rankStats[1] & 1))
+		return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
+
+	/* results */
+	*nbSymbolsPtr = (U32)(oSize + 1);
+	return iSize + 1;
+}

diff --git a/lib/zstd/error_private.h b/lib/zstd/error_private.h
new file mode 100644
index 0000000..1a60b31
--- /dev/null
+++ b/lib/zstd/error_private.h

@@ -0,0 +1,53 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of https://github.com/facebook/zstd.
+ * An additional grant of patent rights can be found in the PATENTS file in the
+ * same directory.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ */
+
+/* Note : this module is expected to remain private, do not expose it */
+
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+/* ****************************************
+*  Dependencies
+******************************************/
+#include <linux/types.h> /* size_t */
+#include <linux/zstd.h>  /* enum list */
+
+/* ****************************************
+*  Compiler-specific
+******************************************/
+#define ERR_STATIC static __attribute__((unused))
+
+/*-****************************************
+*  Customization (error_public.h)
+******************************************/
+typedef ZSTD_ErrorCode ERR_enum;
+#define PREFIX(name) ZSTD_error_##name
+
+/*-****************************************
+*  Error codes handling
+******************************************/
+#define ERROR(name) ((size_t)-PREFIX(name))
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+ERR_STATIC ERR_enum ERR_getErrorCode(size_t code)
+{
+	if (!ERR_isError(code))
+		return (ERR_enum)0;
+	return (ERR_enum)(0 - code);
+}
+
+#endif /* ERROR_H_MODULE */

diff --git a/lib/zstd/fse.h b/lib/zstd/fse.h
new file mode 100644
index 0000000..7460ab0
--- /dev/null
+++ b/lib/zstd/fse.h

@@ -0,0 +1,575 @@
+/*
+ * FSE : Finite State Entropy codec
+ * Public Prototypes declaration
+ * Copyright (C) 2013-2016, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ */
+#ifndef FSE_H
+#define FSE_H
+
+/*-*****************************************
+*  Dependencies
+******************************************/
+#include <linux/types.h> /* size_t, ptrdiff_t */
+
+/*-*****************************************
+*  FSE_PUBLIC_API : control library symbols visibility
+******************************************/
+#define FSE_PUBLIC_API
+
+/*------   Version   ------*/
+#define FSE_VERSION_MAJOR 0
+#define FSE_VERSION_MINOR 9
+#define FSE_VERSION_RELEASE 0
+
+#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
+#define FSE_QUOTE(str) #str
+#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
+#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
+
+#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR * 100 * 100 + FSE_VERSION_MINOR * 100 + FSE_VERSION_RELEASE)
+FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
+
+/*-*****************************************
+*  Tool functions
+******************************************/
+FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */
+
+/* Error Management */
+FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
+
+/*-*****************************************
+*  FSE detailed API
+******************************************/
+/*!
+FSE_compress() does the following:
+1. count symbol occurrence from source[] into table count[]
+2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
+3. save normalized counters to memory buffer using writeNCount()
+4. build encoding table 'CTable' from normalized counters
+5. encode the data stream using encoding table 'CTable'
+
+FSE_decompress() does the following:
+1. read normalized counters with readNCount()
+2. build decoding table 'DTable' from normalized counters
+3. decode the data stream using decoding table 'DTable'
+
+The following API allows targeting specific sub-functions for advanced tasks.
+For example, it's possible to compress several blocks using the same 'CTable',
+or to save and provide normalized distribution using external method.
+*/
+
+/* *** COMPRESSION *** */
+/*! FSE_optimalTableLog():
+	dynamically downsize 'tableLog' when conditions are met.
+	It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
+	@return : recommended tableLog (necessarily <= 'maxTableLog') */
+FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
+
+/*! FSE_normalizeCount():
+	normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
+	'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
+	@return : tableLog,
+			  or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t srcSize, unsigned maxSymbolValue);
+
+/*! FSE_NCountWriteBound():
+	Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
+	Typically useful for allocation purpose. */
+FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_writeNCount():
+	Compactly save 'normalizedCounter' into 'buffer'.
+	@return : size of the compressed table,
+			  or an errorCode, which can be tested using FSE_isError(). */
+FSE_PUBLIC_API size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*! Constructor and Destructor of FSE_CTable.
+	Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
+typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
+
+/*! FSE_compress_usingCTable():
+	Compress `src` using `ct` into `dst` which must be already allocated.
+	@return : size of compressed data (<= `dstCapacity`),
+			  or 0 if compressed data could not fit into `dst`,
+			  or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_compress_usingCTable(void *dst, size_t dstCapacity, const void *src, size_t srcSize, const FSE_CTable *ct);
+
+/*!
+Tutorial :
+----------
+The first step is to count all symbols. FSE_count() does this job very fast.
+Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
+'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
+maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
+FSE_count() will return the number of occurrence of the most frequent symbol.
+This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+The next step is to normalize the frequencies.
+FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
+It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
+You can use 'tableLog'==0 to mean "use default tableLog value".
+If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
+which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
+
+The result of FSE_normalizeCount() will be saved into a table,
+called 'normalizedCounter', which is a table of signed short.
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
+The return value is tableLog if everything proceeded as expected.
+It is 0 if there is a single symbol within distribution.
+If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
+'buffer' must be already allocated.
+For guaranteed success, buffer size must be at least FSE_headerBound().
+The result of the function is the number of bytes written into 'buffer'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
+
+'normalizedCounter' can then be used to create the compression table 'CTable'.
+The space required by 'CTable' must be already allocated, using FSE_createCTable().
+You can then use FSE_buildCTable() to fill 'CTable'.
+If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
+
+'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
+Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
+The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
+If it returns '0', compressed data could not fit into 'dst'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+*/
+
+/* *** DECOMPRESSION *** */
+
+/*! FSE_readNCount():
+	Read compactly saved 'normalizedCounter' from 'rBuffer'.
+	@return : size read from 'rBuffer',
+			  or an errorCode, which can be tested using FSE_isError().
+			  maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
+FSE_PUBLIC_API size_t FSE_readNCount(short *normalizedCounter, unsigned *maxSymbolValuePtr, unsigned *tableLogPtr, const void *rBuffer, size_t rBuffSize);
+
+/*! Constructor and Destructor of FSE_DTable.
+	Note that its size depends on 'tableLog' */
+typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
+
+/*! FSE_buildDTable():
+	Builds 'dt', which must be already allocated, using FSE_createDTable().
+	return : 0, or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable *dt, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize);
+
+/*! FSE_decompress_usingDTable():
+	Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
+	into `dst` which must be already allocated.
+	@return : size of regenerated data (necessarily <= `dstCapacity`),
+			  or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void *dst, size_t dstCapacity, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt);
+
+/*!
+Tutorial :
+----------
+(Note : these functions only decompress FSE-compressed blocks.
+ If block is uncompressed, use memcpy() instead
+ If block is a single repeated byte, use memset() instead )
+
+The first step is to obtain the normalized frequencies of symbols.
+This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
+In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
+or size the table to handle worst case situations (typically 256).
+FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
+The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
+Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that.
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
+This is performed by the function FSE_buildDTable().
+The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
+`cSrcSize` must be strictly correct, otherwise decompression will fail.
+FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
+If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
+*/
+
+/* *** Dependency *** */
+#include "bitstream.h"
+
+/* *****************************************
+*  Static allocation
+*******************************************/
+/* FSE buffer bounds */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) (size + (size >> 7))
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
+
+/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1 << (maxTableLog - 1)) + ((maxSymbolValue + 1) * 2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1 << maxTableLog))
+
+/* *****************************************
+*  FSE advanced API
+*******************************************/
+/* FSE_count_wksp() :
+ * Same as FSE_count(), but using an externally provided scratch buffer.
+ * `workSpace` size must be table of >= `1024` unsigned
+ */
+size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace);
+
+/* FSE_countFast_wksp() :
+ * Same as FSE_countFast(), but using an externally provided scratch buffer.
+ * `workSpace` must be a table of minimum `1024` unsigned
+ */
+size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize, unsigned *workSpace);
+
+/*! FSE_count_simple
+ * Same as FSE_countFast(), but does not use any additional memory (not even on stack).
+ * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`).
+*/
+size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize);
+
+unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
+/**< same as FSE_optimalTableLog(), which used `minus==2` */
+
+size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits);
+/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
+
+size_t FSE_buildCTable_rle(FSE_CTable *ct, unsigned char symbolValue);
+/**< build a fake FSE_CTable, designed to compress always the same symbolValue */
+
+/* FSE_buildCTable_wksp() :
+ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
+ * `wkspSize` must be >= `(1<<tableLog)`.
+ */
+size_t FSE_buildCTable_wksp(FSE_CTable *ct, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, size_t wkspSize);
+
+size_t FSE_buildDTable_raw(FSE_DTable *dt, unsigned nbBits);
+/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
+
+size_t FSE_buildDTable_rle(FSE_DTable *dt, unsigned char symbolValue);
+/**< build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+size_t FSE_decompress_wksp(void *dst, size_t dstCapacity, const void *cSrc, size_t cSrcSize, unsigned maxLog, void *workspace, size_t workspaceSize);
+/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
+
+/* *****************************************
+*  FSE symbol compression API
+*******************************************/
+/*!
+   This API consists of small unitary functions, which highly benefit from being inlined.
+   Hence their body are included in next section.
+*/
+typedef struct {
+	ptrdiff_t value;
+	const void *stateTable;
+	const void *symbolTT;
+	unsigned stateLog;
+} FSE_CState_t;
+
+static void FSE_initCState(FSE_CState_t *CStatePtr, const FSE_CTable *ct);
+
+static void FSE_encodeSymbol(BIT_CStream_t *bitC, FSE_CState_t *CStatePtr, unsigned symbol);
+
+static void FSE_flushCState(BIT_CStream_t *bitC, const FSE_CState_t *CStatePtr);
+
+/**<
+These functions are inner components of FSE_compress_usingCTable().
+They allow the creation of custom streams, mixing multiple tables and bit sources.
+
+A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
+So the first symbol you will encode is the last you will decode, like a LIFO stack.
+
+You will need a few variables to track your CStream. They are :
+
+FSE_CTable    ct;         // Provided by FSE_buildCTable()
+BIT_CStream_t bitStream;  // bitStream tracking structure
+FSE_CState_t  state;      // State tracking structure (can have several)
+
+
+The first thing to do is to init bitStream and state.
+	size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
+	FSE_initCState(&state, ct);
+
+Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
+You can then encode your input data, byte after byte.
+FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
+Remember decoding will be done in reverse direction.
+	FSE_encodeByte(&bitStream, &state, symbol);
+
+At any time, you can also add any bit sequence.
+Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
+	BIT_addBits(&bitStream, bitField, nbBits);
+
+The above methods don't commit data to memory, they just store it into local register, for speed.
+Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+Writing data to memory is a manual operation, performed by the flushBits function.
+	BIT_flushBits(&bitStream);
+
+Your last FSE encoding operation shall be to flush your last state value(s).
+	FSE_flushState(&bitStream, &state);
+
+Finally, you must close the bitStream.
+The function returns the size of CStream in bytes.
+If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible)
+If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
+	size_t size = BIT_closeCStream(&bitStream);
+*/
+
+/* *****************************************
+*  FSE symbol decompression API
+*******************************************/
+typedef struct {
+	size_t state;
+	const void *table; /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+static void FSE_initDState(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD, const FSE_DTable *dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t *DStatePtr);
+
+/**<
+Let's now decompose FSE_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BIT_DStream_t DStream;    // Stream context
+FSE_DState_t  DState;     // State context. Multiple ones are possible
+FSE_DTable*   DTablePtr;  // Decoding table, provided by FSE_buildDTable()
+
+The first thing to do is to init the bitStream.
+	errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+	errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+	unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+	size_t bitField = BIT_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+	endSignal = FSE_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left into the DStream.
+BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+	BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+	BIT_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+	FSE_endOfDState(&DState);
+*/
+
+/* *****************************************
+*  FSE unsafe API
+*******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+/* *****************************************
+*  Implementation of inlined functions
+*******************************************/
+typedef struct {
+	int deltaFindState;
+	U32 deltaNbBits;
+} FSE_symbolCompressionTransform; /* total 8 bytes */
+
+ZSTD_STATIC void FSE_initCState(FSE_CState_t *statePtr, const FSE_CTable *ct)
+{
+	const void *ptr = ct;
+	const U16 *u16ptr = (const U16 *)ptr;
+	const U32 tableLog = ZSTD_read16(ptr);
+	statePtr->value = (ptrdiff_t)1 << tableLog;
+	statePtr->stateTable = u16ptr + 2;
+	statePtr->symbolTT = ((const U32 *)ct + 1 + (tableLog ? (1 << (tableLog - 1)) : 1));
+	statePtr->stateLog = tableLog;
+}
+
+/*! FSE_initCState2() :
+*   Same as FSE_initCState(), but the first symbol to include (which will be the last to be read)
+*   uses the smallest state value possible, saving the cost of this symbol */
+ZSTD_STATIC void FSE_initCState2(FSE_CState_t *statePtr, const FSE_CTable *ct, U32 symbol)
+{
+	FSE_initCState(statePtr, ct);
+	{
+		const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform *)(statePtr->symbolTT))[symbol];
+		const U16 *stateTable = (const U16 *)(statePtr->stateTable);
+		U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1 << 15)) >> 16);
+		statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits;
+		statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
+	}
+}
+
+ZSTD_STATIC void FSE_encodeSymbol(BIT_CStream_t *bitC, FSE_CState_t *statePtr, U32 symbol)
+{
+	const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform *)(statePtr->symbolTT))[symbol];
+	const U16 *const stateTable = (const U16 *)(statePtr->stateTable);
+	U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
+	BIT_addBits(bitC, statePtr->value, nbBitsOut);
+	statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
+}
+
+ZSTD_STATIC void FSE_flushCState(BIT_CStream_t *bitC, const FSE_CState_t *statePtr)
+{
+	BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
+	BIT_flushBits(bitC);
+}
+
+/* ======    Decompression    ====== */
+
+typedef struct {
+	U16 tableLog;
+	U16 fastMode;
+} FSE_DTableHeader; /* sizeof U32 */
+
+typedef struct {
+	unsigned short newState;
+	unsigned char symbol;
+	unsigned char nbBits;
+} FSE_decode_t; /* size == U32 */
+
+ZSTD_STATIC void FSE_initDState(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD, const FSE_DTable *dt)
+{
+	const void *ptr = dt;
+	const FSE_DTableHeader *const DTableH = (const FSE_DTableHeader *)ptr;
+	DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
+	BIT_reloadDStream(bitD);
+	DStatePtr->table = dt + 1;
+}
+
+ZSTD_STATIC BYTE FSE_peekSymbol(const FSE_DState_t *DStatePtr)
+{
+	FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state];
+	return DInfo.symbol;
+}
+
+ZSTD_STATIC void FSE_updateState(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD)
+{
+	FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state];
+	U32 const nbBits = DInfo.nbBits;
+	size_t const lowBits = BIT_readBits(bitD, nbBits);
+	DStatePtr->state = DInfo.newState + lowBits;
+}
+
+ZSTD_STATIC BYTE FSE_decodeSymbol(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD)
+{
+	FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state];
+	U32 const nbBits = DInfo.nbBits;
+	BYTE const symbol = DInfo.symbol;
+	size_t const lowBits = BIT_readBits(bitD, nbBits);
+
+	DStatePtr->state = DInfo.newState + lowBits;
+	return symbol;
+}
+
+/*! FSE_decodeSymbolFast() :
+	unsafe, only works if no symbol has a probability > 50% */
+ZSTD_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD)
+{
+	FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state];
+	U32 const nbBits = DInfo.nbBits;
+	BYTE const symbol = DInfo.symbol;
+	size_t const lowBits = BIT_readBitsFast(bitD, nbBits);
+
+	DStatePtr->state = DInfo.newState + lowBits;
+	return symbol;
+}
+
+ZSTD_STATIC unsigned FSE_endOfDState(const FSE_DState_t *DStatePtr) { return DStatePtr->state == 0; }
+
+/* **************************************************************
+*  Tuning parameters
+****************************************************************/
+/*!MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#ifndef FSE_MAX_MEMORY_USAGE
+#define FSE_MAX_MEMORY_USAGE 14
+#endif
+#ifndef FSE_DEFAULT_MEMORY_USAGE
+#define FSE_DEFAULT_MEMORY_USAGE 13
+#endif
+
+/*!FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#ifndef FSE_MAX_SYMBOL_VALUE
+#define FSE_MAX_SYMBOL_VALUE 255
+#endif
+
+/* **************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+#define FSE_DECODE_TYPE FSE_decode_t
+
+/* ***************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE - 2)
+#define FSE_MAX_TABLESIZE (1U << FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE - 1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE - 2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+#define FSE_TABLESTEP(tableSize) ((tableSize >> 1) + (tableSize >> 3) + 3)
+
+#endif /* FSE_H */

diff --git a/lib/zstd/fse_compress.c b/lib/zstd/fse_compress.c
new file mode 100644
index 0000000..ef3d174
--- /dev/null
+++ b/lib/zstd/fse_compress.c

@@ -0,0 +1,795 @@
+/*
+ * FSE : Finite State Entropy encoder
+ * Copyright (C) 2013-2015, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#define FORCE_INLINE static __always_inline
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include "bitstream.h"
+#include "fse.h"
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/string.h> /* memcpy, memset */
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c)                                   \
+	{                                                      \
+		enum { FSE_static_assert = 1 / (int)(!!(c)) }; \
+	} /* use only *after* variable declarations */
+
+/* **************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X, Y) X##Y
+#define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y)
+#define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y)
+
+/* Function templates */
+
+/* FSE_buildCTable_wksp() :
+ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
+ * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
+ * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
+ */
+size_t FSE_buildCTable_wksp(FSE_CTable *ct, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize)
+{
+	U32 const tableSize = 1 << tableLog;
+	U32 const tableMask = tableSize - 1;
+	void *const ptr = ct;
+	U16 *const tableU16 = ((U16 *)ptr) + 2;
+	void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableLog ? tableSize >> 1 : 1);
+	FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT);
+	U32 const step = FSE_TABLESTEP(tableSize);
+	U32 highThreshold = tableSize - 1;
+
+	U32 *cumul;
+	FSE_FUNCTION_TYPE *tableSymbol;
+	size_t spaceUsed32 = 0;
+
+	cumul = (U32 *)workspace + spaceUsed32;
+	spaceUsed32 += FSE_MAX_SYMBOL_VALUE + 2;
+	tableSymbol = (FSE_FUNCTION_TYPE *)((U32 *)workspace + spaceUsed32);
+	spaceUsed32 += ALIGN(sizeof(FSE_FUNCTION_TYPE) * ((size_t)1 << tableLog), sizeof(U32)) >> 2;
+
+	if ((spaceUsed32 << 2) > workspaceSize)
+		return ERROR(tableLog_tooLarge);
+	workspace = (U32 *)workspace + spaceUsed32;
+	workspaceSize -= (spaceUsed32 << 2);
+
+	/* CTable header */
+	tableU16[-2] = (U16)tableLog;
+	tableU16[-1] = (U16)maxSymbolValue;
+
+	/* For explanations on how to distribute symbol values over the table :
+	*  http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
+
+	/* symbol start positions */
+	{
+		U32 u;
+		cumul[0] = 0;
+		for (u = 1; u <= maxSymbolValue + 1; u++) {
+			if (normalizedCounter[u - 1] == -1) { /* Low proba symbol */
+				cumul[u] = cumul[u - 1] + 1;
+				tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u - 1);
+			} else {
+				cumul[u] = cumul[u - 1] + normalizedCounter[u - 1];
+			}
+		}
+		cumul[maxSymbolValue + 1] = tableSize + 1;
+	}
+
+	/* Spread symbols */
+	{
+		U32 position = 0;
+		U32 symbol;
+		for (symbol = 0; symbol <= maxSymbolValue; symbol++) {
+			int nbOccurences;
+			for (nbOccurences = 0; nbOccurences < normalizedCounter[symbol]; nbOccurences++) {
+				tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
+				position = (position + step) & tableMask;
+				while (position > highThreshold)
+					position = (position + step) & tableMask; /* Low proba area */
+			}
+		}
+
+		if (position != 0)
+			return ERROR(GENERIC); /* Must have gone through all positions */
+	}
+
+	/* Build table */
+	{
+		U32 u;
+		for (u = 0; u < tableSize; u++) {
+			FSE_FUNCTION_TYPE s = tableSymbol[u];	/* note : static analyzer may not understand tableSymbol is properly initialized */
+			tableU16[cumul[s]++] = (U16)(tableSize + u); /* TableU16 : sorted by symbol order; gives next state value */
+		}
+	}
+
+	/* Build Symbol Transformation Table */
+	{
+		unsigned total = 0;
+		unsigned s;
+		for (s = 0; s <= maxSymbolValue; s++) {
+			switch (normalizedCounter[s]) {
+			case 0: break;
+
+			case -1:
+			case 1:
+				symbolTT[s].deltaNbBits = (tableLog << 16) - (1 << tableLog);
+				symbolTT[s].deltaFindState = total - 1;
+				total++;
+				break;
+			default: {
+				U32 const maxBitsOut = tableLog - BIT_highbit32(normalizedCounter[s] - 1);
+				U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
+				symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
+				symbolTT[s].deltaFindState = total - normalizedCounter[s];
+				total += normalizedCounter[s];
+			}
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*-**************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
+{
+	size_t const maxHeaderSize = (((maxSymbolValue + 1) * tableLog) >> 3) + 3;
+	return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
+}
+
+static size_t FSE_writeNCount_generic(void *header, size_t headerBufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
+				      unsigned writeIsSafe)
+{
+	BYTE *const ostart = (BYTE *)header;
+	BYTE *out = ostart;
+	BYTE *const oend = ostart + headerBufferSize;
+	int nbBits;
+	const int tableSize = 1 << tableLog;
+	int remaining;
+	int threshold;
+	U32 bitStream;
+	int bitCount;
+	unsigned charnum = 0;
+	int previous0 = 0;
+
+	bitStream = 0;
+	bitCount = 0;
+	/* Table Size */
+	bitStream += (tableLog - FSE_MIN_TABLELOG) << bitCount;
+	bitCount += 4;
+
+	/* Init */
+	remaining = tableSize + 1; /* +1 for extra accuracy */
+	threshold = tableSize;
+	nbBits = tableLog + 1;
+
+	while (remaining > 1) { /* stops at 1 */
+		if (previous0) {
+			unsigned start = charnum;
+			while (!normalizedCounter[charnum])
+				charnum++;
+			while (charnum >= start + 24) {
+				start += 24;
+				bitStream += 0xFFFFU << bitCount;
+				if ((!writeIsSafe) && (out > oend - 2))
+					return ERROR(dstSize_tooSmall); /* Buffer overflow */
+				out[0] = (BYTE)bitStream;
+				out[1] = (BYTE)(bitStream >> 8);
+				out += 2;
+				bitStream >>= 16;
+			}
+			while (charnum >= start + 3) {
+				start += 3;
+				bitStream += 3 << bitCount;
+				bitCount += 2;
+			}
+			bitStream += (charnum - start) << bitCount;
+			bitCount += 2;
+			if (bitCount > 16) {
+				if ((!writeIsSafe) && (out > oend - 2))
+					return ERROR(dstSize_tooSmall); /* Buffer overflow */
+				out[0] = (BYTE)bitStream;
+				out[1] = (BYTE)(bitStream >> 8);
+				out += 2;
+				bitStream >>= 16;
+				bitCount -= 16;
+			}
+		}
+		{
+			int count = normalizedCounter[charnum++];
+			int const max = (2 * threshold - 1) - remaining;
+			remaining -= count < 0 ? -count : count;
+			count++; /* +1 for extra accuracy */
+			if (count >= threshold)
+				count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
+			bitStream += count << bitCount;
+			bitCount += nbBits;
+			bitCount -= (count < max);
+			previous0 = (count == 1);
+			if (remaining < 1)
+				return ERROR(GENERIC);
+			while (remaining < threshold)
+				nbBits--, threshold >>= 1;
+		}
+		if (bitCount > 16) {
+			if ((!writeIsSafe) && (out > oend - 2))
+				return ERROR(dstSize_tooSmall); /* Buffer overflow */
+			out[0] = (BYTE)bitStream;
+			out[1] = (BYTE)(bitStream >> 8);
+			out += 2;
+			bitStream >>= 16;
+			bitCount -= 16;
+		}
+	}
+
+	/* flush remaining bitStream */
+	if ((!writeIsSafe) && (out > oend - 2))
+		return ERROR(dstSize_tooSmall); /* Buffer overflow */
+	out[0] = (BYTE)bitStream;
+	out[1] = (BYTE)(bitStream >> 8);
+	out += (bitCount + 7) / 8;
+
+	if (charnum > maxSymbolValue + 1)
+		return ERROR(GENERIC);
+
+	return (out - ostart);
+}
+
+size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+	if (tableLog > FSE_MAX_TABLELOG)
+		return ERROR(tableLog_tooLarge); /* Unsupported */
+	if (tableLog < FSE_MIN_TABLELOG)
+		return ERROR(GENERIC); /* Unsupported */
+
+	if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
+		return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
+
+	return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
+}
+
+/*-**************************************************************
+*  Counting histogram
+****************************************************************/
+/*! FSE_count_simple
+	This function counts byte values within `src`, and store the histogram into table `count`.
+	It doesn't use any additional memory.
+	But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
+	For this reason, prefer using a table `count` with 256 elements.
+	@return : count of most numerous element
+*/
+size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize)
+{
+	const BYTE *ip = (const BYTE *)src;
+	const BYTE *const end = ip + srcSize;
+	unsigned maxSymbolValue = *maxSymbolValuePtr;
+	unsigned max = 0;
+
+	memset(count, 0, (maxSymbolValue + 1) * sizeof(*count));
+	if (srcSize == 0) {
+		*maxSymbolValuePtr = 0;
+		return 0;
+	}
+
+	while (ip < end)
+		count[*ip++]++;
+
+	while (!count[maxSymbolValue])
+		maxSymbolValue--;
+	*maxSymbolValuePtr = maxSymbolValue;
+
+	{
+		U32 s;
+		for (s = 0; s <= maxSymbolValue; s++)
+			if (count[s] > max)
+				max = count[s];
+	}
+
+	return (size_t)max;
+}
+
+/* FSE_count_parallel_wksp() :
+ * Same as FSE_count_parallel(), but using an externally provided scratch buffer.
+ * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */
+static size_t FSE_count_parallel_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned checkMax,
+				      unsigned *const workSpace)
+{
+	const BYTE *ip = (const BYTE *)source;
+	const BYTE *const iend = ip + sourceSize;
+	unsigned maxSymbolValue = *maxSymbolValuePtr;
+	unsigned max = 0;
+	U32 *const Counting1 = workSpace;
+	U32 *const Counting2 = Counting1 + 256;
+	U32 *const Counting3 = Counting2 + 256;
+	U32 *const Counting4 = Counting3 + 256;
+
+	memset(Counting1, 0, 4 * 256 * sizeof(unsigned));
+
+	/* safety checks */
+	if (!sourceSize) {
+		memset(count, 0, maxSymbolValue + 1);
+		*maxSymbolValuePtr = 0;
+		return 0;
+	}
+	if (!maxSymbolValue)
+		maxSymbolValue = 255; /* 0 == default */
+
+	/* by stripes of 16 bytes */
+	{
+		U32 cached = ZSTD_read32(ip);
+		ip += 4;
+		while (ip < iend - 15) {
+			U32 c = cached;
+			cached = ZSTD_read32(ip);
+			ip += 4;
+			Counting1[(BYTE)c]++;
+			Counting2[(BYTE)(c >> 8)]++;
+			Counting3[(BYTE)(c >> 16)]++;
+			Counting4[c >> 24]++;
+			c = cached;
+			cached = ZSTD_read32(ip);
+			ip += 4;
+			Counting1[(BYTE)c]++;
+			Counting2[(BYTE)(c >> 8)]++;
+			Counting3[(BYTE)(c >> 16)]++;
+			Counting4[c >> 24]++;
+			c = cached;
+			cached = ZSTD_read32(ip);
+			ip += 4;
+			Counting1[(BYTE)c]++;
+			Counting2[(BYTE)(c >> 8)]++;
+			Counting3[(BYTE)(c >> 16)]++;
+			Counting4[c >> 24]++;
+			c = cached;
+			cached = ZSTD_read32(ip);
+			ip += 4;
+			Counting1[(BYTE)c]++;
+			Counting2[(BYTE)(c >> 8)]++;
+			Counting3[(BYTE)(c >> 16)]++;
+			Counting4[c >> 24]++;
+		}
+		ip -= 4;
+	}
+
+	/* finish last symbols */
+	while (ip < iend)
+		Counting1[*ip++]++;
+
+	if (checkMax) { /* verify stats will fit into destination table */
+		U32 s;
+		for (s = 255; s > maxSymbolValue; s--) {
+			Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
+			if (Counting1[s])
+				return ERROR(maxSymbolValue_tooSmall);
+		}
+	}
+
+	{
+		U32 s;
+		for (s = 0; s <= maxSymbolValue; s++) {
+			count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
+			if (count[s] > max)
+				max = count[s];
+		}
+	}
+
+	while (!count[maxSymbolValue])
+		maxSymbolValue--;
+	*maxSymbolValuePtr = maxSymbolValue;
+	return (size_t)max;
+}
+
+/* FSE_countFast_wksp() :
+ * Same as FSE_countFast(), but using an externally provided scratch buffer.
+ * `workSpace` size must be table of >= `1024` unsigned */
+size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace)
+{
+	if (sourceSize < 1500)
+		return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
+	return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
+}
+
+/* FSE_count_wksp() :
+ * Same as FSE_count(), but using an externally provided scratch buffer.
+ * `workSpace` size must be table of >= `1024` unsigned */
+size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace)
+{
+	if (*maxSymbolValuePtr < 255)
+		return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
+	*maxSymbolValuePtr = 255;
+	return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
+}
+
+/*-**************************************************************
+*  FSE Compression Code
+****************************************************************/
+/*! FSE_sizeof_CTable() :
+	FSE_CTable is a variable size structure which contains :
+	`U16 tableLog;`
+	`U16 maxSymbolValue;`
+	`U16 nextStateNumber[1 << tableLog];`                         // This size is variable
+	`FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];`  // This size is variable
+Allocation is manual (C standard does not support variable-size structures).
+*/
+size_t FSE_sizeof_CTable(unsigned maxSymbolValue, unsigned tableLog)
+{
+	if (tableLog > FSE_MAX_TABLELOG)
+		return ERROR(tableLog_tooLarge);
+	return FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue) * sizeof(U32);
+}
+
+/* provides the minimum logSize to safely represent a distribution */
+static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
+{
+	U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
+	U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
+	U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
+	return minBits;
+}
+
+unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
+{
+	U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
+	U32 tableLog = maxTableLog;
+	U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
+	if (tableLog == 0)
+		tableLog = FSE_DEFAULT_TABLELOG;
+	if (maxBitsSrc < tableLog)
+		tableLog = maxBitsSrc; /* Accuracy can be reduced */
+	if (minBits > tableLog)
+		tableLog = minBits; /* Need a minimum to safely represent all symbol values */
+	if (tableLog < FSE_MIN_TABLELOG)
+		tableLog = FSE_MIN_TABLELOG;
+	if (tableLog > FSE_MAX_TABLELOG)
+		tableLog = FSE_MAX_TABLELOG;
+	return tableLog;
+}
+
+unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
+{
+	return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
+}
+
+/* Secondary normalization method.
+   To be used when primary method fails. */
+
+static size_t FSE_normalizeM2(short *norm, U32 tableLog, const unsigned *count, size_t total, U32 maxSymbolValue)
+{
+	short const NOT_YET_ASSIGNED = -2;
+	U32 s;
+	U32 distributed = 0;
+	U32 ToDistribute;
+
+	/* Init */
+	U32 const lowThreshold = (U32)(total >> tableLog);
+	U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
+
+	for (s = 0; s <= maxSymbolValue; s++) {
+		if (count[s] == 0) {
+			norm[s] = 0;
+			continue;
+		}
+		if (count[s] <= lowThreshold) {
+			norm[s] = -1;
+			distributed++;
+			total -= count[s];
+			continue;
+		}
+		if (count[s] <= lowOne) {
+			norm[s] = 1;
+			distributed++;
+			total -= count[s];
+			continue;
+		}
+
+		norm[s] = NOT_YET_ASSIGNED;
+	}
+	ToDistribute = (1 << tableLog) - distributed;
+
+	if ((total / ToDistribute) > lowOne) {
+		/* risk of rounding to zero */
+		lowOne = (U32)((total * 3) / (ToDistribute * 2));
+		for (s = 0; s <= maxSymbolValue; s++) {
+			if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
+				norm[s] = 1;
+				distributed++;
+				total -= count[s];
+				continue;
+			}
+		}
+		ToDistribute = (1 << tableLog) - distributed;
+	}
+
+	if (distributed == maxSymbolValue + 1) {
+		/* all values are pretty poor;
+		   probably incompressible data (should have already been detected);
+		   find max, then give all remaining points to max */
+		U32 maxV = 0, maxC = 0;
+		for (s = 0; s <= maxSymbolValue; s++)
+			if (count[s] > maxC)
+				maxV = s, maxC = count[s];
+		norm[maxV] += (short)ToDistribute;
+		return 0;
+	}
+
+	if (total == 0) {
+		/* all of the symbols were low enough for the lowOne or lowThreshold */
+		for (s = 0; ToDistribute > 0; s = (s + 1) % (maxSymbolValue + 1))
+			if (norm[s] > 0)
+				ToDistribute--, norm[s]++;
+		return 0;
+	}
+
+	{
+		U64 const vStepLog = 62 - tableLog;
+		U64 const mid = (1ULL << (vStepLog - 1)) - 1;
+		U64 const rStep = div_u64((((U64)1 << vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */
+		U64 tmpTotal = mid;
+		for (s = 0; s <= maxSymbolValue; s++) {
+			if (norm[s] == NOT_YET_ASSIGNED) {
+				U64 const end = tmpTotal + (count[s] * rStep);
+				U32 const sStart = (U32)(tmpTotal >> vStepLog);
+				U32 const sEnd = (U32)(end >> vStepLog);
+				U32 const weight = sEnd - sStart;
+				if (weight < 1)
+					return ERROR(GENERIC);
+				norm[s] = (short)weight;
+				tmpTotal = end;
+			}
+		}
+	}
+
+	return 0;
+}
+
+size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t total, unsigned maxSymbolValue)
+{
+	/* Sanity checks */
+	if (tableLog == 0)
+		tableLog = FSE_DEFAULT_TABLELOG;
+	if (tableLog < FSE_MIN_TABLELOG)
+		return ERROR(GENERIC); /* Unsupported size */
+	if (tableLog > FSE_MAX_TABLELOG)
+		return ERROR(tableLog_tooLarge); /* Unsupported size */
+	if (tableLog < FSE_minTableLog(total, maxSymbolValue))
+		return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
+
+	{
+		U32 const rtbTable[] = {0, 473195, 504333, 520860, 550000, 700000, 750000, 830000};
+		U64 const scale = 62 - tableLog;
+		U64 const step = div_u64((U64)1 << 62, (U32)total); /* <== here, one division ! */
+		U64 const vStep = 1ULL << (scale - 20);
+		int stillToDistribute = 1 << tableLog;
+		unsigned s;
+		unsigned largest = 0;
+		short largestP = 0;
+		U32 lowThreshold = (U32)(total >> tableLog);
+
+		for (s = 0; s <= maxSymbolValue; s++) {
+			if (count[s] == total)
+				return 0; /* rle special case */
+			if (count[s] == 0) {
+				normalizedCounter[s] = 0;
+				continue;
+			}
+			if (count[s] <= lowThreshold) {
+				normalizedCounter[s] = -1;
+				stillToDistribute--;
+			} else {
+				short proba = (short)((count[s] * step) >> scale);
+				if (proba < 8) {
+					U64 restToBeat = vStep * rtbTable[proba];
+					proba += (count[s] * step) - ((U64)proba << scale) > restToBeat;
+				}
+				if (proba > largestP)
+					largestP = proba, largest = s;
+				normalizedCounter[s] = proba;
+				stillToDistribute -= proba;
+			}
+		}
+		if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
+			/* corner case, need another normalization method */
+			size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
+			if (FSE_isError(errorCode))
+				return errorCode;
+		} else
+			normalizedCounter[largest] += (short)stillToDistribute;
+	}
+
+	return tableLog;
+}
+
+/* fake FSE_CTable, for raw (uncompressed) input */
+size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits)
+{
+	const unsigned tableSize = 1 << nbBits;
+	const unsigned tableMask = tableSize - 1;
+	const unsigned maxSymbolValue = tableMask;
+	void *const ptr = ct;
+	U16 *const tableU16 = ((U16 *)ptr) + 2;
+	void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableSize >> 1); /* assumption : tableLog >= 1 */
+	FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT);
+	unsigned s;
+
+	/* Sanity checks */
+	if (nbBits < 1)
+		return ERROR(GENERIC); /* min size */
+
+	/* header */
+	tableU16[-2] = (U16)nbBits;
+	tableU16[-1] = (U16)maxSymbolValue;
+
+	/* Build table */
+	for (s = 0; s < tableSize; s++)
+		tableU16[s] = (U16)(tableSize + s);
+
+	/* Build Symbol Transformation Table */
+	{
+		const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
+		for (s = 0; s <= maxSymbolValue; s++) {
+			symbolTT[s].deltaNbBits = deltaNbBits;
+			symbolTT[s].deltaFindState = s - 1;
+		}
+	}
+
+	return 0;
+}
+
+/* fake FSE_CTable, for rle input (always same symbol) */
+size_t FSE_buildCTable_rle(FSE_CTable *ct, BYTE symbolValue)
+{
+	void *ptr = ct;
+	U16 *tableU16 = ((U16 *)ptr) + 2;
+	void *FSCTptr = (U32 *)ptr + 2;
+	FSE_symbolCompressionTransform *symbolTT = (FSE_symbolCompressionTransform *)FSCTptr;
+
+	/* header */
+	tableU16[-2] = (U16)0;
+	tableU16[-1] = (U16)symbolValue;
+
+	/* Build table */
+	tableU16[0] = 0;
+	tableU16[1] = 0; /* just in case */
+
+	/* Build Symbol Transformation Table */
+	symbolTT[symbolValue].deltaNbBits = 0;
+	symbolTT[symbolValue].deltaFindState = 0;
+
+	return 0;
+}
+
+static size_t FSE_compress_usingCTable_generic(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct, const unsigned fast)
+{
+	const BYTE *const istart = (const BYTE *)src;
+	const BYTE *const iend = istart + srcSize;
+	const BYTE *ip = iend;
+
+	BIT_CStream_t bitC;
+	FSE_CState_t CState1, CState2;
+
+	/* init */
+	if (srcSize <= 2)
+		return 0;
+	{
+		size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
+		if (FSE_isError(initError))
+			return 0; /* not enough space available to write a bitstream */
+	}
+
+#define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
+
+	if (srcSize & 1) {
+		FSE_initCState2(&CState1, ct, *--ip);
+		FSE_initCState2(&CState2, ct, *--ip);
+		FSE_encodeSymbol(&bitC, &CState1, *--ip);
+		FSE_FLUSHBITS(&bitC);
+	} else {
+		FSE_initCState2(&CState2, ct, *--ip);
+		FSE_initCState2(&CState1, ct, *--ip);
+	}
+
+	/* join to mod 4 */
+	srcSize -= 2;
+	if ((sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) && (srcSize & 2)) { /* test bit 2 */
+		FSE_encodeSymbol(&bitC, &CState2, *--ip);
+		FSE_encodeSymbol(&bitC, &CState1, *--ip);
+		FSE_FLUSHBITS(&bitC);
+	}
+
+	/* 2 or 4 encoding per loop */
+	while (ip > istart) {
+
+		FSE_encodeSymbol(&bitC, &CState2, *--ip);
+
+		if (sizeof(bitC.bitContainer) * 8 < FSE_MAX_TABLELOG * 2 + 7) /* this test must be static */
+			FSE_FLUSHBITS(&bitC);
+
+		FSE_encodeSymbol(&bitC, &CState1, *--ip);
+
+		if (sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) { /* this test must be static */
+			FSE_encodeSymbol(&bitC, &CState2, *--ip);
+			FSE_encodeSymbol(&bitC, &CState1, *--ip);
+		}
+
+		FSE_FLUSHBITS(&bitC);
+	}
+
+	FSE_flushCState(&bitC, &CState2);
+	FSE_flushCState(&bitC, &CState1);
+	return BIT_closeCStream(&bitC);
+}
+
+size_t FSE_compress_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct)
+{
+	unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
+
+	if (fast)
+		return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
+	else
+		return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
+}
+
+size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }

diff --git a/lib/zstd/fse_decompress.c b/lib/zstd/fse_decompress.c
new file mode 100644
index 0000000..a84300e
--- /dev/null
+++ b/lib/zstd/fse_decompress.c

@@ -0,0 +1,332 @@
+/*
+ * FSE : Finite State Entropy decoder
+ * Copyright (C) 2013-2015, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#define FORCE_INLINE static __always_inline
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include "bitstream.h"
+#include "fse.h"
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/string.h> /* memcpy, memset */
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_isError ERR_isError
+#define FSE_STATIC_ASSERT(c)                                   \
+	{                                                      \
+		enum { FSE_static_assert = 1 / (int)(!!(c)) }; \
+	} /* use only *after* variable declarations */
+
+/* check and forward error code */
+#define CHECK_F(f)                  \
+	{                           \
+		size_t const e = f; \
+		if (FSE_isError(e)) \
+			return e;   \
+	}
+
+/* **************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X, Y) X##Y
+#define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y)
+#define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y)
+
+/* Function templates */
+
+size_t FSE_buildDTable_wksp(FSE_DTable *dt, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize)
+{
+	void *const tdPtr = dt + 1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
+	FSE_DECODE_TYPE *const tableDecode = (FSE_DECODE_TYPE *)(tdPtr);
+	U16 *symbolNext = (U16 *)workspace;
+
+	U32 const maxSV1 = maxSymbolValue + 1;
+	U32 const tableSize = 1 << tableLog;
+	U32 highThreshold = tableSize - 1;
+
+	/* Sanity Checks */
+	if (workspaceSize < sizeof(U16) * (FSE_MAX_SYMBOL_VALUE + 1))
+		return ERROR(tableLog_tooLarge);
+	if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE)
+		return ERROR(maxSymbolValue_tooLarge);
+	if (tableLog > FSE_MAX_TABLELOG)
+		return ERROR(tableLog_tooLarge);
+
+	/* Init, lay down lowprob symbols */
+	{
+		FSE_DTableHeader DTableH;
+		DTableH.tableLog = (U16)tableLog;
+		DTableH.fastMode = 1;
+		{
+			S16 const largeLimit = (S16)(1 << (tableLog - 1));
+			U32 s;
+			for (s = 0; s < maxSV1; s++) {
+				if (normalizedCounter[s] == -1) {
+					tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+					symbolNext[s] = 1;
+				} else {
+					if (normalizedCounter[s] >= largeLimit)
+						DTableH.fastMode = 0;
+					symbolNext[s] = normalizedCounter[s];
+				}
+			}
+		}
+		memcpy(dt, &DTableH, sizeof(DTableH));
+	}
+
+	/* Spread symbols */
+	{
+		U32 const tableMask = tableSize - 1;
+		U32 const step = FSE_TABLESTEP(tableSize);
+		U32 s, position = 0;
+		for (s = 0; s < maxSV1; s++) {
+			int i;
+			for (i = 0; i < normalizedCounter[s]; i++) {
+				tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+				position = (position + step) & tableMask;
+				while (position > highThreshold)
+					position = (position + step) & tableMask; /* lowprob area */
+			}
+		}
+		if (position != 0)
+			return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+	}
+
+	/* Build Decoding table */
+	{
+		U32 u;
+		for (u = 0; u < tableSize; u++) {
+			FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
+			U16 nextState = symbolNext[symbol]++;
+			tableDecode[u].nbBits = (BYTE)(tableLog - BIT_highbit32((U32)nextState));
+			tableDecode[u].newState = (U16)((nextState << tableDecode[u].nbBits) - tableSize);
+		}
+	}
+
+	return 0;
+}
+
+/*-*******************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+size_t FSE_buildDTable_rle(FSE_DTable *dt, BYTE symbolValue)
+{
+	void *ptr = dt;
+	FSE_DTableHeader *const DTableH = (FSE_DTableHeader *)ptr;
+	void *dPtr = dt + 1;
+	FSE_decode_t *const cell = (FSE_decode_t *)dPtr;
+
+	DTableH->tableLog = 0;
+	DTableH->fastMode = 0;
+
+	cell->newState = 0;
+	cell->symbol = symbolValue;
+	cell->nbBits = 0;
+
+	return 0;
+}
+
+size_t FSE_buildDTable_raw(FSE_DTable *dt, unsigned nbBits)
+{
+	void *ptr = dt;
+	FSE_DTableHeader *const DTableH = (FSE_DTableHeader *)ptr;
+	void *dPtr = dt + 1;
+	FSE_decode_t *const dinfo = (FSE_decode_t *)dPtr;
+	const unsigned tableSize = 1 << nbBits;
+	const unsigned tableMask = tableSize - 1;
+	const unsigned maxSV1 = tableMask + 1;
+	unsigned s;
+
+	/* Sanity checks */
+	if (nbBits < 1)
+		return ERROR(GENERIC); /* min size */
+
+	/* Build Decoding Table */
+	DTableH->tableLog = (U16)nbBits;
+	DTableH->fastMode = 1;
+	for (s = 0; s < maxSV1; s++) {
+		dinfo[s].newState = 0;
+		dinfo[s].symbol = (BYTE)s;
+		dinfo[s].nbBits = (BYTE)nbBits;
+	}
+
+	return 0;
+}
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt,
+						       const unsigned fast)
+{
+	BYTE *const ostart = (BYTE *)dst;
+	BYTE *op = ostart;
+	BYTE *const omax = op + maxDstSize;
+	BYTE *const olimit = omax - 3;
+
+	BIT_DStream_t bitD;
+	FSE_DState_t state1;
+	FSE_DState_t state2;
+
+	/* Init */
+	CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize));
+
+	FSE_initDState(&state1, &bitD, dt);
+	FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+	/* 4 symbols per loop */
+	for (; (BIT_reloadDStream(&bitD) == BIT_DStream_unfinished) & (op < olimit); op += 4) {
+		op[0] = FSE_GETSYMBOL(&state1);
+
+		if (FSE_MAX_TABLELOG * 2 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */
+			BIT_reloadDStream(&bitD);
+
+		op[1] = FSE_GETSYMBOL(&state2);
+
+		if (FSE_MAX_TABLELOG * 4 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */
+		{
+			if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) {
+				op += 2;
+				break;
+			}
+		}
+
+		op[2] = FSE_GETSYMBOL(&state1);
+
+		if (FSE_MAX_TABLELOG * 2 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */
+			BIT_reloadDStream(&bitD);
+
+		op[3] = FSE_GETSYMBOL(&state2);
+	}
+
+	/* tail */
+	/* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
+	while (1) {
+		if (op > (omax - 2))
+			return ERROR(dstSize_tooSmall);
+		*op++ = FSE_GETSYMBOL(&state1);
+		if (BIT_reloadDStream(&bitD) == BIT_DStream_overflow) {
+			*op++ = FSE_GETSYMBOL(&state2);
+			break;
+		}
+
+		if (op > (omax - 2))
+			return ERROR(dstSize_tooSmall);
+		*op++ = FSE_GETSYMBOL(&state2);
+		if (BIT_reloadDStream(&bitD) == BIT_DStream_overflow) {
+			*op++ = FSE_GETSYMBOL(&state1);
+			break;
+		}
+	}
+
+	return op - ostart;
+}
+
+size_t FSE_decompress_usingDTable(void *dst, size_t originalSize, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt)
+{
+	const void *ptr = dt;
+	const FSE_DTableHeader *DTableH = (const FSE_DTableHeader *)ptr;
+	const U32 fastMode = DTableH->fastMode;
+
+	/* select fast mode (static) */
+	if (fastMode)
+		return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+	return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+size_t FSE_decompress_wksp(void *dst, size_t dstCapacity, const void *cSrc, size_t cSrcSize, unsigned maxLog, void *workspace, size_t workspaceSize)
+{
+	const BYTE *const istart = (const BYTE *)cSrc;
+	const BYTE *ip = istart;
+	unsigned tableLog;
+	unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+	size_t NCountLength;
+
+	FSE_DTable *dt;
+	short *counting;
+	size_t spaceUsed32 = 0;
+
+	FSE_STATIC_ASSERT(sizeof(FSE_DTable) == sizeof(U32));
+
+	dt = (FSE_DTable *)((U32 *)workspace + spaceUsed32);
+	spaceUsed32 += FSE_DTABLE_SIZE_U32(maxLog);
+	counting = (short *)((U32 *)workspace + spaceUsed32);
+	spaceUsed32 += ALIGN(sizeof(short) * (FSE_MAX_SYMBOL_VALUE + 1), sizeof(U32)) >> 2;
+
+	if ((spaceUsed32 << 2) > workspaceSize)
+		return ERROR(tableLog_tooLarge);
+	workspace = (U32 *)workspace + spaceUsed32;
+	workspaceSize -= (spaceUsed32 << 2);
+
+	/* normal FSE decoding mode */
+	NCountLength = FSE_readNCount(counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+	if (FSE_isError(NCountLength))
+		return NCountLength;
+	// if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size; supposed to be already checked in NCountLength, only remaining
+	// case : NCountLength==cSrcSize */
+	if (tableLog > maxLog)
+		return ERROR(tableLog_tooLarge);
+	ip += NCountLength;
+	cSrcSize -= NCountLength;
+
+	CHECK_F(FSE_buildDTable_wksp(dt, counting, maxSymbolValue, tableLog, workspace, workspaceSize));
+
+	return FSE_decompress_usingDTable(dst, dstCapacity, ip, cSrcSize, dt); /* always return, even if it is an error code */
+}

diff --git a/lib/zstd/huf.h b/lib/zstd/huf.h
new file mode 100644
index 0000000..2143da2
--- /dev/null
+++ b/lib/zstd/huf.h

@@ -0,0 +1,212 @@
+/*
+ * Huffman coder, part of New Generation Entropy library
+ * header file
+ * Copyright (C) 2013-2016, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ */
+#ifndef HUF_H_298734234
+#define HUF_H_298734234
+
+/* *** Dependencies *** */
+#include <linux/types.h> /* size_t */
+
+/* ***   Tool functions *** */
+#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */
+size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
+
+/* Error Management */
+unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */
+
+/* ***   Advanced function   *** */
+
+/** HUF_compress4X_wksp() :
+*   Same as HUF_compress2(), but uses externally allocated `workSpace`, which must be a table of >= 1024 unsigned */
+size_t HUF_compress4X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace,
+			   size_t wkspSize); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */
+
+/* *** Dependencies *** */
+#include "mem.h" /* U32 */
+
+/* *** Constants *** */
+#define HUF_TABLELOG_MAX 12     /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_TABLELOG_DEFAULT 11 /* tableLog by default, when not specified */
+#define HUF_SYMBOLVALUE_MAX 255
+
+#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
+#error "HUF_TABLELOG_MAX is too large !"
+#endif
+
+/* ****************************************
+*  Static allocation
+******************************************/
+/* HUF buffer bounds */
+#define HUF_CTABLEBOUND 129
+#define HUF_BLOCKBOUND(size) (size + (size >> 8) + 8)			 /* only true if incompressible pre-filtered with fast heuristic */
+#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
+
+/* static allocation of HUF's Compression Table */
+#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
+	U32 name##hb[maxSymbolValue + 1];              \
+	void *name##hv = &(name##hb);                  \
+	HUF_CElt *name = (HUF_CElt *)(name##hv) /* no final ; */
+
+/* static allocation of HUF's DTable */
+typedef U32 HUF_DTable;
+#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1 << (maxTableLog)))
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = {((U32)((maxTableLog)-1) * 0x01000001)}
+#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = {((U32)(maxTableLog)*0x01000001)}
+
+/* The workspace must have alignment at least 4 and be at least this large */
+#define HUF_COMPRESS_WORKSPACE_SIZE (6 << 10)
+#define HUF_COMPRESS_WORKSPACE_SIZE_U32 (HUF_COMPRESS_WORKSPACE_SIZE / sizeof(U32))
+
+/* The workspace must have alignment at least 4 and be at least this large */
+#define HUF_DECOMPRESS_WORKSPACE_SIZE (3 << 10)
+#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
+
+/* ****************************************
+*  Advanced decompression functions
+******************************************/
+size_t HUF_decompress4X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize); /**< decodes RLE and uncompressed */
+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace,
+				size_t workspaceSize);							       /**< considers RLE and uncompressed as errors */
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace,
+				   size_t workspaceSize); /**< single-symbol decoder */
+size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace,
+				   size_t workspaceSize); /**< double-symbols decoder */
+
+/* ****************************************
+*  HUF detailed API
+******************************************/
+/*!
+HUF_compress() does the following:
+1. count symbol occurrence from source[] into table count[] using FSE_count()
+2. (optional) refine tableLog using HUF_optimalTableLog()
+3. build Huffman table from count using HUF_buildCTable()
+4. save Huffman table to memory buffer using HUF_writeCTable_wksp()
+5. encode the data stream using HUF_compress4X_usingCTable()
+
+The following API allows targeting specific sub-functions for advanced tasks.
+For example, it's possible to compress several blocks using the same 'CTable',
+or to save and regenerate 'CTable' using external methods.
+*/
+/* FSE_count() : find it within "fse.h" */
+unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
+typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
+size_t HUF_writeCTable_wksp(void *dst, size_t maxDstSize, const HUF_CElt *CTable, unsigned maxSymbolValue, unsigned huffLog, void *workspace, size_t workspaceSize);
+size_t HUF_compress4X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable);
+
+typedef enum {
+	HUF_repeat_none,  /**< Cannot use the previous table */
+	HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1,
+			     4}X_repeat */
+	HUF_repeat_valid  /**< Can use the previous table and it is asumed to be valid */
+} HUF_repeat;
+/** HUF_compress4X_repeat() :
+*   Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+*   If it uses hufTable it does not modify hufTable or repeat.
+*   If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+*   If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress4X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace,
+			     size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat,
+			     int preferRepeat); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */
+
+/** HUF_buildCTable_wksp() :
+ *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
+ *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
+ */
+size_t HUF_buildCTable_wksp(HUF_CElt *tree, const U32 *count, U32 maxSymbolValue, U32 maxNbBits, void *workSpace, size_t wkspSize);
+
+/*! HUF_readStats() :
+	Read compact Huffman tree, saved by HUF_writeCTable().
+	`huffWeight` is destination buffer.
+	@return : size read from `src` , or an error Code .
+	Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
+size_t HUF_readStats_wksp(BYTE *huffWeight, size_t hwSize, U32 *rankStats, U32 *nbSymbolsPtr, U32 *tableLogPtr, const void *src, size_t srcSize,
+			  void *workspace, size_t workspaceSize);
+
+/** HUF_readCTable() :
+*   Loading a CTable saved with HUF_writeCTable() */
+size_t HUF_readCTable_wksp(HUF_CElt *CTable, unsigned maxSymbolValue, const void *src, size_t srcSize, void *workspace, size_t workspaceSize);
+
+/*
+HUF_decompress() does the following:
+1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
+2. build Huffman table from save, using HUF_readDTableXn()
+3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable
+*/
+
+/** HUF_selectDecoder() :
+*   Tells which decoder is likely to decode faster,
+*   based on a set of pre-determined metrics.
+*   @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
+*   Assumption : 0 < cSrcSize < dstSize <= 128 KB */
+U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize);
+
+size_t HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize);
+size_t HUF_readDTableX4_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize);
+
+size_t HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable);
+size_t HUF_decompress4X2_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable);
+size_t HUF_decompress4X4_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable);
+
+/* single stream variants */
+
+size_t HUF_compress1X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace,
+			   size_t wkspSize); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */
+size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable);
+/** HUF_compress1X_repeat() :
+*   Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+*   If it uses hufTable it does not modify hufTable or repeat.
+*   If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+*   If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress1X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace,
+			     size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat,
+			     int preferRepeat); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */
+
+size_t HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize);
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace,
+				   size_t workspaceSize); /**< single-symbol decoder */
+size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace,
+				   size_t workspaceSize); /**< double-symbols decoder */
+
+size_t HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize,
+				    const HUF_DTable *DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */
+size_t HUF_decompress1X2_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable);
+size_t HUF_decompress1X4_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable);
+
+#endif /* HUF_H_298734234 */

diff --git a/lib/zstd/huf_compress.c b/lib/zstd/huf_compress.c
new file mode 100644
index 0000000..40055a7
--- /dev/null
+++ b/lib/zstd/huf_compress.c

@@ -0,0 +1,770 @@
+/*
+ * Huffman encoder, part of New Generation Entropy library
+ * Copyright (C) 2013-2016, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ */
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include "bitstream.h"
+#include "fse.h" /* header compression */
+#include "huf.h"
+#include <linux/kernel.h>
+#include <linux/string.h> /* memcpy, memset */
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_STATIC_ASSERT(c)                                   \
+	{                                                      \
+		enum { HUF_static_assert = 1 / (int)(!!(c)) }; \
+	} /* use only *after* variable declarations */
+#define CHECK_V_F(e, f)     \
+	size_t const e = f; \
+	if (ERR_isError(e)) \
+	return f
+#define CHECK_F(f)                        \
+	{                                 \
+		CHECK_V_F(_var_err__, f); \
+	}
+
+/* **************************************************************
+*  Utils
+****************************************************************/
+unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
+{
+	return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
+}
+
+/* *******************************************************
+*  HUF : Huffman block compression
+*********************************************************/
+/* HUF_compressWeights() :
+ * Same as FSE_compress(), but dedicated to huff0's weights compression.
+ * The use case needs much less stack memory.
+ * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
+ */
+#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
+size_t HUF_compressWeights_wksp(void *dst, size_t dstSize, const void *weightTable, size_t wtSize, void *workspace, size_t workspaceSize)
+{
+	BYTE *const ostart = (BYTE *)dst;
+	BYTE *op = ostart;
+	BYTE *const oend = ostart + dstSize;
+
+	U32 maxSymbolValue = HUF_TABLELOG_MAX;
+	U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
+
+	FSE_CTable *CTable;
+	U32 *count;
+	S16 *norm;
+	size_t spaceUsed32 = 0;
+
+	HUF_STATIC_ASSERT(sizeof(FSE_CTable) == sizeof(U32));
+
+	CTable = (FSE_CTable *)((U32 *)workspace + spaceUsed32);
+	spaceUsed32 += FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX);
+	count = (U32 *)workspace + spaceUsed32;
+	spaceUsed32 += HUF_TABLELOG_MAX + 1;
+	norm = (S16 *)((U32 *)workspace + spaceUsed32);
+	spaceUsed32 += ALIGN(sizeof(S16) * (HUF_TABLELOG_MAX + 1), sizeof(U32)) >> 2;
+
+	if ((spaceUsed32 << 2) > workspaceSize)
+		return ERROR(tableLog_tooLarge);
+	workspace = (U32 *)workspace + spaceUsed32;
+	workspaceSize -= (spaceUsed32 << 2);
+
+	/* init conditions */
+	if (wtSize <= 1)
+		return 0; /* Not compressible */
+
+	/* Scan input and build symbol stats */
+	{
+		CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize));
+		if (maxCount == wtSize)
+			return 1; /* only a single symbol in src : rle */
+		if (maxCount == 1)
+			return 0; /* each symbol present maximum once => not compressible */
+	}
+
+	tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
+	CHECK_F(FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue));
+
+	/* Write table description header */
+	{
+		CHECK_V_F(hSize, FSE_writeNCount(op, oend - op, norm, maxSymbolValue, tableLog));
+		op += hSize;
+	}
+
+	/* Compress */
+	CHECK_F(FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, workspace, workspaceSize));
+	{
+		CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable));
+		if (cSize == 0)
+			return 0; /* not enough space for compressed data */
+		op += cSize;
+	}
+
+	return op - ostart;
+}
+
+struct HUF_CElt_s {
+	U16 val;
+	BYTE nbBits;
+}; /* typedef'd to HUF_CElt within "huf.h" */
+
+/*! HUF_writeCTable_wksp() :
+	`CTable` : Huffman tree to save, using huf representation.
+	@return : size of saved CTable */
+size_t HUF_writeCTable_wksp(void *dst, size_t maxDstSize, const HUF_CElt *CTable, U32 maxSymbolValue, U32 huffLog, void *workspace, size_t workspaceSize)
+{
+	BYTE *op = (BYTE *)dst;
+	U32 n;
+
+	BYTE *bitsToWeight;
+	BYTE *huffWeight;
+	size_t spaceUsed32 = 0;
+
+	bitsToWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
+	spaceUsed32 += ALIGN(HUF_TABLELOG_MAX + 1, sizeof(U32)) >> 2;
+	huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
+	spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX, sizeof(U32)) >> 2;
+
+	if ((spaceUsed32 << 2) > workspaceSize)
+		return ERROR(tableLog_tooLarge);
+	workspace = (U32 *)workspace + spaceUsed32;
+	workspaceSize -= (spaceUsed32 << 2);
+
+	/* check conditions */
+	if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
+		return ERROR(maxSymbolValue_tooLarge);
+
+	/* convert to weight */
+	bitsToWeight[0] = 0;
+	for (n = 1; n < huffLog + 1; n++)
+		bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
+	for (n = 0; n < maxSymbolValue; n++)
+		huffWeight[n] = bitsToWeight[CTable[n].nbBits];
+
+	/* attempt weights compression by FSE */
+	{
+		CHECK_V_F(hSize, HUF_compressWeights_wksp(op + 1, maxDstSize - 1, huffWeight, maxSymbolValue, workspace, workspaceSize));
+		if ((hSize > 1) & (hSize < maxSymbolValue / 2)) { /* FSE compressed */
+			op[0] = (BYTE)hSize;
+			return hSize + 1;
+		}
+	}
+
+	/* write raw values as 4-bits (max : 15) */
+	if (maxSymbolValue > (256 - 128))
+		return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
+	if (((maxSymbolValue + 1) / 2) + 1 > maxDstSize)
+		return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
+	op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue - 1));
+	huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
+	for (n = 0; n < maxSymbolValue; n += 2)
+		op[(n / 2) + 1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n + 1]);
+	return ((maxSymbolValue + 1) / 2) + 1;
+}
+
+size_t HUF_readCTable_wksp(HUF_CElt *CTable, U32 maxSymbolValue, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
+{
+	U32 *rankVal;
+	BYTE *huffWeight;
+	U32 tableLog = 0;
+	U32 nbSymbols = 0;
+	size_t readSize;
+	size_t spaceUsed32 = 0;
+
+	rankVal = (U32 *)workspace + spaceUsed32;
+	spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
+	huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
+	spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
+
+	if ((spaceUsed32 << 2) > workspaceSize)
+		return ERROR(tableLog_tooLarge);
+	workspace = (U32 *)workspace + spaceUsed32;
+	workspaceSize -= (spaceUsed32 << 2);
+
+	/* get symbol weights */
+	readSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
+	if (ERR_isError(readSize))
+		return readSize;
+
+	/* check result */
+	if (tableLog > HUF_TABLELOG_MAX)
+		return ERROR(tableLog_tooLarge);
+	if (nbSymbols > maxSymbolValue + 1)
+		return ERROR(maxSymbolValue_tooSmall);
+
+	/* Prepare base value per rank */
+	{
+		U32 n, nextRankStart = 0;
+		for (n = 1; n <= tableLog; n++) {
+			U32 curr = nextRankStart;
+			nextRankStart += (rankVal[n] << (n - 1));
+			rankVal[n] = curr;
+		}
+	}
+
+	/* fill nbBits */
+	{
+		U32 n;
+		for (n = 0; n < nbSymbols; n++) {
+			const U32 w = huffWeight[n];
+			CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
+		}
+	}
+
+	/* fill val */
+	{
+		U16 nbPerRank[HUF_TABLELOG_MAX + 2] = {0}; /* support w=0=>n=tableLog+1 */
+		U16 valPerRank[HUF_TABLELOG_MAX + 2] = {0};
+		{
+			U32 n;
+			for (n = 0; n < nbSymbols; n++)
+				nbPerRank[CTable[n].nbBits]++;
+		}
+		/* determine stating value per rank */
+		valPerRank[tableLog + 1] = 0; /* for w==0 */
+		{
+			U16 min = 0;
+			U32 n;
+			for (n = tableLog; n > 0; n--) { /* start at n=tablelog <-> w=1 */
+				valPerRank[n] = min;     /* get starting value within each rank */
+				min += nbPerRank[n];
+				min >>= 1;
+			}
+		}
+		/* assign value within rank, symbol order */
+		{
+			U32 n;
+			for (n = 0; n <= maxSymbolValue; n++)
+				CTable[n].val = valPerRank[CTable[n].nbBits]++;
+		}
+	}
+
+	return readSize;
+}
+
+typedef struct nodeElt_s {
+	U32 count;
+	U16 parent;
+	BYTE byte;
+	BYTE nbBits;
+} nodeElt;
+
+static U32 HUF_setMaxHeight(nodeElt *huffNode, U32 lastNonNull, U32 maxNbBits)
+{
+	const U32 largestBits = huffNode[lastNonNull].nbBits;
+	if (largestBits <= maxNbBits)
+		return largestBits; /* early exit : no elt > maxNbBits */
+
+	/* there are several too large elements (at least >= 2) */
+	{
+		int totalCost = 0;
+		const U32 baseCost = 1 << (largestBits - maxNbBits);
+		U32 n = lastNonNull;
+
+		while (huffNode[n].nbBits > maxNbBits) {
+			totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
+			huffNode[n].nbBits = (BYTE)maxNbBits;
+			n--;
+		} /* n stops at huffNode[n].nbBits <= maxNbBits */
+		while (huffNode[n].nbBits == maxNbBits)
+			n--; /* n end at index of smallest symbol using < maxNbBits */
+
+		/* renorm totalCost */
+		totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */
+
+		/* repay normalized cost */
+		{
+			U32 const noSymbol = 0xF0F0F0F0;
+			U32 rankLast[HUF_TABLELOG_MAX + 2];
+			int pos;
+
+			/* Get pos of last (smallest) symbol per rank */
+			memset(rankLast, 0xF0, sizeof(rankLast));
+			{
+				U32 currNbBits = maxNbBits;
+				for (pos = n; pos >= 0; pos--) {
+					if (huffNode[pos].nbBits >= currNbBits)
+						continue;
+					currNbBits = huffNode[pos].nbBits; /* < maxNbBits */
+					rankLast[maxNbBits - currNbBits] = pos;
+				}
+			}
+
+			while (totalCost > 0) {
+				U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
+				for (; nBitsToDecrease > 1; nBitsToDecrease--) {
+					U32 highPos = rankLast[nBitsToDecrease];
+					U32 lowPos = rankLast[nBitsToDecrease - 1];
+					if (highPos == noSymbol)
+						continue;
+					if (lowPos == noSymbol)
+						break;
+					{
+						U32 const highTotal = huffNode[highPos].count;
+						U32 const lowTotal = 2 * huffNode[lowPos].count;
+						if (highTotal <= lowTotal)
+							break;
+					}
+				}
+				/* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
+				/* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
+				while ((nBitsToDecrease <= HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
+					nBitsToDecrease++;
+				totalCost -= 1 << (nBitsToDecrease - 1);
+				if (rankLast[nBitsToDecrease - 1] == noSymbol)
+					rankLast[nBitsToDecrease - 1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */
+				huffNode[rankLast[nBitsToDecrease]].nbBits++;
+				if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
+					rankLast[nBitsToDecrease] = noSymbol;
+				else {
+					rankLast[nBitsToDecrease]--;
+					if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits - nBitsToDecrease)
+						rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
+				}
+			} /* while (totalCost > 0) */
+
+			while (totalCost < 0) {		       /* Sometimes, cost correction overshoot */
+				if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0
+								  (using maxNbBits) */
+					while (huffNode[n].nbBits == maxNbBits)
+						n--;
+					huffNode[n + 1].nbBits--;
+					rankLast[1] = n + 1;
+					totalCost++;
+					continue;
+				}
+				huffNode[rankLast[1] + 1].nbBits--;
+				rankLast[1]++;
+				totalCost++;
+			}
+		}
+	} /* there are several too large elements (at least >= 2) */
+
+	return maxNbBits;
+}
+
+typedef struct {
+	U32 base;
+	U32 curr;
+} rankPos;
+
+static void HUF_sort(nodeElt *huffNode, const U32 *count, U32 maxSymbolValue)
+{
+	rankPos rank[32];
+	U32 n;
+
+	memset(rank, 0, sizeof(rank));
+	for (n = 0; n <= maxSymbolValue; n++) {
+		U32 r = BIT_highbit32(count[n] + 1);
+		rank[r].base++;
+	}
+	for (n = 30; n > 0; n--)
+		rank[n - 1].base += rank[n].base;
+	for (n = 0; n < 32; n++)
+		rank[n].curr = rank[n].base;
+	for (n = 0; n <= maxSymbolValue; n++) {
+		U32 const c = count[n];
+		U32 const r = BIT_highbit32(c + 1) + 1;
+		U32 pos = rank[r].curr++;
+		while ((pos > rank[r].base) && (c > huffNode[pos - 1].count))
+			huffNode[pos] = huffNode[pos - 1], pos--;
+		huffNode[pos].count = c;
+		huffNode[pos].byte = (BYTE)n;
+	}
+}
+
+/** HUF_buildCTable_wksp() :
+ *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
+ *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
+ */
+#define STARTNODE (HUF_SYMBOLVALUE_MAX + 1)
+typedef nodeElt huffNodeTable[2 * HUF_SYMBOLVALUE_MAX + 1 + 1];
+size_t HUF_buildCTable_wksp(HUF_CElt *tree, const U32 *count, U32 maxSymbolValue, U32 maxNbBits, void *workSpace, size_t wkspSize)
+{
+	nodeElt *const huffNode0 = (nodeElt *)workSpace;
+	nodeElt *const huffNode = huffNode0 + 1;
+	U32 n, nonNullRank;
+	int lowS, lowN;
+	U16 nodeNb = STARTNODE;
+	U32 nodeRoot;
+
+	/* safety checks */
+	if (wkspSize < sizeof(huffNodeTable))
+		return ERROR(GENERIC); /* workSpace is not large enough */
+	if (maxNbBits == 0)
+		maxNbBits = HUF_TABLELOG_DEFAULT;
+	if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
+		return ERROR(GENERIC);
+	memset(huffNode0, 0, sizeof(huffNodeTable));
+
+	/* sort, decreasing order */
+	HUF_sort(huffNode, count, maxSymbolValue);
+
+	/* init for parents */
+	nonNullRank = maxSymbolValue;
+	while (huffNode[nonNullRank].count == 0)
+		nonNullRank--;
+	lowS = nonNullRank;
+	nodeRoot = nodeNb + lowS - 1;
+	lowN = nodeNb;
+	huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS - 1].count;
+	huffNode[lowS].parent = huffNode[lowS - 1].parent = nodeNb;
+	nodeNb++;
+	lowS -= 2;
+	for (n = nodeNb; n <= nodeRoot; n++)
+		huffNode[n].count = (U32)(1U << 30);
+	huffNode0[0].count = (U32)(1U << 31); /* fake entry, strong barrier */
+
+	/* create parents */
+	while (nodeNb <= nodeRoot) {
+		U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+		U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+		huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
+		huffNode[n1].parent = huffNode[n2].parent = nodeNb;
+		nodeNb++;
+	}
+
+	/* distribute weights (unlimited tree height) */
+	huffNode[nodeRoot].nbBits = 0;
+	for (n = nodeRoot - 1; n >= STARTNODE; n--)
+		huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1;
+	for (n = 0; n <= nonNullRank; n++)
+		huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1;
+
+	/* enforce maxTableLog */
+	maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
+
+	/* fill result into tree (val, nbBits) */
+	{
+		U16 nbPerRank[HUF_TABLELOG_MAX + 1] = {0};
+		U16 valPerRank[HUF_TABLELOG_MAX + 1] = {0};
+		if (maxNbBits > HUF_TABLELOG_MAX)
+			return ERROR(GENERIC); /* check fit into table */
+		for (n = 0; n <= nonNullRank; n++)
+			nbPerRank[huffNode[n].nbBits]++;
+		/* determine stating value per rank */
+		{
+			U16 min = 0;
+			for (n = maxNbBits; n > 0; n--) {
+				valPerRank[n] = min; /* get starting value within each rank */
+				min += nbPerRank[n];
+				min >>= 1;
+			}
+		}
+		for (n = 0; n <= maxSymbolValue; n++)
+			tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
+		for (n = 0; n <= maxSymbolValue; n++)
+			tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
+	}
+
+	return maxNbBits;
+}
+
+static size_t HUF_estimateCompressedSize(HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue)
+{
+	size_t nbBits = 0;
+	int s;
+	for (s = 0; s <= (int)maxSymbolValue; ++s) {
+		nbBits += CTable[s].nbBits * count[s];
+	}
+	return nbBits >> 3;
+}
+
+static int HUF_validateCTable(const HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue)
+{
+	int bad = 0;
+	int s;
+	for (s = 0; s <= (int)maxSymbolValue; ++s) {
+		bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
+	}
+	return !bad;
+}
+
+static void HUF_encodeSymbol(BIT_CStream_t *bitCPtr, U32 symbol, const HUF_CElt *CTable)
+{
+	BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
+}
+
+size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
+
+#define HUF_FLUSHBITS(s)  BIT_flushBits(s)
+
+#define HUF_FLUSHBITS_1(stream)                                            \
+	if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 2 + 7) \
+	HUF_FLUSHBITS(stream)
+
+#define HUF_FLUSHBITS_2(stream)                                            \
+	if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 4 + 7) \
+	HUF_FLUSHBITS(stream)
+
+size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable)
+{
+	const BYTE *ip = (const BYTE *)src;
+	BYTE *const ostart = (BYTE *)dst;
+	BYTE *const oend = ostart + dstSize;
+	BYTE *op = ostart;
+	size_t n;
+	BIT_CStream_t bitC;
+
+	/* init */
+	if (dstSize < 8)
+		return 0; /* not enough space to compress */
+	{
+		size_t const initErr = BIT_initCStream(&bitC, op, oend - op);
+		if (HUF_isError(initErr))
+			return 0;
+	}
+
+	n = srcSize & ~3; /* join to mod 4 */
+	switch (srcSize & 3) {
+	case 3: HUF_encodeSymbol(&bitC, ip[n + 2], CTable); HUF_FLUSHBITS_2(&bitC);
+	case 2: HUF_encodeSymbol(&bitC, ip[n + 1], CTable); HUF_FLUSHBITS_1(&bitC);
+	case 1: HUF_encodeSymbol(&bitC, ip[n + 0], CTable); HUF_FLUSHBITS(&bitC);
+	case 0:
+	default:;
+	}
+
+	for (; n > 0; n -= 4) { /* note : n&3==0 at this stage */
+		HUF_encodeSymbol(&bitC, ip[n - 1], CTable);
+		HUF_FLUSHBITS_1(&bitC);
+		HUF_encodeSymbol(&bitC, ip[n - 2], CTable);
+		HUF_FLUSHBITS_2(&bitC);
+		HUF_encodeSymbol(&bitC, ip[n - 3], CTable);
+		HUF_FLUSHBITS_1(&bitC);
+		HUF_encodeSymbol(&bitC, ip[n - 4], CTable);
+		HUF_FLUSHBITS(&bitC);
+	}
+
+	return BIT_closeCStream(&bitC);
+}
+
+size_t HUF_compress4X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable)
+{
+	size_t const segmentSize = (srcSize + 3) / 4; /* first 3 segments */
+	const BYTE *ip = (const BYTE *)src;
+	const BYTE *const iend = ip + srcSize;
+	BYTE *const ostart = (BYTE *)dst;
+	BYTE *const oend = ostart + dstSize;
+	BYTE *op = ostart;
+
+	if (dstSize < 6 + 1 + 1 + 1 + 8)
+		return 0; /* minimum space to compress successfully */
+	if (srcSize < 12)
+		return 0; /* no saving possible : too small input */
+	op += 6;	  /* jumpTable */
+
+	{
+		CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
+		if (cSize == 0)
+			return 0;
+		ZSTD_writeLE16(ostart, (U16)cSize);
+		op += cSize;
+	}
+
+	ip += segmentSize;
+	{
+		CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
+		if (cSize == 0)
+			return 0;
+		ZSTD_writeLE16(ostart + 2, (U16)cSize);
+		op += cSize;
+	}
+
+	ip += segmentSize;
+	{
+		CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
+		if (cSize == 0)
+			return 0;
+		ZSTD_writeLE16(ostart + 4, (U16)cSize);
+		op += cSize;
+	}
+
+	ip += segmentSize;
+	{
+		CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, iend - ip, CTable));
+		if (cSize == 0)
+			return 0;
+		op += cSize;
+	}
+
+	return op - ostart;
+}
+
+static size_t HUF_compressCTable_internal(BYTE *const ostart, BYTE *op, BYTE *const oend, const void *src, size_t srcSize, unsigned singleStream,
+					  const HUF_CElt *CTable)
+{
+	size_t const cSize =
+	    singleStream ? HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) : HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
+	if (HUF_isError(cSize)) {
+		return cSize;
+	}
+	if (cSize == 0) {
+		return 0;
+	} /* uncompressible */
+	op += cSize;
+	/* check compressibility */
+	if ((size_t)(op - ostart) >= srcSize - 1) {
+		return 0;
+	}
+	return op - ostart;
+}
+
+/* `workSpace` must a table of at least 1024 unsigned */
+static size_t HUF_compress_internal(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog,
+				    unsigned singleStream, void *workSpace, size_t wkspSize, HUF_CElt *oldHufTable, HUF_repeat *repeat, int preferRepeat)
+{
+	BYTE *const ostart = (BYTE *)dst;
+	BYTE *const oend = ostart + dstSize;
+	BYTE *op = ostart;
+
+	U32 *count;
+	size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1);
+	HUF_CElt *CTable;
+	size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1);
+
+	/* checks & inits */
+	if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize)
+		return ERROR(GENERIC);
+	if (!srcSize)
+		return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */
+	if (!dstSize)
+		return 0; /* cannot fit within dst budget */
+	if (srcSize > HUF_BLOCKSIZE_MAX)
+		return ERROR(srcSize_wrong); /* curr block size limit */
+	if (huffLog > HUF_TABLELOG_MAX)
+		return ERROR(tableLog_tooLarge);
+	if (!maxSymbolValue)
+		maxSymbolValue = HUF_SYMBOLVALUE_MAX;
+	if (!huffLog)
+		huffLog = HUF_TABLELOG_DEFAULT;
+
+	count = (U32 *)workSpace;
+	workSpace = (BYTE *)workSpace + countSize;
+	wkspSize -= countSize;
+	CTable = (HUF_CElt *)workSpace;
+	workSpace = (BYTE *)workSpace + CTableSize;
+	wkspSize -= CTableSize;
+
+	/* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */
+	if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
+		return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
+	}
+
+	/* Scan input and build symbol stats */
+	{
+		CHECK_V_F(largest, FSE_count_wksp(count, &maxSymbolValue, (const BYTE *)src, srcSize, (U32 *)workSpace));
+		if (largest == srcSize) {
+			*ostart = ((const BYTE *)src)[0];
+			return 1;
+		} /* single symbol, rle */
+		if (largest <= (srcSize >> 7) + 1)
+			return 0; /* Fast heuristic : not compressible enough */
+	}
+
+	/* Check validity of previous table */
+	if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) {
+		*repeat = HUF_repeat_none;
+	}
+	/* Heuristic : use existing table for small inputs */
+	if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
+		return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
+	}
+
+	/* Build Huffman Tree */
+	huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
+	{
+		CHECK_V_F(maxBits, HUF_buildCTable_wksp(CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize));
+		huffLog = (U32)maxBits;
+		/* Zero the unused symbols so we can check it for validity */
+		memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt));
+	}
+
+	/* Write table description header */
+	{
+		CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, CTable, maxSymbolValue, huffLog, workSpace, wkspSize));
+		/* Check if using the previous table will be beneficial */
+		if (repeat && *repeat != HUF_repeat_none) {
+			size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue);
+			size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue);
+			if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
+				return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
+			}
+		}
+		/* Use the new table */
+		if (hSize + 12ul >= srcSize) {
+			return 0;
+		}
+		op += hSize;
+		if (repeat) {
+			*repeat = HUF_repeat_none;
+		}
+		if (oldHufTable) {
+			memcpy(oldHufTable, CTable, CTableSize);
+		} /* Save the new table */
+	}
+	return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable);
+}
+
+size_t HUF_compress1X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
+			   size_t wkspSize)
+{
+	return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0);
+}
+
+size_t HUF_compress1X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
+			     size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat)
+{
+	return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat,
+				     preferRepeat);
+}
+
+size_t HUF_compress4X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
+			   size_t wkspSize)
+{
+	return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0);
+}
+
+size_t HUF_compress4X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
+			     size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat)
+{
+	return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat,
+				     preferRepeat);
+}

diff --git a/lib/zstd/huf_decompress.c b/lib/zstd/huf_decompress.c
new file mode 100644
index 0000000..6526482
--- /dev/null
+++ b/lib/zstd/huf_decompress.c

@@ -0,0 +1,960 @@
+/*
+ * Huffman decoder, part of New Generation Entropy library
+ * Copyright (C) 2013-2016, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#define FORCE_INLINE static __always_inline
+
+/* **************************************************************
+*  Dependencies
+****************************************************************/
+#include "bitstream.h" /* BIT_* */
+#include "fse.h"       /* header compression */
+#include "huf.h"
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/string.h> /* memcpy, memset */
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_STATIC_ASSERT(c)                                   \
+	{                                                      \
+		enum { HUF_static_assert = 1 / (int)(!!(c)) }; \
+	} /* use only *after* variable declarations */
+
+/*-***************************/
+/*  generic DTableDesc       */
+/*-***************************/
+
+typedef struct {
+	BYTE maxTableLog;
+	BYTE tableType;
+	BYTE tableLog;
+	BYTE reserved;
+} DTableDesc;
+
+static DTableDesc HUF_getDTableDesc(const HUF_DTable *table)
+{
+	DTableDesc dtd;
+	memcpy(&dtd, table, sizeof(dtd));
+	return dtd;
+}
+
+/*-***************************/
+/*  single-symbol decoding   */
+/*-***************************/
+
+typedef struct {
+	BYTE byte;
+	BYTE nbBits;
+} HUF_DEltX2; /* single-symbol decoding */
+
+size_t HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
+{
+	U32 tableLog = 0;
+	U32 nbSymbols = 0;
+	size_t iSize;
+	void *const dtPtr = DTable + 1;
+	HUF_DEltX2 *const dt = (HUF_DEltX2 *)dtPtr;
+
+	U32 *rankVal;
+	BYTE *huffWeight;
+	size_t spaceUsed32 = 0;
+
+	rankVal = (U32 *)workspace + spaceUsed32;
+	spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
+	huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
+	spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
+
+	if ((spaceUsed32 << 2) > workspaceSize)
+		return ERROR(tableLog_tooLarge);
+	workspace = (U32 *)workspace + spaceUsed32;
+	workspaceSize -= (spaceUsed32 << 2);
+
+	HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
+	/* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
+
+	iSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
+	if (HUF_isError(iSize))
+		return iSize;
+
+	/* Table header */
+	{
+		DTableDesc dtd = HUF_getDTableDesc(DTable);
+		if (tableLog > (U32)(dtd.maxTableLog + 1))
+			return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
+		dtd.tableType = 0;
+		dtd.tableLog = (BYTE)tableLog;
+		memcpy(DTable, &dtd, sizeof(dtd));
+	}
+
+	/* Calculate starting value for each rank */
+	{
+		U32 n, nextRankStart = 0;
+		for (n = 1; n < tableLog + 1; n++) {
+			U32 const curr = nextRankStart;
+			nextRankStart += (rankVal[n] << (n - 1));
+			rankVal[n] = curr;
+		}
+	}
+
+	/* fill DTable */
+	{
+		U32 n;
+		for (n = 0; n < nbSymbols; n++) {
+			U32 const w = huffWeight[n];
+			U32 const length = (1 << w) >> 1;
+			U32 u;
+			HUF_DEltX2 D;
+			D.byte = (BYTE)n;
+			D.nbBits = (BYTE)(tableLog + 1 - w);
+			for (u = rankVal[w]; u < rankVal[w] + length; u++)
+				dt[u] = D;
+			rankVal[w] += length;
+		}
+	}
+
+	return iSize;
+}
+
+static BYTE HUF_decodeSymbolX2(BIT_DStream_t *Dstream, const HUF_DEltX2 *dt, const U32 dtLog)
+{
+	size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+	BYTE const c = dt[val].byte;
+	BIT_skipBits(Dstream, dt[val].nbBits);
+	return c;
+}
+
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)         \
+	if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \
+	HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+	if (ZSTD_64bits())                     \
+	HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+FORCE_INLINE size_t HUF_decodeStreamX2(BYTE *p, BIT_DStream_t *const bitDPtr, BYTE *const pEnd, const HUF_DEltX2 *const dt, const U32 dtLog)
+{
+	BYTE *const pStart = p;
+
+	/* up to 4 symbols at a time */
+	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd - 4)) {
+		HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+		HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+		HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+		HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+	}
+
+	/* closer to the end */
+	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
+		HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+	/* no more data to retrieve from bitstream, hence no need to reload */
+	while (p < pEnd)
+		HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+	return pEnd - pStart;
+}
+
+static size_t HUF_decompress1X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
+{
+	BYTE *op = (BYTE *)dst;
+	BYTE *const oend = op + dstSize;
+	const void *dtPtr = DTable + 1;
+	const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr;
+	BIT_DStream_t bitD;
+	DTableDesc const dtd = HUF_getDTableDesc(DTable);
+	U32 const dtLog = dtd.tableLog;
+
+	{
+		size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
+		if (HUF_isError(errorCode))
+			return errorCode;
+	}
+
+	HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
+
+	/* check */
+	if (!BIT_endOfDStream(&bitD))
+		return ERROR(corruption_detected);
+
+	return dstSize;
+}
+
+size_t HUF_decompress1X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
+{
+	DTableDesc dtd = HUF_getDTableDesc(DTable);
+	if (dtd.tableType != 0)
+		return ERROR(GENERIC);
+	return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
+{
+	const BYTE *ip = (const BYTE *)cSrc;
+
+	size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize);
+	if (HUF_isError(hSize))
+		return hSize;
+	if (hSize >= cSrcSize)
+		return ERROR(srcSize_wrong);
+	ip += hSize;
+	cSrcSize -= hSize;
+
+	return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx);
+}
+
+static size_t HUF_decompress4X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
+{
+	/* Check */
+	if (cSrcSize < 10)
+		return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+
+	{
+		const BYTE *const istart = (const BYTE *)cSrc;
+		BYTE *const ostart = (BYTE *)dst;
+		BYTE *const oend = ostart + dstSize;
+		const void *const dtPtr = DTable + 1;
+		const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr;
+
+		/* Init */
+		BIT_DStream_t bitD1;
+		BIT_DStream_t bitD2;
+		BIT_DStream_t bitD3;
+		BIT_DStream_t bitD4;
+		size_t const length1 = ZSTD_readLE16(istart);
+		size_t const length2 = ZSTD_readLE16(istart + 2);
+		size_t const length3 = ZSTD_readLE16(istart + 4);
+		size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+		const BYTE *const istart1 = istart + 6; /* jumpTable */
+		const BYTE *const istart2 = istart1 + length1;
+		const BYTE *const istart3 = istart2 + length2;
+		const BYTE *const istart4 = istart3 + length3;
+		const size_t segmentSize = (dstSize + 3) / 4;
+		BYTE *const opStart2 = ostart + segmentSize;
+		BYTE *const opStart3 = opStart2 + segmentSize;
+		BYTE *const opStart4 = opStart3 + segmentSize;
+		BYTE *op1 = ostart;
+		BYTE *op2 = opStart2;
+		BYTE *op3 = opStart3;
+		BYTE *op4 = opStart4;
+		U32 endSignal;
+		DTableDesc const dtd = HUF_getDTableDesc(DTable);
+		U32 const dtLog = dtd.tableLog;
+
+		if (length4 > cSrcSize)
+			return ERROR(corruption_detected); /* overflow */
+		{
+			size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
+			if (HUF_isError(errorCode))
+				return errorCode;
+		}
+		{
+			size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
+			if (HUF_isError(errorCode))
+				return errorCode;
+		}
+		{
+			size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
+			if (HUF_isError(errorCode))
+				return errorCode;
+		}
+		{
+			size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
+			if (HUF_isError(errorCode))
+				return errorCode;
+		}
+
+		/* 16-32 symbols per loop (4-8 symbols per stream) */
+		endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+		for (; (endSignal == BIT_DStream_unfinished) && (op4 < (oend - 7));) {
+			HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+			HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+			HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+			HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+			HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+			HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+			HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+			HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+			HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+			HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+			HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+			HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+			HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+			HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+			HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+			HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+			endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+		}
+
+		/* check corruption */
+		if (op1 > opStart2)
+			return ERROR(corruption_detected);
+		if (op2 > opStart3)
+			return ERROR(corruption_detected);
+		if (op3 > opStart4)
+			return ERROR(corruption_detected);
+		/* note : op4 supposed already verified within main loop */
+
+		/* finish bitStreams one by one */
+		HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+		HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+		HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+		HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
+
+		/* check */
+		endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+		if (!endSignal)
+			return ERROR(corruption_detected);
+
+		/* decoded size */
+		return dstSize;
+	}
+}
+
+size_t HUF_decompress4X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
+{
+	DTableDesc dtd = HUF_getDTableDesc(DTable);
+	if (dtd.tableType != 0)
+		return ERROR(GENERIC);
+	return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
+{
+	const BYTE *ip = (const BYTE *)cSrc;
+
+	size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize);
+	if (HUF_isError(hSize))
+		return hSize;
+	if (hSize >= cSrcSize)
+		return ERROR(srcSize_wrong);
+	ip += hSize;
+	cSrcSize -= hSize;
+
+	return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
+}
+
+/* *************************/
+/* double-symbols decoding */
+/* *************************/
+typedef struct {
+	U16 sequence;
+	BYTE nbBits;
+	BYTE length;
+} HUF_DEltX4; /* double-symbols decoding */
+
+typedef struct {
+	BYTE symbol;
+	BYTE weight;
+} sortedSymbol_t;
+
+/* HUF_fillDTableX4Level2() :
+ * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
+static void HUF_fillDTableX4Level2(HUF_DEltX4 *DTable, U32 sizeLog, const U32 consumed, const U32 *rankValOrigin, const int minWeight,
+				   const sortedSymbol_t *sortedSymbols, const U32 sortedListSize, U32 nbBitsBaseline, U16 baseSeq)
+{
+	HUF_DEltX4 DElt;
+	U32 rankVal[HUF_TABLELOG_MAX + 1];
+
+	/* get pre-calculated rankVal */
+	memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+	/* fill skipped values */
+	if (minWeight > 1) {
+		U32 i, skipSize = rankVal[minWeight];
+		ZSTD_writeLE16(&(DElt.sequence), baseSeq);
+		DElt.nbBits = (BYTE)(consumed);
+		DElt.length = 1;
+		for (i = 0; i < skipSize; i++)
+			DTable[i] = DElt;
+	}
+
+	/* fill DTable */
+	{
+		U32 s;
+		for (s = 0; s < sortedListSize; s++) { /* note : sortedSymbols already skipped */
+			const U32 symbol = sortedSymbols[s].symbol;
+			const U32 weight = sortedSymbols[s].weight;
+			const U32 nbBits = nbBitsBaseline - weight;
+			const U32 length = 1 << (sizeLog - nbBits);
+			const U32 start = rankVal[weight];
+			U32 i = start;
+			const U32 end = start + length;
+
+			ZSTD_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+			DElt.nbBits = (BYTE)(nbBits + consumed);
+			DElt.length = 2;
+			do {
+				DTable[i++] = DElt;
+			} while (i < end); /* since length >= 1 */
+
+			rankVal[weight] += length;
+		}
+	}
+}
+
+typedef U32 rankVal_t[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1];
+typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
+
+static void HUF_fillDTableX4(HUF_DEltX4 *DTable, const U32 targetLog, const sortedSymbol_t *sortedList, const U32 sortedListSize, const U32 *rankStart,
+			     rankVal_t rankValOrigin, const U32 maxWeight, const U32 nbBitsBaseline)
+{
+	U32 rankVal[HUF_TABLELOG_MAX + 1];
+	const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+	const U32 minBits = nbBitsBaseline - maxWeight;
+	U32 s;
+
+	memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+	/* fill DTable */
+	for (s = 0; s < sortedListSize; s++) {
+		const U16 symbol = sortedList[s].symbol;
+		const U32 weight = sortedList[s].weight;
+		const U32 nbBits = nbBitsBaseline - weight;
+		const U32 start = rankVal[weight];
+		const U32 length = 1 << (targetLog - nbBits);
+
+		if (targetLog - nbBits >= minBits) { /* enough room for a second symbol */
+			U32 sortedRank;
+			int minWeight = nbBits + scaleLog;
+			if (minWeight < 1)
+				minWeight = 1;
+			sortedRank = rankStart[minWeight];
+			HUF_fillDTableX4Level2(DTable + start, targetLog - nbBits, nbBits, rankValOrigin[nbBits], minWeight, sortedList + sortedRank,
+					       sortedListSize - sortedRank, nbBitsBaseline, symbol);
+		} else {
+			HUF_DEltX4 DElt;
+			ZSTD_writeLE16(&(DElt.sequence), symbol);
+			DElt.nbBits = (BYTE)(nbBits);
+			DElt.length = 1;
+			{
+				U32 const end = start + length;
+				U32 u;
+				for (u = start; u < end; u++)
+					DTable[u] = DElt;
+			}
+		}
+		rankVal[weight] += length;
+	}
+}
+
+size_t HUF_readDTableX4_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
+{
+	U32 tableLog, maxW, sizeOfSort, nbSymbols;
+	DTableDesc dtd = HUF_getDTableDesc(DTable);
+	U32 const maxTableLog = dtd.maxTableLog;
+	size_t iSize;
+	void *dtPtr = DTable + 1; /* force compiler to avoid strict-aliasing */
+	HUF_DEltX4 *const dt = (HUF_DEltX4 *)dtPtr;
+	U32 *rankStart;
+
+	rankValCol_t *rankVal;
+	U32 *rankStats;
+	U32 *rankStart0;
+	sortedSymbol_t *sortedSymbol;
+	BYTE *weightList;
+	size_t spaceUsed32 = 0;
+
+	HUF_STATIC_ASSERT((sizeof(rankValCol_t) & 3) == 0);
+
+	rankVal = (rankValCol_t *)((U32 *)workspace + spaceUsed32);
+	spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
+	rankStats = (U32 *)workspace + spaceUsed32;
+	spaceUsed32 += HUF_TABLELOG_MAX + 1;
+	rankStart0 = (U32 *)workspace + spaceUsed32;
+	spaceUsed32 += HUF_TABLELOG_MAX + 2;
+	sortedSymbol = (sortedSymbol_t *)((U32 *)workspace + spaceUsed32);
+	spaceUsed32 += ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
+	weightList = (BYTE *)((U32 *)workspace + spaceUsed32);
+	spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
+
+	if ((spaceUsed32 << 2) > workspaceSize)
+		return ERROR(tableLog_tooLarge);
+	workspace = (U32 *)workspace + spaceUsed32;
+	workspaceSize -= (spaceUsed32 << 2);
+
+	rankStart = rankStart0 + 1;
+	memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
+
+	HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
+	if (maxTableLog > HUF_TABLELOG_MAX)
+		return ERROR(tableLog_tooLarge);
+	/* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
+
+	iSize = HUF_readStats_wksp(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
+	if (HUF_isError(iSize))
+		return iSize;
+
+	/* check result */
+	if (tableLog > maxTableLog)
+		return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
+
+	/* find maxWeight */
+	for (maxW = tableLog; rankStats[maxW] == 0; maxW--) {
+	} /* necessarily finds a solution before 0 */
+
+	/* Get start index of each weight */
+	{
+		U32 w, nextRankStart = 0;
+		for (w = 1; w < maxW + 1; w++) {
+			U32 curr = nextRankStart;
+			nextRankStart += rankStats[w];
+			rankStart[w] = curr;
+		}
+		rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
+		sizeOfSort = nextRankStart;
+	}
+
+	/* sort symbols by weight */
+	{
+		U32 s;
+		for (s = 0; s < nbSymbols; s++) {
+			U32 const w = weightList[s];
+			U32 const r = rankStart[w]++;
+			sortedSymbol[r].symbol = (BYTE)s;
+			sortedSymbol[r].weight = (BYTE)w;
+		}
+		rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
+	}
+
+	/* Build rankVal */
+	{
+		U32 *const rankVal0 = rankVal[0];
+		{
+			int const rescale = (maxTableLog - tableLog) - 1; /* tableLog <= maxTableLog */
+			U32 nextRankVal = 0;
+			U32 w;
+			for (w = 1; w < maxW + 1; w++) {
+				U32 curr = nextRankVal;
+				nextRankVal += rankStats[w] << (w + rescale);
+				rankVal0[w] = curr;
+			}
+		}
+		{
+			U32 const minBits = tableLog + 1 - maxW;
+			U32 consumed;
+			for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
+				U32 *const rankValPtr = rankVal[consumed];
+				U32 w;
+				for (w = 1; w < maxW + 1; w++) {
+					rankValPtr[w] = rankVal0[w] >> consumed;
+				}
+			}
+		}
+	}
+
+	HUF_fillDTableX4(dt, maxTableLog, sortedSymbol, sizeOfSort, rankStart0, rankVal, maxW, tableLog + 1);
+
+	dtd.tableLog = (BYTE)maxTableLog;
+	dtd.tableType = 1;
+	memcpy(DTable, &dtd, sizeof(dtd));
+	return iSize;
+}
+
+static U32 HUF_decodeSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog)
+{
+	size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+	memcpy(op, dt + val, 2);
+	BIT_skipBits(DStream, dt[val].nbBits);
+	return dt[val].length;
+}
+
+static U32 HUF_decodeLastSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog)
+{
+	size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+	memcpy(op, dt + val, 1);
+	if (dt[val].length == 1)
+		BIT_skipBits(DStream, dt[val].nbBits);
+	else {
+		if (DStream->bitsConsumed < (sizeof(DStream->bitContainer) * 8)) {
+			BIT_skipBits(DStream, dt[val].nbBits);
+			if (DStream->bitsConsumed > (sizeof(DStream->bitContainer) * 8))
+				/* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+				DStream->bitsConsumed = (sizeof(DStream->bitContainer) * 8);
+		}
+	}
+	return 1;
+}
+
+#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr)         \
+	if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \
+	ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+	if (ZSTD_64bits())                     \
+	ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+FORCE_INLINE size_t HUF_decodeStreamX4(BYTE *p, BIT_DStream_t *bitDPtr, BYTE *const pEnd, const HUF_DEltX4 *const dt, const U32 dtLog)
+{
+	BYTE *const pStart = p;
+
+	/* up to 8 symbols at a time */
+	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd - (sizeof(bitDPtr->bitContainer) - 1))) {
+		HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+		HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
+		HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+		HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+	}
+
+	/* closer to end : up to 2 symbols at a time */
+	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd - 2))
+		HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+	while (p <= pEnd - 2)
+		HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
+
+	if (p < pEnd)
+		p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+	return p - pStart;
+}
+
+static size_t HUF_decompress1X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
+{
+	BIT_DStream_t bitD;
+
+	/* Init */
+	{
+		size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
+		if (HUF_isError(errorCode))
+			return errorCode;
+	}
+
+	/* decode */
+	{
+		BYTE *const ostart = (BYTE *)dst;
+		BYTE *const oend = ostart + dstSize;
+		const void *const dtPtr = DTable + 1; /* force compiler to not use strict-aliasing */
+		const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr;
+		DTableDesc const dtd = HUF_getDTableDesc(DTable);
+		HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
+	}
+
+	/* check */
+	if (!BIT_endOfDStream(&bitD))
+		return ERROR(corruption_detected);
+
+	/* decoded size */
+	return dstSize;
+}
+
+size_t HUF_decompress1X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
+{
+	DTableDesc dtd = HUF_getDTableDesc(DTable);
+	if (dtd.tableType != 1)
+		return ERROR(GENERIC);
+	return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+
+size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
+{
+	const BYTE *ip = (const BYTE *)cSrc;
+
+	size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize);
+	if (HUF_isError(hSize))
+		return hSize;
+	if (hSize >= cSrcSize)
+		return ERROR(srcSize_wrong);
+	ip += hSize;
+	cSrcSize -= hSize;
+
+	return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx);
+}
+
+static size_t HUF_decompress4X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
+{
+	if (cSrcSize < 10)
+		return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+
+	{
+		const BYTE *const istart = (const BYTE *)cSrc;
+		BYTE *const ostart = (BYTE *)dst;
+		BYTE *const oend = ostart + dstSize;
+		const void *const dtPtr = DTable + 1;
+		const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr;
+
+		/* Init */
+		BIT_DStream_t bitD1;
+		BIT_DStream_t bitD2;
+		BIT_DStream_t bitD3;
+		BIT_DStream_t bitD4;
+		size_t const length1 = ZSTD_readLE16(istart);
+		size_t const length2 = ZSTD_readLE16(istart + 2);
+		size_t const length3 = ZSTD_readLE16(istart + 4);
+		size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+		const BYTE *const istart1 = istart + 6; /* jumpTable */
+		const BYTE *const istart2 = istart1 + length1;
+		const BYTE *const istart3 = istart2 + length2;
+		const BYTE *const istart4 = istart3 + length3;
+		size_t const segmentSize = (dstSize + 3) / 4;
+		BYTE *const opStart2 = ostart + segmentSize;
+		BYTE *const opStart3 = opStart2 + segmentSize;
+		BYTE *const opStart4 = opStart3 + segmentSize;
+		BYTE *op1 = ostart;
+		BYTE *op2 = opStart2;
+		BYTE *op3 = opStart3;
+		BYTE *op4 = opStart4;
+		U32 endSignal;
+		DTableDesc const dtd = HUF_getDTableDesc(DTable);
+		U32 const dtLog = dtd.tableLog;
+
+		if (length4 > cSrcSize)
+			return ERROR(corruption_detected); /* overflow */
+		{
+			size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
+			if (HUF_isError(errorCode))
+				return errorCode;
+		}
+		{
+			size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
+			if (HUF_isError(errorCode))
+				return errorCode;
+		}
+		{
+			size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
+			if (HUF_isError(errorCode))
+				return errorCode;
+		}
+		{
+			size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
+			if (HUF_isError(errorCode))
+				return errorCode;
+		}
+
+		/* 16-32 symbols per loop (4-8 symbols per stream) */
+		endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+		for (; (endSignal == BIT_DStream_unfinished) & (op4 < (oend - (sizeof(bitD4.bitContainer) - 1)));) {
+			HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+			HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+			HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+			HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+			HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
+			HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
+			HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
+			HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
+			HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+			HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+			HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+			HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+			HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
+			HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
+			HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
+			HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+			endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+		}
+
+		/* check corruption */
+		if (op1 > opStart2)
+			return ERROR(corruption_detected);
+		if (op2 > opStart3)
+			return ERROR(corruption_detected);
+		if (op3 > opStart4)
+			return ERROR(corruption_detected);
+		/* note : op4 already verified within main loop */
+
+		/* finish bitStreams one by one */
+		HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+		HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+		HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+		HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
+
+		/* check */
+		{
+			U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+			if (!endCheck)
+				return ERROR(corruption_detected);
+		}
+
+		/* decoded size */
+		return dstSize;
+	}
+}
+
+size_t HUF_decompress4X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
+{
+	DTableDesc dtd = HUF_getDTableDesc(DTable);
+	if (dtd.tableType != 1)
+		return ERROR(GENERIC);
+	return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+
+size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
+{
+	const BYTE *ip = (const BYTE *)cSrc;
+
+	size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize);
+	if (HUF_isError(hSize))
+		return hSize;
+	if (hSize >= cSrcSize)
+		return ERROR(srcSize_wrong);
+	ip += hSize;
+	cSrcSize -= hSize;
+
+	return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
+}
+
+/* ********************************/
+/* Generic decompression selector */
+/* ********************************/
+
+size_t HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
+{
+	DTableDesc const dtd = HUF_getDTableDesc(DTable);
+	return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable)
+			     : HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
+}
+
+size_t HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
+{
+	DTableDesc const dtd = HUF_getDTableDesc(DTable);
+	return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable)
+			     : HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
+}
+
+typedef struct {
+	U32 tableTime;
+	U32 decode256Time;
+} algo_time_t;
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = {
+    /* single, double, quad */
+    {{0, 0}, {1, 1}, {2, 2}},		     /* Q==0 : impossible */
+    {{0, 0}, {1, 1}, {2, 2}},		     /* Q==1 : impossible */
+    {{38, 130}, {1313, 74}, {2151, 38}},     /* Q == 2 : 12-18% */
+    {{448, 128}, {1353, 74}, {2238, 41}},    /* Q == 3 : 18-25% */
+    {{556, 128}, {1353, 74}, {2238, 47}},    /* Q == 4 : 25-32% */
+    {{714, 128}, {1418, 74}, {2436, 53}},    /* Q == 5 : 32-38% */
+    {{883, 128}, {1437, 74}, {2464, 61}},    /* Q == 6 : 38-44% */
+    {{897, 128}, {1515, 75}, {2622, 68}},    /* Q == 7 : 44-50% */
+    {{926, 128}, {1613, 75}, {2730, 75}},    /* Q == 8 : 50-56% */
+    {{947, 128}, {1729, 77}, {3359, 77}},    /* Q == 9 : 56-62% */
+    {{1107, 128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
+    {{1177, 128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
+    {{1242, 128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
+    {{1349, 128}, {2644, 106}, {5260, 106}}, /* Q ==13 : 81-87% */
+    {{1455, 128}, {2422, 124}, {4174, 124}}, /* Q ==14 : 87-93% */
+    {{722, 128}, {1891, 145}, {1936, 146}},  /* Q ==15 : 93-99% */
+};
+
+/** HUF_selectDecoder() :
+*   Tells which decoder is likely to decode faster,
+*   based on a set of pre-determined metrics.
+*   @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
+*   Assumption : 0 < cSrcSize < dstSize <= 128 KB */
+U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize)
+{
+	/* decoder timing evaluation */
+	U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
+	U32 const D256 = (U32)(dstSize >> 8);
+	U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
+	U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
+	DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */
+
+	return DTime1 < DTime0;
+}
+
+typedef size_t (*decompressionAlgo)(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize);
+
+size_t HUF_decompress4X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
+{
+	/* validation checks */
+	if (dstSize == 0)
+		return ERROR(dstSize_tooSmall);
+	if (cSrcSize > dstSize)
+		return ERROR(corruption_detected); /* invalid */
+	if (cSrcSize == dstSize) {
+		memcpy(dst, cSrc, dstSize);
+		return dstSize;
+	} /* not compressed */
+	if (cSrcSize == 1) {
+		memset(dst, *(const BYTE *)cSrc, dstSize);
+		return dstSize;
+	} /* RLE */
+
+	{
+		U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+		return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
+			      : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
+	}
+}
+
+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
+{
+	/* validation checks */
+	if (dstSize == 0)
+		return ERROR(dstSize_tooSmall);
+	if ((cSrcSize >= dstSize) || (cSrcSize <= 1))
+		return ERROR(corruption_detected); /* invalid */
+
+	{
+		U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+		return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
+			      : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
+	}
+}
+
+size_t HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
+{
+	/* validation checks */
+	if (dstSize == 0)
+		return ERROR(dstSize_tooSmall);
+	if (cSrcSize > dstSize)
+		return ERROR(corruption_detected); /* invalid */
+	if (cSrcSize == dstSize) {
+		memcpy(dst, cSrc, dstSize);
+		return dstSize;
+	} /* not compressed */
+	if (cSrcSize == 1) {
+		memset(dst, *(const BYTE *)cSrc, dstSize);
+		return dstSize;
+	} /* RLE */
+
+	{
+		U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+		return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
+			      : HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
+	}
+}

diff --git a/lib/zstd/mem.h b/lib/zstd/mem.h
new file mode 100644
index 0000000..3a0f34c
--- /dev/null
+++ b/lib/zstd/mem.h

@@ -0,0 +1,151 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of https://github.com/facebook/zstd.
+ * An additional grant of patent rights can be found in the PATENTS file in the
+ * same directory.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ */
+
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+/*-****************************************
+*  Dependencies
+******************************************/
+#include <asm/unaligned.h>
+#include <linux/string.h> /* memcpy */
+#include <linux/types.h>  /* size_t, ptrdiff_t */
+
+/*-****************************************
+*  Compiler specifics
+******************************************/
+#define ZSTD_STATIC static __inline __attribute__((unused))
+
+/*-**************************************************************
+*  Basic Types
+*****************************************************************/
+typedef uint8_t BYTE;
+typedef uint16_t U16;
+typedef int16_t S16;
+typedef uint32_t U32;
+typedef int32_t S32;
+typedef uint64_t U64;
+typedef int64_t S64;
+typedef ptrdiff_t iPtrDiff;
+typedef uintptr_t uPtrDiff;
+
+/*-**************************************************************
+*  Memory I/O
+*****************************************************************/
+ZSTD_STATIC unsigned ZSTD_32bits(void) { return sizeof(size_t) == 4; }
+ZSTD_STATIC unsigned ZSTD_64bits(void) { return sizeof(size_t) == 8; }
+
+#if defined(__LITTLE_ENDIAN)
+#define ZSTD_LITTLE_ENDIAN 1
+#else
+#define ZSTD_LITTLE_ENDIAN 0
+#endif
+
+ZSTD_STATIC unsigned ZSTD_isLittleEndian(void) { return ZSTD_LITTLE_ENDIAN; }
+
+ZSTD_STATIC U16 ZSTD_read16(const void *memPtr) { return get_unaligned((const U16 *)memPtr); }
+
+ZSTD_STATIC U32 ZSTD_read32(const void *memPtr) { return get_unaligned((const U32 *)memPtr); }
+
+ZSTD_STATIC U64 ZSTD_read64(const void *memPtr) { return get_unaligned((const U64 *)memPtr); }
+
+ZSTD_STATIC size_t ZSTD_readST(const void *memPtr) { return get_unaligned((const size_t *)memPtr); }
+
+ZSTD_STATIC void ZSTD_write16(void *memPtr, U16 value) { put_unaligned(value, (U16 *)memPtr); }
+
+ZSTD_STATIC void ZSTD_write32(void *memPtr, U32 value) { put_unaligned(value, (U32 *)memPtr); }
+
+ZSTD_STATIC void ZSTD_write64(void *memPtr, U64 value) { put_unaligned(value, (U64 *)memPtr); }
+
+/*=== Little endian r/w ===*/
+
+ZSTD_STATIC U16 ZSTD_readLE16(const void *memPtr) { return get_unaligned_le16(memPtr); }
+
+ZSTD_STATIC void ZSTD_writeLE16(void *memPtr, U16 val) { put_unaligned_le16(val, memPtr); }
+
+ZSTD_STATIC U32 ZSTD_readLE24(const void *memPtr) { return ZSTD_readLE16(memPtr) + (((const BYTE *)memPtr)[2] << 16); }
+
+ZSTD_STATIC void ZSTD_writeLE24(void *memPtr, U32 val)
+{
+	ZSTD_writeLE16(memPtr, (U16)val);
+	((BYTE *)memPtr)[2] = (BYTE)(val >> 16);
+}
+
+ZSTD_STATIC U32 ZSTD_readLE32(const void *memPtr) { return get_unaligned_le32(memPtr); }
+
+ZSTD_STATIC void ZSTD_writeLE32(void *memPtr, U32 val32) { put_unaligned_le32(val32, memPtr); }
+
+ZSTD_STATIC U64 ZSTD_readLE64(const void *memPtr) { return get_unaligned_le64(memPtr); }
+
+ZSTD_STATIC void ZSTD_writeLE64(void *memPtr, U64 val64) { put_unaligned_le64(val64, memPtr); }
+
+ZSTD_STATIC size_t ZSTD_readLEST(const void *memPtr)
+{
+	if (ZSTD_32bits())
+		return (size_t)ZSTD_readLE32(memPtr);
+	else
+		return (size_t)ZSTD_readLE64(memPtr);
+}
+
+ZSTD_STATIC void ZSTD_writeLEST(void *memPtr, size_t val)
+{
+	if (ZSTD_32bits())
+		ZSTD_writeLE32(memPtr, (U32)val);
+	else
+		ZSTD_writeLE64(memPtr, (U64)val);
+}
+
+/*=== Big endian r/w ===*/
+
+ZSTD_STATIC U32 ZSTD_readBE32(const void *memPtr) { return get_unaligned_be32(memPtr); }
+
+ZSTD_STATIC void ZSTD_writeBE32(void *memPtr, U32 val32) { put_unaligned_be32(val32, memPtr); }
+
+ZSTD_STATIC U64 ZSTD_readBE64(const void *memPtr) { return get_unaligned_be64(memPtr); }
+
+ZSTD_STATIC void ZSTD_writeBE64(void *memPtr, U64 val64) { put_unaligned_be64(val64, memPtr); }
+
+ZSTD_STATIC size_t ZSTD_readBEST(const void *memPtr)
+{
+	if (ZSTD_32bits())
+		return (size_t)ZSTD_readBE32(memPtr);
+	else
+		return (size_t)ZSTD_readBE64(memPtr);
+}
+
+ZSTD_STATIC void ZSTD_writeBEST(void *memPtr, size_t val)
+{
+	if (ZSTD_32bits())
+		ZSTD_writeBE32(memPtr, (U32)val);
+	else
+		ZSTD_writeBE64(memPtr, (U64)val);
+}
+
+/* function safe only for comparisons */
+ZSTD_STATIC U32 ZSTD_readMINMATCH(const void *memPtr, U32 length)
+{
+	switch (length) {
+	default:
+	case 4: return ZSTD_read32(memPtr);
+	case 3:
+		if (ZSTD_isLittleEndian())
+			return ZSTD_read32(memPtr) << 8;
+		else
+			return ZSTD_read32(memPtr) >> 8;
+	}
+}
+
+#endif /* MEM_H_MODULE */

diff --git a/lib/zstd/zstd_common.c b/lib/zstd/zstd_common.c
new file mode 100644
index 0000000..a282624
--- /dev/null
+++ b/lib/zstd/zstd_common.c

@@ -0,0 +1,75 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of https://github.com/facebook/zstd.
+ * An additional grant of patent rights can be found in the PATENTS file in the
+ * same directory.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ */
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "error_private.h"
+#include "zstd_internal.h" /* declaration of ZSTD_isError, ZSTD_getErrorName, ZSTD_getErrorCode, ZSTD_getErrorString, ZSTD_versionNumber */
+#include <linux/kernel.h>
+
+/*=**************************************************************
+*  Custom allocator
+****************************************************************/
+
+#define stack_push(stack, size)                                 \
+	({                                                      \
+		void *const ptr = ZSTD_PTR_ALIGN((stack)->ptr); \
+		(stack)->ptr = (char *)ptr + (size);            \
+		(stack)->ptr <= (stack)->end ? ptr : NULL;      \
+	})
+
+ZSTD_customMem ZSTD_initStack(void *workspace, size_t workspaceSize)
+{
+	ZSTD_customMem stackMem = {ZSTD_stackAlloc, ZSTD_stackFree, workspace};
+	ZSTD_stack *stack = (ZSTD_stack *)workspace;
+	/* Verify preconditions */
+	if (!workspace || workspaceSize < sizeof(ZSTD_stack) || workspace != ZSTD_PTR_ALIGN(workspace)) {
+		ZSTD_customMem error = {NULL, NULL, NULL};
+		return error;
+	}
+	/* Initialize the stack */
+	stack->ptr = workspace;
+	stack->end = (char *)workspace + workspaceSize;
+	stack_push(stack, sizeof(ZSTD_stack));
+	return stackMem;
+}
+
+void *ZSTD_stackAllocAll(void *opaque, size_t *size)
+{
+	ZSTD_stack *stack = (ZSTD_stack *)opaque;
+	*size = (BYTE const *)stack->end - (BYTE *)ZSTD_PTR_ALIGN(stack->ptr);
+	return stack_push(stack, *size);
+}
+
+void *ZSTD_stackAlloc(void *opaque, size_t size)
+{
+	ZSTD_stack *stack = (ZSTD_stack *)opaque;
+	return stack_push(stack, size);
+}
+void ZSTD_stackFree(void *opaque, void *address)
+{
+	(void)opaque;
+	(void)address;
+}
+
+void *ZSTD_malloc(size_t size, ZSTD_customMem customMem) { return customMem.customAlloc(customMem.opaque, size); }
+
+void ZSTD_free(void *ptr, ZSTD_customMem customMem)
+{
+	if (ptr != NULL)
+		customMem.customFree(customMem.opaque, ptr);
+}

diff --git a/lib/zstd/zstd_internal.h b/lib/zstd/zstd_internal.h
new file mode 100644
index 0000000..1a79fab
--- /dev/null
+++ b/lib/zstd/zstd_internal.h

@@ -0,0 +1,263 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of https://github.com/facebook/zstd.
+ * An additional grant of patent rights can be found in the PATENTS file in the
+ * same directory.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ */
+
+#ifndef ZSTD_CCOMMON_H_MODULE
+#define ZSTD_CCOMMON_H_MODULE
+
+/*-*******************************************************
+*  Compiler specifics
+*********************************************************/
+#define FORCE_INLINE static __always_inline
+#define FORCE_NOINLINE static noinline
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "error_private.h"
+#include "mem.h"
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/xxhash.h>
+#include <linux/zstd.h>
+
+/*-*************************************
+*  shared macros
+***************************************/
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define CHECK_F(f)                       \
+	{                                \
+		size_t const errcod = f; \
+		if (ERR_isError(errcod)) \
+			return errcod;   \
+	} /* check and Forward error code */
+#define CHECK_E(f, e)                    \
+	{                                \
+		size_t const errcod = f; \
+		if (ERR_isError(errcod)) \
+			return ERROR(e); \
+	} /* check and send Error code */
+#define ZSTD_STATIC_ASSERT(c)                                   \
+	{                                                       \
+		enum { ZSTD_static_assert = 1 / (int)(!!(c)) }; \
+	}
+
+/*-*************************************
+*  Common constants
+***************************************/
+#define ZSTD_OPT_NUM (1 << 12)
+#define ZSTD_DICT_MAGIC 0xEC30A437 /* v0.7+ */
+
+#define ZSTD_REP_NUM 3		      /* number of repcodes */
+#define ZSTD_REP_CHECK (ZSTD_REP_NUM) /* number of repcodes to check by the optimal parser */
+#define ZSTD_REP_MOVE (ZSTD_REP_NUM - 1)
+#define ZSTD_REP_MOVE_OPT (ZSTD_REP_NUM)
+static const U32 repStartValue[ZSTD_REP_NUM] = {1, 4, 8};
+
+#define KB *(1 << 10)
+#define MB *(1 << 20)
+#define GB *(1U << 30)
+
+#define BIT7 128
+#define BIT6 64
+#define BIT5 32
+#define BIT4 16
+#define BIT1 2
+#define BIT0 1
+
+#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
+static const size_t ZSTD_fcs_fieldSize[4] = {0, 2, 4, 8};
+static const size_t ZSTD_did_fieldSize[4] = {0, 1, 2, 4};
+
+#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
+static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
+typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
+
+#define MIN_SEQUENCES_SIZE 1									  /* nbSeq==0 */
+#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
+
+#define HufLog 12
+typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
+
+#define LONGNBSEQ 0x7F00
+
+#define MINMATCH 3
+#define EQUAL_READ32 4
+
+#define Litbits 8
+#define MaxLit ((1 << Litbits) - 1)
+#define MaxML 52
+#define MaxLL 35
+#define MaxOff 28
+#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
+#define MLFSELog 9
+#define LLFSELog 9
+#define OffFSELog 8
+
+static const U32 LL_bits[MaxLL + 1] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+static const S16 LL_defaultNorm[MaxLL + 1] = {4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, -1, -1, -1, -1};
+#define LL_DEFAULTNORMLOG 6 /* for static allocation */
+static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
+
+static const U32 ML_bits[MaxML + 1] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  0, 0,
+				       0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+static const S16 ML_defaultNorm[MaxML + 1] = {1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  1,  1,  1,  1,  1,  1, 1,
+					      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1};
+#define ML_DEFAULTNORMLOG 6 /* for static allocation */
+static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
+
+static const S16 OF_defaultNorm[MaxOff + 1] = {1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1};
+#define OF_DEFAULTNORMLOG 5 /* for static allocation */
+static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
+
+/*-*******************************************
+*  Shared functions to include for inlining
+*********************************************/
+ZSTD_STATIC void ZSTD_copy8(void *dst, const void *src) {
+	memcpy(dst, src, 8);
+}
+/*! ZSTD_wildcopy() :
+*   custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
+#define WILDCOPY_OVERLENGTH 8
+ZSTD_STATIC void ZSTD_wildcopy(void *dst, const void *src, ptrdiff_t length)
+{
+	const BYTE* ip = (const BYTE*)src;
+	BYTE* op = (BYTE*)dst;
+	BYTE* const oend = op + length;
+	/* Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388.
+	 * Avoid the bad case where the loop only runs once by handling the
+	 * special case separately. This doesn't trigger the bug because it
+	 * doesn't involve pointer/integer overflow.
+	 */
+	if (length <= 8)
+		return ZSTD_copy8(dst, src);
+	do {
+		ZSTD_copy8(op, ip);
+		op += 8;
+		ip += 8;
+	} while (op < oend);
+}
+
+/*-*******************************************
+*  Private interfaces
+*********************************************/
+typedef struct ZSTD_stats_s ZSTD_stats_t;
+
+typedef struct {
+	U32 off;
+	U32 len;
+} ZSTD_match_t;
+
+typedef struct {
+	U32 price;
+	U32 off;
+	U32 mlen;
+	U32 litlen;
+	U32 rep[ZSTD_REP_NUM];
+} ZSTD_optimal_t;
+
+typedef struct seqDef_s {
+	U32 offset;
+	U16 litLength;
+	U16 matchLength;
+} seqDef;
+
+typedef struct {
+	seqDef *sequencesStart;
+	seqDef *sequences;
+	BYTE *litStart;
+	BYTE *lit;
+	BYTE *llCode;
+	BYTE *mlCode;
+	BYTE *ofCode;
+	U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
+	U32 longLengthPos;
+	/* opt */
+	ZSTD_optimal_t *priceTable;
+	ZSTD_match_t *matchTable;
+	U32 *matchLengthFreq;
+	U32 *litLengthFreq;
+	U32 *litFreq;
+	U32 *offCodeFreq;
+	U32 matchLengthSum;
+	U32 matchSum;
+	U32 litLengthSum;
+	U32 litSum;
+	U32 offCodeSum;
+	U32 log2matchLengthSum;
+	U32 log2matchSum;
+	U32 log2litLengthSum;
+	U32 log2litSum;
+	U32 log2offCodeSum;
+	U32 factor;
+	U32 staticPrices;
+	U32 cachedPrice;
+	U32 cachedLitLength;
+	const BYTE *cachedLiterals;
+} seqStore_t;
+
+const seqStore_t *ZSTD_getSeqStore(const ZSTD_CCtx *ctx);
+void ZSTD_seqToCodes(const seqStore_t *seqStorePtr);
+int ZSTD_isSkipFrame(ZSTD_DCtx *dctx);
+
+/*= Custom memory allocation functions */
+typedef void *(*ZSTD_allocFunction)(void *opaque, size_t size);
+typedef void (*ZSTD_freeFunction)(void *opaque, void *address);
+typedef struct {
+	ZSTD_allocFunction customAlloc;
+	ZSTD_freeFunction customFree;
+	void *opaque;
+} ZSTD_customMem;
+
+void *ZSTD_malloc(size_t size, ZSTD_customMem customMem);
+void ZSTD_free(void *ptr, ZSTD_customMem customMem);
+
+/*====== stack allocation  ======*/
+
+typedef struct {
+	void *ptr;
+	const void *end;
+} ZSTD_stack;
+
+#define ZSTD_ALIGN(x) ALIGN(x, sizeof(size_t))
+#define ZSTD_PTR_ALIGN(p) PTR_ALIGN(p, sizeof(size_t))
+
+ZSTD_customMem ZSTD_initStack(void *workspace, size_t workspaceSize);
+
+void *ZSTD_stackAllocAll(void *opaque, size_t *size);
+void *ZSTD_stackAlloc(void *opaque, size_t size);
+void ZSTD_stackFree(void *opaque, void *address);
+
+/*======  common function  ======*/
+
+ZSTD_STATIC U32 ZSTD_highbit32(U32 val) { return 31 - __builtin_clz(val); }
+
+/* hidden functions */
+
+/* ZSTD_invalidateRepCodes() :
+ * ensures next compression will not use repcodes from previous block.
+ * Note : only works with regular variant;
+ *        do not use with extDict variant ! */
+void ZSTD_invalidateRepCodes(ZSTD_CCtx *cctx);
+
+size_t ZSTD_freeCCtx(ZSTD_CCtx *cctx);
+size_t ZSTD_freeDCtx(ZSTD_DCtx *dctx);
+size_t ZSTD_freeCDict(ZSTD_CDict *cdict);
+size_t ZSTD_freeDDict(ZSTD_DDict *cdict);
+size_t ZSTD_freeCStream(ZSTD_CStream *zcs);
+size_t ZSTD_freeDStream(ZSTD_DStream *zds);
+
+#endif /* ZSTD_CCOMMON_H_MODULE */

diff --git a/lib/zstd/zstd_opt.h b/lib/zstd/zstd_opt.h
new file mode 100644
index 0000000..55e1b4c
--- /dev/null
+++ b/lib/zstd/zstd_opt.h

@@ -0,0 +1,1014 @@
+/**
+ * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of https://github.com/facebook/zstd.
+ * An additional grant of patent rights can be found in the PATENTS file in the
+ * same directory.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ */
+
+/* Note : this file is intended to be included within zstd_compress.c */
+
+#ifndef ZSTD_OPT_H_91842398743
+#define ZSTD_OPT_H_91842398743
+
+#define ZSTD_LITFREQ_ADD 2
+#define ZSTD_FREQ_DIV 4
+#define ZSTD_MAX_PRICE (1 << 30)
+
+/*-*************************************
+*  Price functions for optimal parser
+***************************************/
+FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t *ssPtr)
+{
+	ssPtr->log2matchLengthSum = ZSTD_highbit32(ssPtr->matchLengthSum + 1);
+	ssPtr->log2litLengthSum = ZSTD_highbit32(ssPtr->litLengthSum + 1);
+	ssPtr->log2litSum = ZSTD_highbit32(ssPtr->litSum + 1);
+	ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum + 1);
+	ssPtr->factor = 1 + ((ssPtr->litSum >> 5) / ssPtr->litLengthSum) + ((ssPtr->litSum << 1) / (ssPtr->litSum + ssPtr->matchSum));
+}
+
+ZSTD_STATIC void ZSTD_rescaleFreqs(seqStore_t *ssPtr, const BYTE *src, size_t srcSize)
+{
+	unsigned u;
+
+	ssPtr->cachedLiterals = NULL;
+	ssPtr->cachedPrice = ssPtr->cachedLitLength = 0;
+	ssPtr->staticPrices = 0;
+
+	if (ssPtr->litLengthSum == 0) {
+		if (srcSize <= 1024)
+			ssPtr->staticPrices = 1;
+
+		for (u = 0; u <= MaxLit; u++)
+			ssPtr->litFreq[u] = 0;
+		for (u = 0; u < srcSize; u++)
+			ssPtr->litFreq[src[u]]++;
+
+		ssPtr->litSum = 0;
+		ssPtr->litLengthSum = MaxLL + 1;
+		ssPtr->matchLengthSum = MaxML + 1;
+		ssPtr->offCodeSum = (MaxOff + 1);
+		ssPtr->matchSum = (ZSTD_LITFREQ_ADD << Litbits);
+
+		for (u = 0; u <= MaxLit; u++) {
+			ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u] >> ZSTD_FREQ_DIV);
+			ssPtr->litSum += ssPtr->litFreq[u];
+		}
+		for (u = 0; u <= MaxLL; u++)
+			ssPtr->litLengthFreq[u] = 1;
+		for (u = 0; u <= MaxML; u++)
+			ssPtr->matchLengthFreq[u] = 1;
+		for (u = 0; u <= MaxOff; u++)
+			ssPtr->offCodeFreq[u] = 1;
+	} else {
+		ssPtr->matchLengthSum = 0;
+		ssPtr->litLengthSum = 0;
+		ssPtr->offCodeSum = 0;
+		ssPtr->matchSum = 0;
+		ssPtr->litSum = 0;
+
+		for (u = 0; u <= MaxLit; u++) {
+			ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u] >> (ZSTD_FREQ_DIV + 1));
+			ssPtr->litSum += ssPtr->litFreq[u];
+		}
+		for (u = 0; u <= MaxLL; u++) {
+			ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u] >> (ZSTD_FREQ_DIV + 1));
+			ssPtr->litLengthSum += ssPtr->litLengthFreq[u];
+		}
+		for (u = 0; u <= MaxML; u++) {
+			ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u] >> ZSTD_FREQ_DIV);
+			ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u];
+			ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3);
+		}
+		ssPtr->matchSum *= ZSTD_LITFREQ_ADD;
+		for (u = 0; u <= MaxOff; u++) {
+			ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u] >> ZSTD_FREQ_DIV);
+			ssPtr->offCodeSum += ssPtr->offCodeFreq[u];
+		}
+	}
+
+	ZSTD_setLog2Prices(ssPtr);
+}
+
+FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t *ssPtr, U32 litLength, const BYTE *literals)
+{
+	U32 price, u;
+
+	if (ssPtr->staticPrices)
+		return ZSTD_highbit32((U32)litLength + 1) + (litLength * 6);
+
+	if (litLength == 0)
+		return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0] + 1);
+
+	/* literals */
+	if (ssPtr->cachedLiterals == literals) {
+		U32 const additional = litLength - ssPtr->cachedLitLength;
+		const BYTE *literals2 = ssPtr->cachedLiterals + ssPtr->cachedLitLength;
+		price = ssPtr->cachedPrice + additional * ssPtr->log2litSum;
+		for (u = 0; u < additional; u++)
+			price -= ZSTD_highbit32(ssPtr->litFreq[literals2[u]] + 1);
+		ssPtr->cachedPrice = price;
+		ssPtr->cachedLitLength = litLength;
+	} else {
+		price = litLength * ssPtr->log2litSum;
+		for (u = 0; u < litLength; u++)
+			price -= ZSTD_highbit32(ssPtr->litFreq[literals[u]] + 1);
+
+		if (litLength >= 12) {
+			ssPtr->cachedLiterals = literals;
+			ssPtr->cachedPrice = price;
+			ssPtr->cachedLitLength = litLength;
+		}
+	}
+
+	/* literal Length */
+	{
+		const BYTE LL_deltaCode = 19;
+		const BYTE llCode = (litLength > 63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
+		price += LL_bits[llCode] + ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[llCode] + 1);
+	}
+
+	return price;
+}
+
+FORCE_INLINE U32 ZSTD_getPrice(seqStore_t *seqStorePtr, U32 litLength, const BYTE *literals, U32 offset, U32 matchLength, const int ultra)
+{
+	/* offset */
+	U32 price;
+	BYTE const offCode = (BYTE)ZSTD_highbit32(offset + 1);
+
+	if (seqStorePtr->staticPrices)
+		return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength + 1) + 16 + offCode;
+
+	price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode] + 1);
+	if (!ultra && offCode >= 20)
+		price += (offCode - 19) * 2;
+
+	/* match Length */
+	{
+		const BYTE ML_deltaCode = 36;
+		const BYTE mlCode = (matchLength > 127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
+		price += ML_bits[mlCode] + seqStorePtr->log2matchLengthSum - ZSTD_highbit32(seqStorePtr->matchLengthFreq[mlCode] + 1);
+	}
+
+	return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + seqStorePtr->factor;
+}
+
+ZSTD_STATIC void ZSTD_updatePrice(seqStore_t *seqStorePtr, U32 litLength, const BYTE *literals, U32 offset, U32 matchLength)
+{
+	U32 u;
+
+	/* literals */
+	seqStorePtr->litSum += litLength * ZSTD_LITFREQ_ADD;
+	for (u = 0; u < litLength; u++)
+		seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
+
+	/* literal Length */
+	{
+		const BYTE LL_deltaCode = 19;
+		const BYTE llCode = (litLength > 63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
+		seqStorePtr->litLengthFreq[llCode]++;
+		seqStorePtr->litLengthSum++;
+	}
+
+	/* match offset */
+	{
+		BYTE const offCode = (BYTE)ZSTD_highbit32(offset + 1);
+		seqStorePtr->offCodeSum++;
+		seqStorePtr->offCodeFreq[offCode]++;
+	}
+
+	/* match Length */
+	{
+		const BYTE ML_deltaCode = 36;
+		const BYTE mlCode = (matchLength > 127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
+		seqStorePtr->matchLengthFreq[mlCode]++;
+		seqStorePtr->matchLengthSum++;
+	}
+
+	ZSTD_setLog2Prices(seqStorePtr);
+}
+
+#define SET_PRICE(pos, mlen_, offset_, litlen_, price_)           \
+	{                                                         \
+		while (last_pos < pos) {                          \
+			opt[last_pos + 1].price = ZSTD_MAX_PRICE; \
+			last_pos++;                               \
+		}                                                 \
+		opt[pos].mlen = mlen_;                            \
+		opt[pos].off = offset_;                           \
+		opt[pos].litlen = litlen_;                        \
+		opt[pos].price = price_;                          \
+	}
+
+/* Update hashTable3 up to ip (excluded)
+   Assumption : always within prefix (i.e. not within extDict) */
+FORCE_INLINE
+U32 ZSTD_insertAndFindFirstIndexHash3(ZSTD_CCtx *zc, const BYTE *ip)
+{
+	U32 *const hashTable3 = zc->hashTable3;
+	U32 const hashLog3 = zc->hashLog3;
+	const BYTE *const base = zc->base;
+	U32 idx = zc->nextToUpdate3;
+	const U32 target = zc->nextToUpdate3 = (U32)(ip - base);
+	const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3);
+
+	while (idx < target) {
+		hashTable3[ZSTD_hash3Ptr(base + idx, hashLog3)] = idx;
+		idx++;
+	}
+
+	return hashTable3[hash3];
+}
+
+/*-*************************************
+*  Binary Tree search
+***************************************/
+static U32 ZSTD_insertBtAndGetAllMatches(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, U32 nbCompares, const U32 mls, U32 extDict,
+					 ZSTD_match_t *matches, const U32 minMatchLen)
+{
+	const BYTE *const base = zc->base;
+	const U32 curr = (U32)(ip - base);
+	const U32 hashLog = zc->params.cParams.hashLog;
+	const size_t h = ZSTD_hashPtr(ip, hashLog, mls);
+	U32 *const hashTable = zc->hashTable;
+	U32 matchIndex = hashTable[h];
+	U32 *const bt = zc->chainTable;
+	const U32 btLog = zc->params.cParams.chainLog - 1;
+	const U32 btMask = (1U << btLog) - 1;
+	size_t commonLengthSmaller = 0, commonLengthLarger = 0;
+	const BYTE *const dictBase = zc->dictBase;
+	const U32 dictLimit = zc->dictLimit;
+	const BYTE *const dictEnd = dictBase + dictLimit;
+	const BYTE *const prefixStart = base + dictLimit;
+	const U32 btLow = btMask >= curr ? 0 : curr - btMask;
+	const U32 windowLow = zc->lowLimit;
+	U32 *smallerPtr = bt + 2 * (curr & btMask);
+	U32 *largerPtr = bt + 2 * (curr & btMask) + 1;
+	U32 matchEndIdx = curr + 8;
+	U32 dummy32; /* to be nullified at the end */
+	U32 mnum = 0;
+
+	const U32 minMatch = (mls == 3) ? 3 : 4;
+	size_t bestLength = minMatchLen - 1;
+
+	if (minMatch == 3) { /* HC3 match finder */
+		U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(zc, ip);
+		if (matchIndex3 > windowLow && (curr - matchIndex3 < (1 << 18))) {
+			const BYTE *match;
+			size_t currMl = 0;
+			if ((!extDict) || matchIndex3 >= dictLimit) {
+				match = base + matchIndex3;
+				if (match[bestLength] == ip[bestLength])
+					currMl = ZSTD_count(ip, match, iLimit);
+			} else {
+				match = dictBase + matchIndex3;
+				if (ZSTD_readMINMATCH(match, MINMATCH) ==
+				    ZSTD_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
+					currMl = ZSTD_count_2segments(ip + MINMATCH, match + MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH;
+			}
+
+			/* save best solution */
+			if (currMl > bestLength) {
+				bestLength = currMl;
+				matches[mnum].off = ZSTD_REP_MOVE_OPT + curr - matchIndex3;
+				matches[mnum].len = (U32)currMl;
+				mnum++;
+				if (currMl > ZSTD_OPT_NUM)
+					goto update;
+				if (ip + currMl == iLimit)
+					goto update; /* best possible, and avoid read overflow*/
+			}
+		}
+	}
+
+	hashTable[h] = curr; /* Update Hash Table */
+
+	while (nbCompares-- && (matchIndex > windowLow)) {
+		U32 *nextPtr = bt + 2 * (matchIndex & btMask);
+		size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
+		const BYTE *match;
+
+		if ((!extDict) || (matchIndex + matchLength >= dictLimit)) {
+			match = base + matchIndex;
+			if (match[matchLength] == ip[matchLength]) {
+				matchLength += ZSTD_count(ip + matchLength + 1, match + matchLength + 1, iLimit) + 1;
+			}
+		} else {
+			match = dictBase + matchIndex;
+			matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iLimit, dictEnd, prefixStart);
+			if (matchIndex + matchLength >= dictLimit)
+				match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
+		}
+
+		if (matchLength > bestLength) {
+			if (matchLength > matchEndIdx - matchIndex)
+				matchEndIdx = matchIndex + (U32)matchLength;
+			bestLength = matchLength;
+			matches[mnum].off = ZSTD_REP_MOVE_OPT + curr - matchIndex;
+			matches[mnum].len = (U32)matchLength;
+			mnum++;
+			if (matchLength > ZSTD_OPT_NUM)
+				break;
+			if (ip + matchLength == iLimit) /* equal : no way to know if inf or sup */
+				break;			/* drop, to guarantee consistency (miss a little bit of compression) */
+		}
+
+		if (match[matchLength] < ip[matchLength]) {
+			/* match is smaller than curr */
+			*smallerPtr = matchIndex;	  /* update smaller idx */
+			commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
+			if (matchIndex <= btLow) {
+				smallerPtr = &dummy32;
+				break;
+			}			  /* beyond tree size, stop the search */
+			smallerPtr = nextPtr + 1; /* new "smaller" => larger of match */
+			matchIndex = nextPtr[1];  /* new matchIndex larger than previous (closer to curr) */
+		} else {
+			/* match is larger than curr */
+			*largerPtr = matchIndex;
+			commonLengthLarger = matchLength;
+			if (matchIndex <= btLow) {
+				largerPtr = &dummy32;
+				break;
+			} /* beyond tree size, stop the search */
+			largerPtr = nextPtr;
+			matchIndex = nextPtr[0];
+		}
+	}
+
+	*smallerPtr = *largerPtr = 0;
+
+update:
+	zc->nextToUpdate = (matchEndIdx > curr + 8) ? matchEndIdx - 8 : curr + 1;
+	return mnum;
+}
+
+/** Tree updater, providing best match */
+static U32 ZSTD_BtGetAllMatches(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, const U32 maxNbAttempts, const U32 mls, ZSTD_match_t *matches,
+				const U32 minMatchLen)
+{
+	if (ip < zc->base + zc->nextToUpdate)
+		return 0; /* skipped area */
+	ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
+	return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen);
+}
+
+static U32 ZSTD_BtGetAllMatches_selectMLS(ZSTD_CCtx *zc, /* Index table will be updated */
+					  const BYTE *ip, const BYTE *const iHighLimit, const U32 maxNbAttempts, const U32 matchLengthSearch,
+					  ZSTD_match_t *matches, const U32 minMatchLen)
+{
+	switch (matchLengthSearch) {
+	case 3: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
+	default:
+	case 4: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
+	case 5: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
+	case 7:
+	case 6: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
+	}
+}
+
+/** Tree updater, providing best match */
+static U32 ZSTD_BtGetAllMatches_extDict(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, const U32 maxNbAttempts, const U32 mls,
+					ZSTD_match_t *matches, const U32 minMatchLen)
+{
+	if (ip < zc->base + zc->nextToUpdate)
+		return 0; /* skipped area */
+	ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
+	return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen);
+}
+
+static U32 ZSTD_BtGetAllMatches_selectMLS_extDict(ZSTD_CCtx *zc, /* Index table will be updated */
+						  const BYTE *ip, const BYTE *const iHighLimit, const U32 maxNbAttempts, const U32 matchLengthSearch,
+						  ZSTD_match_t *matches, const U32 minMatchLen)
+{
+	switch (matchLengthSearch) {
+	case 3: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
+	default:
+	case 4: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
+	case 5: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
+	case 7:
+	case 6: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
+	}
+}
+
+/*-*******************************
+*  Optimal parser
+*********************************/
+FORCE_INLINE
+void ZSTD_compressBlock_opt_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const int ultra)
+{
+	seqStore_t *seqStorePtr = &(ctx->seqStore);
+	const BYTE *const istart = (const BYTE *)src;
+	const BYTE *ip = istart;
+	const BYTE *anchor = istart;
+	const BYTE *const iend = istart + srcSize;
+	const BYTE *const ilimit = iend - 8;
+	const BYTE *const base = ctx->base;
+	const BYTE *const prefixStart = base + ctx->dictLimit;
+
+	const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
+	const U32 sufficient_len = ctx->params.cParams.targetLength;
+	const U32 mls = ctx->params.cParams.searchLength;
+	const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
+
+	ZSTD_optimal_t *opt = seqStorePtr->priceTable;
+	ZSTD_match_t *matches = seqStorePtr->matchTable;
+	const BYTE *inr;
+	U32 offset, rep[ZSTD_REP_NUM];
+
+	/* init */
+	ctx->nextToUpdate3 = ctx->nextToUpdate;
+	ZSTD_rescaleFreqs(seqStorePtr, (const BYTE *)src, srcSize);
+	ip += (ip == prefixStart);
+	{
+		U32 i;
+		for (i = 0; i < ZSTD_REP_NUM; i++)
+			rep[i] = ctx->rep[i];
+	}
+
+	/* Match Loop */
+	while (ip < ilimit) {
+		U32 cur, match_num, last_pos, litlen, price;
+		U32 u, mlen, best_mlen, best_off, litLength;
+		memset(opt, 0, sizeof(ZSTD_optimal_t));
+		last_pos = 0;
+		litlen = (U32)(ip - anchor);
+
+		/* check repCode */
+		{
+			U32 i, last_i = ZSTD_REP_CHECK + (ip == anchor);
+			for (i = (ip == anchor); i < last_i; i++) {
+				const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
+				if ((repCur > 0) && (repCur < (S32)(ip - prefixStart)) &&
+				    (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repCur, minMatch))) {
+					mlen = (U32)ZSTD_count(ip + minMatch, ip + minMatch - repCur, iend) + minMatch;
+					if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
+						best_mlen = mlen;
+						best_off = i;
+						cur = 0;
+						last_pos = 1;
+						goto _storeSequence;
+					}
+					best_off = i - (ip == anchor);
+					do {
+						price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
+						if (mlen > last_pos || price < opt[mlen].price)
+							SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */
+						mlen--;
+					} while (mlen >= minMatch);
+				}
+			}
+		}
+
+		match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch);
+
+		if (!last_pos && !match_num) {
+			ip++;
+			continue;
+		}
+
+		if (match_num && (matches[match_num - 1].len > sufficient_len || matches[match_num - 1].len >= ZSTD_OPT_NUM)) {
+			best_mlen = matches[match_num - 1].len;
+			best_off = matches[match_num - 1].off;
+			cur = 0;
+			last_pos = 1;
+			goto _storeSequence;
+		}
+
+		/* set prices using matches at position = 0 */
+		best_mlen = (last_pos) ? last_pos : minMatch;
+		for (u = 0; u < match_num; u++) {
+			mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen;
+			best_mlen = matches[u].len;
+			while (mlen <= best_mlen) {
+				price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra);
+				if (mlen > last_pos || price < opt[mlen].price)
+					SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */
+				mlen++;
+			}
+		}
+
+		if (last_pos < minMatch) {
+			ip++;
+			continue;
+		}
+
+		/* initialize opt[0] */
+		{
+			U32 i;
+			for (i = 0; i < ZSTD_REP_NUM; i++)
+				opt[0].rep[i] = rep[i];
+		}
+		opt[0].mlen = 1;
+		opt[0].litlen = litlen;
+
+		/* check further positions */
+		for (cur = 1; cur <= last_pos; cur++) {
+			inr = ip + cur;
+
+			if (opt[cur - 1].mlen == 1) {
+				litlen = opt[cur - 1].litlen + 1;
+				if (cur > litlen) {
+					price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - litlen);
+				} else
+					price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
+			} else {
+				litlen = 1;
+				price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - 1);
+			}
+
+			if (cur > last_pos || price <= opt[cur].price)
+				SET_PRICE(cur, 1, 0, litlen, price);
+
+			if (cur == last_pos)
+				break;
+
+			if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */
+				continue;
+
+			mlen = opt[cur].mlen;
+			if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
+				opt[cur].rep[2] = opt[cur - mlen].rep[1];
+				opt[cur].rep[1] = opt[cur - mlen].rep[0];
+				opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
+			} else {
+				opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur - mlen].rep[1] : opt[cur - mlen].rep[2];
+				opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur - mlen].rep[0] : opt[cur - mlen].rep[1];
+				opt[cur].rep[0] =
+				    ((opt[cur].off == ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur - mlen].rep[0] - 1) : (opt[cur - mlen].rep[opt[cur].off]);
+			}
+
+			best_mlen = minMatch;
+			{
+				U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
+				for (i = (opt[cur].mlen != 1); i < last_i; i++) { /* check rep */
+					const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
+					if ((repCur > 0) && (repCur < (S32)(inr - prefixStart)) &&
+					    (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) {
+						mlen = (U32)ZSTD_count(inr + minMatch, inr + minMatch - repCur, iend) + minMatch;
+
+						if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
+							best_mlen = mlen;
+							best_off = i;
+							last_pos = cur + 1;
+							goto _storeSequence;
+						}
+
+						best_off = i - (opt[cur].mlen != 1);
+						if (mlen > best_mlen)
+							best_mlen = mlen;
+
+						do {
+							if (opt[cur].mlen == 1) {
+								litlen = opt[cur].litlen;
+								if (cur > litlen) {
+									price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr - litlen,
+															best_off, mlen - MINMATCH, ultra);
+								} else
+									price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
+							} else {
+								litlen = 0;
+								price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
+							}
+
+							if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
+								SET_PRICE(cur + mlen, mlen, i, litlen, price);
+							mlen--;
+						} while (mlen >= minMatch);
+					}
+				}
+			}
+
+			match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen);
+
+			if (match_num > 0 && (matches[match_num - 1].len > sufficient_len || cur + matches[match_num - 1].len >= ZSTD_OPT_NUM)) {
+				best_mlen = matches[match_num - 1].len;
+				best_off = matches[match_num - 1].off;
+				last_pos = cur + 1;
+				goto _storeSequence;
+			}
+
+			/* set prices using matches at position = cur */
+			for (u = 0; u < match_num; u++) {
+				mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen;
+				best_mlen = matches[u].len;
+
+				while (mlen <= best_mlen) {
+					if (opt[cur].mlen == 1) {
+						litlen = opt[cur].litlen;
+						if (cur > litlen)
+							price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip + cur - litlen,
+													matches[u].off - 1, mlen - MINMATCH, ultra);
+						else
+							price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra);
+					} else {
+						litlen = 0;
+						price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off - 1, mlen - MINMATCH, ultra);
+					}
+
+					if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
+						SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
+
+					mlen++;
+				}
+			}
+		}
+
+		best_mlen = opt[last_pos].mlen;
+		best_off = opt[last_pos].off;
+		cur = last_pos - best_mlen;
+
+	/* store sequence */
+_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
+		opt[0].mlen = 1;
+
+		while (1) {
+			mlen = opt[cur].mlen;
+			offset = opt[cur].off;
+			opt[cur].mlen = best_mlen;
+			opt[cur].off = best_off;
+			best_mlen = mlen;
+			best_off = offset;
+			if (mlen > cur)
+				break;
+			cur -= mlen;
+		}
+
+		for (u = 0; u <= last_pos;) {
+			u += opt[u].mlen;
+		}
+
+		for (cur = 0; cur < last_pos;) {
+			mlen = opt[cur].mlen;
+			if (mlen == 1) {
+				ip++;
+				cur++;
+				continue;
+			}
+			offset = opt[cur].off;
+			cur += mlen;
+			litLength = (U32)(ip - anchor);
+
+			if (offset > ZSTD_REP_MOVE_OPT) {
+				rep[2] = rep[1];
+				rep[1] = rep[0];
+				rep[0] = offset - ZSTD_REP_MOVE_OPT;
+				offset--;
+			} else {
+				if (offset != 0) {
+					best_off = (offset == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
+					if (offset != 1)
+						rep[2] = rep[1];
+					rep[1] = rep[0];
+					rep[0] = best_off;
+				}
+				if (litLength == 0)
+					offset--;
+			}
+
+			ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH);
+			ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH);
+			anchor = ip = ip + mlen;
+		}
+	} /* for (cur=0; cur < last_pos; ) */
+
+	/* Save reps for next block */
+	{
+		int i;
+		for (i = 0; i < ZSTD_REP_NUM; i++)
+			ctx->repToConfirm[i] = rep[i];
+	}
+
+	/* Last Literals */
+	{
+		size_t const lastLLSize = iend - anchor;
+		memcpy(seqStorePtr->lit, anchor, lastLLSize);
+		seqStorePtr->lit += lastLLSize;
+	}
+}
+
+FORCE_INLINE
+void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const int ultra)
+{
+	seqStore_t *seqStorePtr = &(ctx->seqStore);
+	const BYTE *const istart = (const BYTE *)src;
+	const BYTE *ip = istart;
+	const BYTE *anchor = istart;
+	const BYTE *const iend = istart + srcSize;
+	const BYTE *const ilimit = iend - 8;
+	const BYTE *const base = ctx->base;
+	const U32 lowestIndex = ctx->lowLimit;
+	const U32 dictLimit = ctx->dictLimit;
+	const BYTE *const prefixStart = base + dictLimit;
+	const BYTE *const dictBase = ctx->dictBase;
+	const BYTE *const dictEnd = dictBase + dictLimit;
+
+	const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
+	const U32 sufficient_len = ctx->params.cParams.targetLength;
+	const U32 mls = ctx->params.cParams.searchLength;
+	const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
+
+	ZSTD_optimal_t *opt = seqStorePtr->priceTable;
+	ZSTD_match_t *matches = seqStorePtr->matchTable;
+	const BYTE *inr;
+
+	/* init */
+	U32 offset, rep[ZSTD_REP_NUM];
+	{
+		U32 i;
+		for (i = 0; i < ZSTD_REP_NUM; i++)
+			rep[i] = ctx->rep[i];
+	}
+
+	ctx->nextToUpdate3 = ctx->nextToUpdate;
+	ZSTD_rescaleFreqs(seqStorePtr, (const BYTE *)src, srcSize);
+	ip += (ip == prefixStart);
+
+	/* Match Loop */
+	while (ip < ilimit) {
+		U32 cur, match_num, last_pos, litlen, price;
+		U32 u, mlen, best_mlen, best_off, litLength;
+		U32 curr = (U32)(ip - base);
+		memset(opt, 0, sizeof(ZSTD_optimal_t));
+		last_pos = 0;
+		opt[0].litlen = (U32)(ip - anchor);
+
+		/* check repCode */
+		{
+			U32 i, last_i = ZSTD_REP_CHECK + (ip == anchor);
+			for (i = (ip == anchor); i < last_i; i++) {
+				const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
+				const U32 repIndex = (U32)(curr - repCur);
+				const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
+				const BYTE *const repMatch = repBase + repIndex;
+				if ((repCur > 0 && repCur <= (S32)curr) &&
+				    (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
+				    && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch))) {
+					/* repcode detected we should take it */
+					const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
+					mlen = (U32)ZSTD_count_2segments(ip + minMatch, repMatch + minMatch, iend, repEnd, prefixStart) + minMatch;
+
+					if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
+						best_mlen = mlen;
+						best_off = i;
+						cur = 0;
+						last_pos = 1;
+						goto _storeSequence;
+					}
+
+					best_off = i - (ip == anchor);
+					litlen = opt[0].litlen;
+					do {
+						price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
+						if (mlen > last_pos || price < opt[mlen].price)
+							SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */
+						mlen--;
+					} while (mlen >= minMatch);
+				}
+			}
+		}
+
+		match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */
+
+		if (!last_pos && !match_num) {
+			ip++;
+			continue;
+		}
+
+		{
+			U32 i;
+			for (i = 0; i < ZSTD_REP_NUM; i++)
+				opt[0].rep[i] = rep[i];
+		}
+		opt[0].mlen = 1;
+
+		if (match_num && (matches[match_num - 1].len > sufficient_len || matches[match_num - 1].len >= ZSTD_OPT_NUM)) {
+			best_mlen = matches[match_num - 1].len;
+			best_off = matches[match_num - 1].off;
+			cur = 0;
+			last_pos = 1;
+			goto _storeSequence;
+		}
+
+		best_mlen = (last_pos) ? last_pos : minMatch;
+
+		/* set prices using matches at position = 0 */
+		for (u = 0; u < match_num; u++) {
+			mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen;
+			best_mlen = matches[u].len;
+			litlen = opt[0].litlen;
+			while (mlen <= best_mlen) {
+				price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra);
+				if (mlen > last_pos || price < opt[mlen].price)
+					SET_PRICE(mlen, mlen, matches[u].off, litlen, price);
+				mlen++;
+			}
+		}
+
+		if (last_pos < minMatch) {
+			ip++;
+			continue;
+		}
+
+		/* check further positions */
+		for (cur = 1; cur <= last_pos; cur++) {
+			inr = ip + cur;
+
+			if (opt[cur - 1].mlen == 1) {
+				litlen = opt[cur - 1].litlen + 1;
+				if (cur > litlen) {
+					price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - litlen);
+				} else
+					price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
+			} else {
+				litlen = 1;
+				price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - 1);
+			}
+
+			if (cur > last_pos || price <= opt[cur].price)
+				SET_PRICE(cur, 1, 0, litlen, price);
+
+			if (cur == last_pos)
+				break;
+
+			if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */
+				continue;
+
+			mlen = opt[cur].mlen;
+			if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
+				opt[cur].rep[2] = opt[cur - mlen].rep[1];
+				opt[cur].rep[1] = opt[cur - mlen].rep[0];
+				opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
+			} else {
+				opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur - mlen].rep[1] : opt[cur - mlen].rep[2];
+				opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur - mlen].rep[0] : opt[cur - mlen].rep[1];
+				opt[cur].rep[0] =
+				    ((opt[cur].off == ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur - mlen].rep[0] - 1) : (opt[cur - mlen].rep[opt[cur].off]);
+			}
+
+			best_mlen = minMatch;
+			{
+				U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
+				for (i = (mlen != 1); i < last_i; i++) {
+					const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
+					const U32 repIndex = (U32)(curr + cur - repCur);
+					const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
+					const BYTE *const repMatch = repBase + repIndex;
+					if ((repCur > 0 && repCur <= (S32)(curr + cur)) &&
+					    (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
+					    && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch))) {
+						/* repcode detected */
+						const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
+						mlen = (U32)ZSTD_count_2segments(inr + minMatch, repMatch + minMatch, iend, repEnd, prefixStart) + minMatch;
+
+						if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
+							best_mlen = mlen;
+							best_off = i;
+							last_pos = cur + 1;
+							goto _storeSequence;
+						}
+
+						best_off = i - (opt[cur].mlen != 1);
+						if (mlen > best_mlen)
+							best_mlen = mlen;
+
+						do {
+							if (opt[cur].mlen == 1) {
+								litlen = opt[cur].litlen;
+								if (cur > litlen) {
+									price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr - litlen,
+															best_off, mlen - MINMATCH, ultra);
+								} else
+									price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
+							} else {
+								litlen = 0;
+								price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
+							}
+
+							if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
+								SET_PRICE(cur + mlen, mlen, i, litlen, price);
+							mlen--;
+						} while (mlen >= minMatch);
+					}
+				}
+			}
+
+			match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
+
+			if (match_num > 0 && (matches[match_num - 1].len > sufficient_len || cur + matches[match_num - 1].len >= ZSTD_OPT_NUM)) {
+				best_mlen = matches[match_num - 1].len;
+				best_off = matches[match_num - 1].off;
+				last_pos = cur + 1;
+				goto _storeSequence;
+			}
+
+			/* set prices using matches at position = cur */
+			for (u = 0; u < match_num; u++) {
+				mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen;
+				best_mlen = matches[u].len;
+
+				while (mlen <= best_mlen) {
+					if (opt[cur].mlen == 1) {
+						litlen = opt[cur].litlen;
+						if (cur > litlen)
+							price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip + cur - litlen,
+													matches[u].off - 1, mlen - MINMATCH, ultra);
+						else
+							price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra);
+					} else {
+						litlen = 0;
+						price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off - 1, mlen - MINMATCH, ultra);
+					}
+
+					if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
+						SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
+
+					mlen++;
+				}
+			}
+		} /* for (cur = 1; cur <= last_pos; cur++) */
+
+		best_mlen = opt[last_pos].mlen;
+		best_off = opt[last_pos].off;
+		cur = last_pos - best_mlen;
+
+	/* store sequence */
+_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
+		opt[0].mlen = 1;
+
+		while (1) {
+			mlen = opt[cur].mlen;
+			offset = opt[cur].off;
+			opt[cur].mlen = best_mlen;
+			opt[cur].off = best_off;
+			best_mlen = mlen;
+			best_off = offset;
+			if (mlen > cur)
+				break;
+			cur -= mlen;
+		}
+
+		for (u = 0; u <= last_pos;) {
+			u += opt[u].mlen;
+		}
+
+		for (cur = 0; cur < last_pos;) {
+			mlen = opt[cur].mlen;
+			if (mlen == 1) {
+				ip++;
+				cur++;
+				continue;
+			}
+			offset = opt[cur].off;
+			cur += mlen;
+			litLength = (U32)(ip - anchor);
+
+			if (offset > ZSTD_REP_MOVE_OPT) {
+				rep[2] = rep[1];
+				rep[1] = rep[0];
+				rep[0] = offset - ZSTD_REP_MOVE_OPT;
+				offset--;
+			} else {
+				if (offset != 0) {
+					best_off = (offset == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
+					if (offset != 1)
+						rep[2] = rep[1];
+					rep[1] = rep[0];
+					rep[0] = best_off;
+				}
+
+				if (litLength == 0)
+					offset--;
+			}
+
+			ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH);
+			ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH);
+			anchor = ip = ip + mlen;
+		}
+	} /* for (cur=0; cur < last_pos; ) */
+
+	/* Save reps for next block */
+	{
+		int i;
+		for (i = 0; i < ZSTD_REP_NUM; i++)
+			ctx->repToConfirm[i] = rep[i];
+	}
+
+	/* Last Literals */
+	{
+		size_t lastLLSize = iend - anchor;
+		memcpy(seqStorePtr->lit, anchor, lastLLSize);
+		seqStorePtr->lit += lastLLSize;
+	}
+}
+
+#endif /* ZSTD_OPT_H_91842398743 */

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 74e4e1a..a9d07a1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c

@@ -269,7 +269,7 @@ int watermark_scale_factor;
  * free memory, to make space for new workloads. Anyone can allocate
  * down to the min watermarks controlled by min_free_kbytes above.
  */
-int extra_free_kbytes;
+int extra_free_kbytes = 0;
 
 static unsigned long __meminitdata nr_kernel_pages;
 static unsigned long __meminitdata nr_all_pages;
@@ -6871,9 +6871,10 @@ static void __setup_per_zone_wmarks(void)
 			    mult_frac(zone->managed_pages,
 				      watermark_scale_factor, 10000));
 
-		zone->watermark[WMARK_LOW]  = min_wmark_pages(zone) + low + min;
-		zone->watermark[WMARK_HIGH] =
-					min_wmark_pages(zone) + low + min * 2;
+		zone->watermark[WMARK_LOW]  = min_wmark_pages(zone) +
+					low + min;
+		zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) +
+					low + min * 2;
 
 		spin_unlock_irqrestore(&zone->lock, flags);
 	}

diff --git a/mm/slub.c b/mm/slub.c
index 3efb4f2..b5c9fde 100644
--- a/mm/slub.c
+++ b/mm/slub.c

@@ -4681,6 +4681,22 @@ enum slab_stat_type {
 #define SO_OBJECTS	(1 << SL_OBJECTS)
 #define SO_TOTAL	(1 << SL_TOTAL)
 
+#ifdef CONFIG_MEMCG
+static bool memcg_sysfs_enabled = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON);
+
+static int __init setup_slub_memcg_sysfs(char *str)
+{
+	int v;
+
+	if (get_option(&str, &v) > 0)
+		memcg_sysfs_enabled = v;
+
+	return 1;
+}
+
+__setup("slub_memcg_sysfs=", setup_slub_memcg_sysfs);
+#endif
+
 static ssize_t show_slab_objects(struct kmem_cache *s,
 			    char *buf, unsigned long flags)
 {
@@ -5586,8 +5602,14 @@ static int sysfs_slab_add(struct kmem_cache *s)
 {
 	int err;
 	const char *name;
+	struct kset *kset = cache_kset(s);
 	int unmergeable = slab_unmergeable(s);
 
+	if (!kset) {
+		kobject_init(&s->kobj, &slab_ktype);
+		return 0;
+	}
+
 	if (unmergeable) {
 		/*
 		 * Slabcache can never be merged so we can use the name proper.
@@ -5604,7 +5626,7 @@ static int sysfs_slab_add(struct kmem_cache *s)
 		name = create_unique_id(s);
 	}
 
-	s->kobj.kset = cache_kset(s);
+	s->kobj.kset = kset;
 	err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
 	if (err)
 		goto out;
@@ -5614,7 +5636,7 @@ static int sysfs_slab_add(struct kmem_cache *s)
 		goto out_del_kobj;
 
 #ifdef CONFIG_MEMCG
-	if (is_root_cache(s)) {
+	if (is_root_cache(s) && memcg_sysfs_enabled) {
 		s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj);
 		if (!s->memcg_kset) {
 			err = -ENOMEM;

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index cf15851..e331312 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c

@@ -188,6 +188,7 @@ static int zs_size_classes;
  * (see: fix_fullness_group())
  */
 static const int fullness_threshold_frac = 4;
+static size_t huge_class_size;
 
 struct size_class {
 	spinlock_t lock;
@@ -1487,6 +1488,25 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
 }
 EXPORT_SYMBOL_GPL(zs_unmap_object);
 
+/**
+ * zs_huge_class_size() - Returns the size (in bytes) of the first huge
+ *                        zsmalloc &size_class.
+ * @pool: zsmalloc pool to use
+ *
+ * The function returns the size of the first huge class - any object of equal
+ * or bigger size will be stored in zspage consisting of a single physical
+ * page.
+ *
+ * Context: Any context.
+ *
+ * Return: the size (in bytes) of the first huge zsmalloc &size_class.
+ */
+size_t zs_huge_class_size(struct zs_pool *pool)
+{
+	return huge_class_size;
+}
+EXPORT_SYMBOL_GPL(zs_huge_class_size);
+
 static unsigned long obj_malloc(struct size_class *class,
 				struct zspage *zspage, unsigned long handle)
 {
@@ -2443,6 +2463,27 @@ struct zs_pool *zs_create_pool(const char *name)
 		objs_per_zspage = pages_per_zspage * PAGE_SIZE / size;
 
 		/*
+		 * We iterate from biggest down to smallest classes,
+		 * so huge_class_size holds the size of the first huge
+		 * class. Any object bigger than or equal to that will
+		 * endup in the huge class.
+		 */
+		if (pages_per_zspage != 1 && objs_per_zspage != 1 &&
+				!huge_class_size) {
+			huge_class_size = size;
+			/*
+			 * The object uses ZS_HANDLE_SIZE bytes to store the
+			 * handle. We need to subtract it, because zs_malloc()
+			 * unconditionally adds handle size before it performs
+			 * size class search - so object may be smaller than
+			 * huge class size, yet it still can end up in the huge
+			 * class because it grows by ZS_HANDLE_SIZE extra bytes
+			 * right before class lookup.
+			 */
+			huge_class_size -= (ZS_HANDLE_SIZE - 1);
+		}
+
+		/*
 		 * size_class is used for normal zsmalloc operation such
 		 * as alloc/free for that size. Although it is natural that we
 		 * have one size_class for each size, there is a chance that we

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 457f882..9b2d6112 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c

@@ -666,7 +666,7 @@ int netpoll_setup(struct netpoll *np)
 	int err;
 
 	rtnl_lock();
-	if (np->dev_name) {
+	if (np->dev_name[0]) {
 		struct net *net = current->nsproxy->net_ns;
 		ndev = __dev_get_by_name(net, np->dev_name);
 	}

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 306b8f0..fc7ec7a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c

@@ -2345,7 +2345,7 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
 			x = xfrm_state_lookup_byspi(pn->net, htonl(pkt_dev->spi), AF_INET);
 		} else {
 			/* slow path: we dont already have xfrm_state */
-			x = xfrm_stateonly_find(pn->net, DUMMY_MARK,
+			x = xfrm_stateonly_find(pn->net, DUMMY_MARK, 0,
 						(xfrm_address_t *)&pkt_dev->cur_daddr,
 						(xfrm_address_t *)&pkt_dev->cur_saddr,
 						AF_INET,

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c7dcf40..68c860d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c

@@ -2786,8 +2786,8 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
 	[RTA_ENCAP]		= { .type = NLA_NESTED },
 	[RTA_EXPIRES]		= { .type = NLA_U32 },
-	[RTA_TABLE]		= { .type = NLA_U32 },
 	[RTA_UID]		= { .type = NLA_U32 },
+	[RTA_TABLE]		= { .type = NLA_U32 },
 };
 
 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,

diff --git a/net/key/af_key.c b/net/key/af_key.c
index 58c045d..6bd87c8 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c

@@ -1380,7 +1380,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
 	}
 
 	if (!x)
-		x = xfrm_find_acq(net, &dummy_mark, mode, reqid, proto, xdaddr, xsaddr, 1, family);
+		x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, proto, xdaddr, xsaddr, 1, family);
 
 	if (x == NULL)
 		return -ENOENT;
@@ -2415,7 +2415,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
 			return err;
 	}
 
-	xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, XFRM_POLICY_TYPE_MAIN,
+	xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN,
 				   pol->sadb_x_policy_dir - 1, &sel, pol_ctx,
 				   1, &err);
 	security_xfrm_policy_free(pol_ctx);
@@ -2666,7 +2666,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
 		return -EINVAL;
 
 	delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2);
-	xp = xfrm_policy_byid(net, DUMMY_MARK, XFRM_POLICY_TYPE_MAIN,
+	xp = xfrm_policy_byid(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN,
 			      dir, pol->sadb_x_policy_id, delete, &err);
 	if (xp == NULL)
 		return -ENOENT;

diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c
index 3dfa5a0..d677909 100644
--- a/net/netfilter/xt_qtaguid.c
+++ b/net/netfilter/xt_qtaguid.c

@@ -1191,11 +1191,6 @@ static void get_dev_and_dir(const struct sk_buff *skb,
 		       par->hooknum, __func__);
 		BUG();
 	}
-	if (unlikely(!(*el_dev)->name)) {
-		pr_err("qtaguid[%d]: %s(): no dev->name?!!\n",
-		       par->hooknum, __func__);
-		BUG();
-	}
 	if (skb->dev && *el_dev != skb->dev) {
 		MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs par->%s=%p %s\n",
 			 par->hooknum, skb->dev, skb->dev->name,

diff --git a/net/socket.c b/net/socket.c
index 6af62e2..d3dfa58 100644
--- a/net/socket.c
+++ b/net/socket.c

@@ -553,7 +553,10 @@ static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
 	if (!err && (iattr->ia_valid & ATTR_UID)) {
 		struct socket *sock = SOCKET_I(d_inode(dentry));
 
-		sock->sk->sk_uid = iattr->ia_uid;
+		if (sock->sk)
+			sock->sk->sk_uid = iattr->ia_uid;
+		else
+			err = -ENOENT;
 	}
 
 	return err;
@@ -604,12 +607,16 @@ EXPORT_SYMBOL(sock_alloc);
  *	an inode not a file.
  */
 
-void sock_release(struct socket *sock)
+static void __sock_release(struct socket *sock, struct inode *inode)
 {
 	if (sock->ops) {
 		struct module *owner = sock->ops->owner;
 
+		if (inode)
+			inode_lock(inode);
 		sock->ops->release(sock);
+		if (inode)
+			inode_unlock(inode);
 		sock->ops = NULL;
 		module_put(owner);
 	}
@@ -624,6 +631,11 @@ void sock_release(struct socket *sock)
 	}
 	sock->file = NULL;
 }
+
+void sock_release(struct socket *sock)
+{
+	__sock_release(sock, NULL);
+}
 EXPORT_SYMBOL(sock_release);
 
 void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
@@ -1056,7 +1068,7 @@ static int sock_mmap(struct file *file, struct vm_area_struct *vma)
 
 static int sock_close(struct inode *inode, struct file *filp)
 {
-	sock_release(SOCKET_I(inode));
+	__sock_release(SOCKET_I(inode), inode);
 	return 0;
 }
 

diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index bda1a13..779579c 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig

@@ -20,6 +20,14 @@
 
 	  If unsure, say Y.
 
+config XFRM_INTERFACE
+	tristate "Transformation virtual interface"
+	depends on XFRM && IPV6
+	---help---
+	  This provides a virtual interface to route IPsec traffic.
+
+	  If unsure, say N.
+
 config XFRM_SUB_POLICY
 	bool "Transformation sub policy support"
 	depends on XFRM

diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index c0e9619..5f038f33 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile

@@ -9,3 +9,4 @@
 obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
 obj-$(CONFIG_XFRM_USER) += xfrm_user.o
 obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
+obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o

diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 6e3f025..87b9c0d 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c

@@ -234,23 +234,28 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 
 	seq = 0;
 	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
+		secpath_reset(skb);
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
 		goto drop;
 	}
 
 	do {
 		if (skb->sp->len == XFRM_MAX_DEPTH) {
+			secpath_reset(skb);
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
 			goto drop;
 		}
 
 		x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
 		if (x == NULL) {
+			secpath_reset(skb);
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
 			xfrm_audit_state_notfound(skb, family, spi, seq);
 			goto drop;
 		}
 
+		skb->mark = xfrm_smark_get(skb->mark, x);
+
 		skb->sp->xvec[skb->sp->len++] = x;
 
 		spin_lock(&x->lock);

diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
new file mode 100644
index 0000000..0b6818d
--- /dev/null
+++ b/net/xfrm/xfrm_interface.c

@@ -0,0 +1,974 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *	XFRM virtual interface
+ *
+ *	Copyright (C) 2018 secunet Security Networks AG
+ *
+ *	Author:
+ *	Steffen Klassert <steffen.klassert@secunet.com>
+ */
+
+#include <linux/module.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/sockios.h>
+#include <linux/icmp.h>
+#include <linux/if.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_link.h>
+#include <linux/if_arp.h>
+#include <linux/icmpv6.h>
+#include <linux/init.h>
+#include <linux/route.h>
+#include <linux/rtnetlink.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/slab.h>
+#include <linux/hash.h>
+
+#include <linux/uaccess.h>
+#include <linux/atomic.h>
+
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <linux/etherdevice.h>
+
+static int xfrmi_dev_init(struct net_device *dev);
+static void xfrmi_dev_setup(struct net_device *dev);
+static struct rtnl_link_ops xfrmi_link_ops __read_mostly;
+static unsigned int xfrmi_net_id __read_mostly;
+
+struct xfrmi_net {
+	/* lists for storing interfaces in use */
+	struct xfrm_if __rcu *xfrmi[1];
+};
+
+#define for_each_xfrmi_rcu(start, xi) \
+	for (xi = rcu_dereference(start); xi; xi = rcu_dereference(xi->next))
+
+static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x)
+{
+	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+	struct xfrm_if *xi;
+
+	for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) {
+		if (x->if_id == xi->p.if_id &&
+		    (xi->dev->flags & IFF_UP))
+			return xi;
+	}
+
+	return NULL;
+}
+
+static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb)
+{
+	struct xfrmi_net *xfrmn;
+	int ifindex;
+	struct xfrm_if *xi;
+
+	if (!skb->dev)
+		return NULL;
+
+	xfrmn = net_generic(dev_net(skb->dev), xfrmi_net_id);
+	ifindex = skb->dev->ifindex;
+
+	for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) {
+		if (ifindex == xi->dev->ifindex &&
+			(xi->dev->flags & IFF_UP))
+				return xi;
+	}
+
+	return NULL;
+}
+
+static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
+{
+	struct xfrm_if __rcu **xip = &xfrmn->xfrmi[0];
+
+	rcu_assign_pointer(xi->next , rtnl_dereference(*xip));
+	rcu_assign_pointer(*xip, xi);
+}
+
+static void xfrmi_unlink(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
+{
+	struct xfrm_if __rcu **xip;
+	struct xfrm_if *iter;
+
+	for (xip = &xfrmn->xfrmi[0];
+	     (iter = rtnl_dereference(*xip)) != NULL;
+	     xip = &iter->next) {
+		if (xi == iter) {
+			rcu_assign_pointer(*xip, xi->next);
+			break;
+		}
+	}
+}
+
+static void xfrmi_dev_free(struct net_device *dev)
+{
+	free_percpu(dev->tstats);
+}
+
+static int xfrmi_create2(struct net_device *dev)
+{
+	struct xfrm_if *xi = netdev_priv(dev);
+	struct net *net = dev_net(dev);
+	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+	int err;
+
+	dev->rtnl_link_ops = &xfrmi_link_ops;
+	err = register_netdevice(dev);
+	if (err < 0)
+		goto out;
+
+	strcpy(xi->p.name, dev->name);
+
+	dev_hold(dev);
+	xfrmi_link(xfrmn, xi);
+
+	return 0;
+
+out:
+	return err;
+}
+
+static struct xfrm_if *xfrmi_create(struct net *net, struct xfrm_if_parms *p)
+{
+	struct net_device *dev;
+	struct xfrm_if *xi;
+	char name[IFNAMSIZ];
+	int err;
+
+	if (p->name[0]) {
+		strlcpy(name, p->name, IFNAMSIZ);
+	} else {
+		err = -EINVAL;
+		goto failed;
+	}
+
+	dev = alloc_netdev(sizeof(*xi), name, NET_NAME_UNKNOWN, xfrmi_dev_setup);
+	if (!dev) {
+		err = -EAGAIN;
+		goto failed;
+	}
+
+	dev_net_set(dev, net);
+
+	xi = netdev_priv(dev);
+	xi->p = *p;
+	xi->net = net;
+	xi->dev = dev;
+	xi->phydev = dev_get_by_index(net, p->link);
+	if (!xi->phydev) {
+		err = -ENODEV;
+		goto failed_free;
+	}
+
+	err = xfrmi_create2(dev);
+	if (err < 0)
+		goto failed_dev_put;
+
+	return xi;
+
+failed_dev_put:
+	dev_put(xi->phydev);
+failed_free:
+	free_netdev(dev);
+failed:
+	return ERR_PTR(err);
+}
+
+static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p,
+				   int create)
+{
+	struct xfrm_if __rcu **xip;
+	struct xfrm_if *xi;
+	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+
+	for (xip = &xfrmn->xfrmi[0];
+	     (xi = rtnl_dereference(*xip)) != NULL;
+	     xip = &xi->next) {
+		if (xi->p.if_id == p->if_id) {
+			if (create)
+				return ERR_PTR(-EEXIST);
+
+			return xi;
+		}
+	}
+	if (!create)
+		return ERR_PTR(-ENODEV);
+	return xfrmi_create(net, p);
+}
+
+static void xfrmi_dev_uninit(struct net_device *dev)
+{
+	struct xfrm_if *xi = netdev_priv(dev);
+	struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id);
+
+	xfrmi_unlink(xfrmn, xi);
+	dev_put(xi->phydev);
+	dev_put(dev);
+}
+
+static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
+{
+	skb->tstamp.tv64 = 0;
+	skb->pkt_type = PACKET_HOST;
+	skb->skb_iif = 0;
+	skb->ignore_df = 0;
+	skb_dst_drop(skb);
+	nf_reset(skb);
+	nf_reset_trace(skb);
+
+	if (!xnet)
+		return;
+
+	ipvs_reset(skb);
+	secpath_reset(skb);
+	skb_orphan(skb);
+	skb->mark = 0;
+}
+
+static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
+{
+	struct pcpu_sw_netstats *tstats;
+	struct xfrm_mode *inner_mode;
+	struct net_device *dev;
+	struct xfrm_state *x;
+	struct xfrm_if *xi;
+	bool xnet;
+
+	if (err && !skb->sp)
+		return 0;
+
+	x = xfrm_input_state(skb);
+
+	xi = xfrmi_lookup(xs_net(x), x);
+	if (!xi)
+		return 1;
+
+	dev = xi->dev;
+	skb->dev = dev;
+
+	if (err) {
+		dev->stats.rx_errors++;
+		dev->stats.rx_dropped++;
+
+		return 0;
+	}
+
+	xnet = !net_eq(xi->net, dev_net(skb->dev));
+
+	if (xnet) {
+		inner_mode = x->inner_mode;
+
+		if (x->sel.family == AF_UNSPEC) {
+			inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
+			if (inner_mode == NULL) {
+				XFRM_INC_STATS(dev_net(skb->dev),
+					       LINUX_MIB_XFRMINSTATEMODEERROR);
+				return -EINVAL;
+			}
+		}
+
+		if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb,
+				       inner_mode->afinfo->family))
+			return -EPERM;
+	}
+
+	xfrmi_scrub_packet(skb, xnet);
+
+	tstats = this_cpu_ptr(dev->tstats);
+
+	u64_stats_update_begin(&tstats->syncp);
+	tstats->rx_packets++;
+	tstats->rx_bytes += skb->len;
+	u64_stats_update_end(&tstats->syncp);
+
+	return 0;
+}
+
+static int
+xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
+{
+	struct xfrm_if *xi = netdev_priv(dev);
+	struct net_device_stats *stats = &xi->dev->stats;
+	struct dst_entry *dst = skb_dst(skb);
+	unsigned int length = skb->len;
+	struct net_device *tdev;
+	struct xfrm_state *x;
+	int err = -1;
+	int mtu;
+
+	if (!dst)
+		goto tx_err_link_failure;
+
+	fl->flowi_xfrm.if_id = xi->p.if_id;
+
+	dst_hold(dst);
+	dst = xfrm_lookup(xi->net, dst, fl, NULL, 0);
+	if (IS_ERR(dst)) {
+		err = PTR_ERR(dst);
+		dst = NULL;
+		goto tx_err_link_failure;
+	}
+
+	x = dst->xfrm;
+	if (!x)
+		goto tx_err_link_failure;
+
+	if (x->if_id != xi->p.if_id)
+		goto tx_err_link_failure;
+
+	tdev = dst->dev;
+
+	if (tdev == dev) {
+		stats->collisions++;
+		net_warn_ratelimited("%s: Local routing loop detected!\n",
+				     xi->p.name);
+		goto tx_err_dst_release;
+	}
+
+	mtu = dst_mtu(dst);
+	if (!skb->ignore_df && skb->len > mtu) {
+		if (dst && dst->ops->update_pmtu)
+			dst->ops->update_pmtu(dst, NULL, skb, mtu);
+
+		if (skb->protocol == htons(ETH_P_IPV6)) {
+			if (mtu < IPV6_MIN_MTU)
+				mtu = IPV6_MIN_MTU;
+
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+		} else {
+			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+				  htonl(mtu));
+		}
+
+		dst_release(dst);
+		return -EMSGSIZE;
+	}
+
+	xfrmi_scrub_packet(skb, !net_eq(xi->net, dev_net(dev)));
+	skb_dst_set(skb, dst);
+	skb->dev = tdev;
+
+	err = dst_output(xi->net, skb->sk, skb);
+	if (net_xmit_eval(err) == 0) {
+		struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+
+		u64_stats_update_begin(&tstats->syncp);
+		tstats->tx_bytes += length;
+		tstats->tx_packets++;
+		u64_stats_update_end(&tstats->syncp);
+	} else {
+		stats->tx_errors++;
+		stats->tx_aborted_errors++;
+	}
+
+	return 0;
+tx_err_link_failure:
+	stats->tx_carrier_errors++;
+	dst_link_failure(skb);
+tx_err_dst_release:
+	dst_release(dst);
+	return err;
+}
+
+static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct xfrm_if *xi = netdev_priv(dev);
+	struct net_device_stats *stats = &xi->dev->stats;
+	struct flowi fl;
+	int ret;
+
+	memset(&fl, 0, sizeof(fl));
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IPV6):
+		xfrm_decode_session(skb, &fl, AF_INET6);
+		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+		break;
+	case htons(ETH_P_IP):
+		xfrm_decode_session(skb, &fl, AF_INET);
+		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+		break;
+	default:
+		goto tx_err;
+	}
+
+	fl.flowi_oif = xi->phydev->ifindex;
+
+	ret = xfrmi_xmit2(skb, dev, &fl);
+	if (ret < 0)
+		goto tx_err;
+
+	return NETDEV_TX_OK;
+
+tx_err:
+	stats->tx_errors++;
+	stats->tx_dropped++;
+	kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+static int xfrmi4_err(struct sk_buff *skb, u32 info)
+{
+	const struct iphdr *iph = (const struct iphdr *)skb->data;
+	struct net *net = dev_net(skb->dev);
+	int protocol = iph->protocol;
+	struct ip_comp_hdr *ipch;
+	struct ip_esp_hdr *esph;
+	struct ip_auth_hdr *ah ;
+	struct xfrm_state *x;
+	struct xfrm_if *xi;
+	__be32 spi;
+
+	switch (protocol) {
+	case IPPROTO_ESP:
+		esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
+		spi = esph->spi;
+		break;
+	case IPPROTO_AH:
+		ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
+		spi = ah->spi;
+		break;
+	case IPPROTO_COMP:
+		ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
+		spi = htonl(ntohs(ipch->cpi));
+		break;
+	default:
+		return 0;
+	}
+
+	switch (icmp_hdr(skb)->type) {
+	case ICMP_DEST_UNREACH:
+		if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
+			return 0;
+	case ICMP_REDIRECT:
+		break;
+	default:
+		return 0;
+	}
+
+	x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+			      spi, protocol, AF_INET);
+	if (!x)
+		return 0;
+
+	xi = xfrmi_lookup(net, x);
+	if (!xi) {
+		xfrm_state_put(x);
+		return -1;
+	}
+
+	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
+		ipv4_update_pmtu(skb, net, info, 0, 0, protocol, 0);
+	else
+		ipv4_redirect(skb, net, 0, 0, protocol, 0);
+	xfrm_state_put(x);
+
+	return 0;
+}
+
+static int xfrmi6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+		    u8 type, u8 code, int offset, __be32 info)
+{
+	const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
+	struct net *net = dev_net(skb->dev);
+	int protocol = iph->nexthdr;
+	struct ip_comp_hdr *ipch;
+	struct ip_esp_hdr *esph;
+	struct ip_auth_hdr *ah;
+	struct xfrm_state *x;
+	struct xfrm_if *xi;
+	__be32 spi;
+
+	switch (protocol) {
+	case IPPROTO_ESP:
+		esph = (struct ip_esp_hdr *)(skb->data + offset);
+		spi = esph->spi;
+		break;
+	case IPPROTO_AH:
+		ah = (struct ip_auth_hdr *)(skb->data + offset);
+		spi = ah->spi;
+		break;
+	case IPPROTO_COMP:
+		ipch = (struct ip_comp_hdr *)(skb->data + offset);
+		spi = htonl(ntohs(ipch->cpi));
+		break;
+	default:
+		return 0;
+	}
+
+	if (type != ICMPV6_PKT_TOOBIG &&
+	    type != NDISC_REDIRECT)
+		return 0;
+
+	x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+			      spi, protocol, AF_INET6);
+	if (!x)
+		return 0;
+
+	xi = xfrmi_lookup(net, x);
+	if (!xi) {
+		xfrm_state_put(x);
+		return -1;
+	}
+
+	if (type == NDISC_REDIRECT)
+		ip6_redirect(skb, net, skb->dev->ifindex, 0,
+			     sock_net_uid(net, NULL));
+	else
+		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
+	xfrm_state_put(x);
+
+	return 0;
+}
+
+static int xfrmi_change(struct xfrm_if *xi, const struct xfrm_if_parms *p)
+{
+	if (xi->p.link != p->link)
+		return -EINVAL;
+
+	xi->p.if_id = p->if_id;
+
+	return 0;
+}
+
+static int xfrmi_update(struct xfrm_if *xi, struct xfrm_if_parms *p)
+{
+	struct net *net = dev_net(xi->dev);
+	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+	int err;
+
+	xfrmi_unlink(xfrmn, xi);
+	synchronize_net();
+	err = xfrmi_change(xi, p);
+	xfrmi_link(xfrmn, xi);
+	netdev_state_change(xi->dev);
+	return err;
+}
+
+static struct rtnl_link_stats64 *xfrmi_get_stats64(struct net_device *dev,
+			       struct rtnl_link_stats64 *s)
+{
+	int cpu;
+
+	if (!dev->tstats)
+		return s;
+
+	for_each_possible_cpu(cpu) {
+		struct pcpu_sw_netstats *stats;
+		struct pcpu_sw_netstats tmp;
+		int start;
+
+		stats = per_cpu_ptr(dev->tstats, cpu);
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			tmp.rx_packets = stats->rx_packets;
+			tmp.rx_bytes   = stats->rx_bytes;
+			tmp.tx_packets = stats->tx_packets;
+			tmp.tx_bytes   = stats->tx_bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+		s->rx_packets += tmp.rx_packets;
+		s->rx_bytes   += tmp.rx_bytes;
+		s->tx_packets += tmp.tx_packets;
+		s->tx_bytes   += tmp.tx_bytes;
+	}
+
+	s->rx_dropped = dev->stats.rx_dropped;
+	s->tx_dropped = dev->stats.tx_dropped;
+
+	return s;
+}
+
+static int xfrmi_get_iflink(const struct net_device *dev)
+{
+	struct xfrm_if *xi = netdev_priv(dev);
+
+	return xi->phydev->ifindex;
+}
+
+
+static const struct net_device_ops xfrmi_netdev_ops = {
+	.ndo_init	= xfrmi_dev_init,
+	.ndo_uninit	= xfrmi_dev_uninit,
+	.ndo_start_xmit = xfrmi_xmit,
+	.ndo_get_stats64 = xfrmi_get_stats64,
+	.ndo_get_iflink = xfrmi_get_iflink,
+};
+
+static void xfrmi_dev_setup(struct net_device *dev)
+{
+	dev->netdev_ops 	= &xfrmi_netdev_ops;
+	dev->type		= ARPHRD_NONE;
+	dev->hard_header_len 	= ETH_HLEN;
+	dev->min_header_len	= ETH_HLEN;
+	dev->mtu		= ETH_DATA_LEN;
+	dev->addr_len		= ETH_ALEN;
+	dev->flags 		= IFF_NOARP;
+	dev->destructor	= xfrmi_dev_free;
+	netif_keep_dst(dev);
+}
+
+static int xfrmi_dev_init(struct net_device *dev)
+{
+	struct xfrm_if *xi = netdev_priv(dev);
+	struct net_device *phydev = xi->phydev;
+	int err;
+
+	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+	if (!dev->tstats)
+		return -ENOMEM;
+
+	err = gro_cells_init(&xi->gro_cells, dev);
+	if (err) {
+		free_percpu(dev->tstats);
+		return err;
+	}
+
+	dev->features |= NETIF_F_LLTX;
+
+	dev->needed_headroom = phydev->needed_headroom;
+	dev->needed_tailroom = phydev->needed_tailroom;
+
+	if (is_zero_ether_addr(dev->dev_addr))
+		eth_hw_addr_inherit(dev, phydev);
+	if (is_zero_ether_addr(dev->broadcast))
+		memcpy(dev->broadcast, phydev->broadcast, dev->addr_len);
+
+	return 0;
+}
+
+static int xfrmi_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	return 0;
+}
+
+static void xfrmi_netlink_parms(struct nlattr *data[],
+			       struct xfrm_if_parms *parms)
+{
+	memset(parms, 0, sizeof(*parms));
+
+	if (!data)
+		return;
+
+	if (data[IFLA_XFRM_LINK])
+		parms->link = nla_get_u32(data[IFLA_XFRM_LINK]);
+
+	if (data[IFLA_XFRM_IF_ID])
+		parms->if_id = nla_get_u32(data[IFLA_XFRM_IF_ID]);
+}
+
+static int xfrmi_newlink(struct net *src_net, struct net_device *dev,
+			struct nlattr *tb[], struct nlattr *data[])
+{
+	struct net *net = dev_net(dev);
+	struct xfrm_if_parms *p;
+	struct xfrm_if *xi;
+
+	xi = netdev_priv(dev);
+	p = &xi->p;
+
+	xfrmi_netlink_parms(data, p);
+
+	if (!tb[IFLA_IFNAME])
+		return -EINVAL;
+
+	nla_strlcpy(p->name, tb[IFLA_IFNAME], IFNAMSIZ);
+
+	xi = xfrmi_locate(net, p, 1);
+	return PTR_ERR_OR_ZERO(xi);
+}
+
+static void xfrmi_dellink(struct net_device *dev, struct list_head *head)
+{
+	unregister_netdevice_queue(dev, head);
+}
+
+static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
+			   struct nlattr *data[])
+{
+	struct xfrm_if *xi = netdev_priv(dev);
+	struct net *net = dev_net(dev);
+
+	xfrmi_netlink_parms(data, &xi->p);
+
+	xi = xfrmi_locate(net, &xi->p, 0);
+
+	if (IS_ERR_OR_NULL(xi)) {
+		xi = netdev_priv(dev);
+	} else {
+		if (xi->dev != dev)
+			return -EEXIST;
+	}
+
+	return xfrmi_update(xi, &xi->p);
+}
+
+static size_t xfrmi_get_size(const struct net_device *dev)
+{
+	return
+		/* IFLA_XFRM_LINK */
+		nla_total_size(4) +
+		/* IFLA_XFRM_IF_ID */
+		nla_total_size(4) +
+		0;
+}
+
+static int xfrmi_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct xfrm_if *xi = netdev_priv(dev);
+	struct xfrm_if_parms *parm = &xi->p;
+
+	if (nla_put_u32(skb, IFLA_XFRM_LINK, parm->link) ||
+	    nla_put_u32(skb, IFLA_XFRM_IF_ID, parm->if_id))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+struct net *xfrmi_get_link_net(const struct net_device *dev)
+{
+	struct xfrm_if *xi = netdev_priv(dev);
+
+	return dev_net(xi->phydev);
+}
+
+static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = {
+	[IFLA_XFRM_LINK]	= { .type = NLA_U32 },
+	[IFLA_XFRM_IF_ID]	= { .type = NLA_U32 },
+};
+
+static struct rtnl_link_ops xfrmi_link_ops __read_mostly = {
+	.kind		= "xfrm",
+	.maxtype	= IFLA_XFRM_MAX,
+	.policy		= xfrmi_policy,
+	.priv_size	= sizeof(struct xfrm_if),
+	.setup		= xfrmi_dev_setup,
+	.validate	= xfrmi_validate,
+	.newlink	= xfrmi_newlink,
+	.dellink	= xfrmi_dellink,
+	.changelink	= xfrmi_changelink,
+	.get_size	= xfrmi_get_size,
+	.fill_info	= xfrmi_fill_info,
+	.get_link_net	= xfrmi_get_link_net,
+};
+
+static void __net_exit xfrmi_destroy_interfaces(struct xfrmi_net *xfrmn)
+{
+	struct xfrm_if *xi;
+	LIST_HEAD(list);
+
+	xi = rtnl_dereference(xfrmn->xfrmi[0]);
+	if (!xi)
+		return;
+
+	unregister_netdevice_queue(xi->dev, &list);
+	unregister_netdevice_many(&list);
+}
+
+static int __net_init xfrmi_init_net(struct net *net)
+{
+	return 0;
+}
+
+static void __net_exit xfrmi_exit_net(struct net *net)
+{
+	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+
+	rtnl_lock();
+	xfrmi_destroy_interfaces(xfrmn);
+	rtnl_unlock();
+}
+
+static struct pernet_operations xfrmi_net_ops = {
+	.init = xfrmi_init_net,
+	.exit = xfrmi_exit_net,
+	.id   = &xfrmi_net_id,
+	.size = sizeof(struct xfrmi_net),
+};
+
+static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
+	.handler	=	xfrm6_rcv,
+	.cb_handler	=	xfrmi_rcv_cb,
+	.err_handler	=	xfrmi6_err,
+	.priority	=	10,
+};
+
+static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = {
+	.handler	=	xfrm6_rcv,
+	.cb_handler	=	xfrmi_rcv_cb,
+	.err_handler	=	xfrmi6_err,
+	.priority	=	10,
+};
+
+static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = {
+	.handler	=	xfrm6_rcv,
+	.cb_handler	=	xfrmi_rcv_cb,
+	.err_handler	=	xfrmi6_err,
+	.priority	=	10,
+};
+
+static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = {
+	.handler	=	xfrm4_rcv,
+	.input_handler	=	xfrm_input,
+	.cb_handler	=	xfrmi_rcv_cb,
+	.err_handler	=	xfrmi4_err,
+	.priority	=	10,
+};
+
+static struct xfrm4_protocol xfrmi_ah4_protocol __read_mostly = {
+	.handler	=	xfrm4_rcv,
+	.input_handler	=	xfrm_input,
+	.cb_handler	=	xfrmi_rcv_cb,
+	.err_handler	=	xfrmi4_err,
+	.priority	=	10,
+};
+
+static struct xfrm4_protocol xfrmi_ipcomp4_protocol __read_mostly = {
+	.handler	=	xfrm4_rcv,
+	.input_handler	=	xfrm_input,
+	.cb_handler	=	xfrmi_rcv_cb,
+	.err_handler	=	xfrmi4_err,
+	.priority	=	10,
+};
+
+static int __init xfrmi4_init(void)
+{
+	int err;
+
+	err = xfrm4_protocol_register(&xfrmi_esp4_protocol, IPPROTO_ESP);
+	if (err < 0)
+		goto xfrm_proto_esp_failed;
+	err = xfrm4_protocol_register(&xfrmi_ah4_protocol, IPPROTO_AH);
+	if (err < 0)
+		goto xfrm_proto_ah_failed;
+	err = xfrm4_protocol_register(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
+	if (err < 0)
+		goto xfrm_proto_comp_failed;
+
+	return 0;
+
+xfrm_proto_comp_failed:
+	xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
+xfrm_proto_ah_failed:
+	xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
+xfrm_proto_esp_failed:
+	return err;
+}
+
+static void xfrmi4_fini(void)
+{
+	xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
+	xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
+	xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
+}
+
+static int __init xfrmi6_init(void)
+{
+	int err;
+
+	err = xfrm6_protocol_register(&xfrmi_esp6_protocol, IPPROTO_ESP);
+	if (err < 0)
+		goto xfrm_proto_esp_failed;
+	err = xfrm6_protocol_register(&xfrmi_ah6_protocol, IPPROTO_AH);
+	if (err < 0)
+		goto xfrm_proto_ah_failed;
+	err = xfrm6_protocol_register(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
+	if (err < 0)
+		goto xfrm_proto_comp_failed;
+
+	return 0;
+
+xfrm_proto_comp_failed:
+	xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
+xfrm_proto_ah_failed:
+	xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
+xfrm_proto_esp_failed:
+	return err;
+}
+
+static void xfrmi6_fini(void)
+{
+	xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
+	xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
+	xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
+}
+
+static const struct xfrm_if_cb xfrm_if_cb = {
+	.decode_session =	xfrmi_decode_session,
+};
+
+static int __init xfrmi_init(void)
+{
+	const char *msg;
+	int err;
+
+	pr_info("IPsec XFRM device driver\n");
+
+	msg = "tunnel device";
+	err = register_pernet_device(&xfrmi_net_ops);
+	if (err < 0)
+		goto pernet_dev_failed;
+
+	msg = "xfrm4 protocols";
+	err = xfrmi4_init();
+	if (err < 0)
+		goto xfrmi4_failed;
+
+	msg = "xfrm6 protocols";
+	err = xfrmi6_init();
+	if (err < 0)
+		goto xfrmi6_failed;
+
+
+	msg = "netlink interface";
+	err = rtnl_link_register(&xfrmi_link_ops);
+	if (err < 0)
+		goto rtnl_link_failed;
+
+	xfrm_if_register_cb(&xfrm_if_cb);
+
+	return err;
+
+rtnl_link_failed:
+	xfrmi6_fini();
+xfrmi6_failed:
+	xfrmi4_fini();
+xfrmi4_failed:
+	unregister_pernet_device(&xfrmi_net_ops);
+pernet_dev_failed:
+	pr_err("xfrmi init: failed to register %s\n", msg);
+	return err;
+}
+
+static void __exit xfrmi_fini(void)
+{
+	xfrm_if_unregister_cb();
+	rtnl_link_unregister(&xfrmi_link_ops);
+	xfrmi4_fini();
+	xfrmi6_fini();
+	unregister_pernet_device(&xfrmi_net_ops);
+}
+
+module_init(xfrmi_init);
+module_exit(xfrmi_fini);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("xfrm");
+MODULE_ALIAS_NETDEV("xfrm0");
+MODULE_AUTHOR("Steffen Klassert");
+MODULE_DESCRIPTION("XFRM virtual interface");

diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index d864a6d..bbe611a 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c

@@ -66,8 +66,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 			goto error_nolock;
 		}
 
-		if (x->props.output_mark)
-			skb->mark = x->props.output_mark;
+		skb->mark = xfrm_smark_get(skb->mark, x);
 
 		err = x->outer_mode->output(x, skb);
 		if (err) {

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 0bad618..79866a9 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c

@@ -44,6 +44,9 @@ struct xfrm_flo {
 	u8 flags;
 };
 
+static DEFINE_SPINLOCK(xfrm_if_cb_lock);
+static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly;
+
 static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
 						__read_mostly;
@@ -116,7 +119,13 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
 	return afinfo;
 }
 
-static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
+/* Called with rcu_read_lock(). */
+static const struct xfrm_if_cb *xfrm_if_get_cb(void)
+{
+	return rcu_dereference(xfrm_if_cb);
+}
+
+static void xfrm_policy_put_afinfo(const struct xfrm_policy_afinfo *afinfo)
 {
 	rcu_read_unlock();
 }
@@ -781,6 +790,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	newpos = NULL;
 	hlist_for_each_entry(pol, chain, bydst) {
 		if (pol->type == policy->type &&
+		    pol->if_id == policy->if_id &&
 		    !selector_cmp(&pol->selector, &policy->selector) &&
 		    xfrm_policy_mark_match(policy, pol) &&
 		    xfrm_sec_ctx_match(pol->security, policy->security) &&
@@ -833,8 +843,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 }
 EXPORT_SYMBOL(xfrm_policy_insert);
 
-struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
-					  int dir, struct xfrm_selector *sel,
+struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
+					  u8 type, int dir,
+					  struct xfrm_selector *sel,
 					  struct xfrm_sec_ctx *ctx, int delete,
 					  int *err)
 {
@@ -847,6 +858,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
 	ret = NULL;
 	hlist_for_each_entry(pol, chain, bydst) {
 		if (pol->type == type &&
+		    pol->if_id == if_id &&
 		    (mark & pol->mark.m) == pol->mark.v &&
 		    !selector_cmp(sel, &pol->selector) &&
 		    xfrm_sec_ctx_match(ctx, pol->security)) {
@@ -872,8 +884,9 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
 }
 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
 
-struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
-				     int dir, u32 id, int delete, int *err)
+struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id,
+				     u8 type, int dir, u32 id, int delete,
+				     int *err)
 {
 	struct xfrm_policy *pol, *ret;
 	struct hlist_head *chain;
@@ -888,6 +901,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
 	ret = NULL;
 	hlist_for_each_entry(pol, chain, byidx) {
 		if (pol->type == type && pol->index == id &&
+		    pol->if_id == if_id &&
 		    (mark & pol->mark.m) == pol->mark.v) {
 			xfrm_pol_hold(pol);
 			if (delete) {
@@ -1098,6 +1112,7 @@ static int xfrm_policy_match(const struct xfrm_policy *pol,
 	bool match;
 
 	if (pol->family != family ||
+	    pol->if_id != fl->flowi_xfrm.if_id ||
 	    (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
 	    pol->type != type)
 		return ret;
@@ -1267,7 +1282,8 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
 
 		match = xfrm_selector_match(&pol->selector, fl, family);
 		if (match) {
-			if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
+			if ((sk->sk_mark & pol->mark.m) != pol->mark.v ||
+			    pol->if_id != fl->flowi_xfrm.if_id) {
 				pol = NULL;
 				goto out;
 			}
@@ -1395,6 +1411,7 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
 		newp->lft = old->lft;
 		newp->curlft = old->curlft;
 		newp->mark = old->mark;
+		newp->if_id = old->if_id;
 		newp->action = old->action;
 		newp->flags = old->flags;
 		newp->xfrm_nr = old->xfrm_nr;
@@ -1750,10 +1767,11 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 		dst_copy_metrics(dst1, dst);
 
 		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
+			__u32 mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
+
 			family = xfrm[i]->props.family;
 			dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
-					      &saddr, &daddr, family,
-					      xfrm[i]->props.output_mark);
+					      &saddr, &daddr, family, mark);
 			err = PTR_ERR(dst);
 			if (IS_ERR(dst))
 				goto put_states;
@@ -2123,6 +2141,10 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
 						  xflo->dst_orig);
 	if (IS_ERR(new_xdst)) {
 		err = PTR_ERR(new_xdst);
+		if (err == -EREMOTE) {
+			xfrm_pols_put(pols, num_pols);
+			return NULL;
+		}
 		if (err != -EAGAIN)
 			goto error;
 		if (oldflo == NULL)
@@ -2235,6 +2257,9 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 			if (IS_ERR(xdst)) {
 				xfrm_pols_put(pols, num_pols);
 				err = PTR_ERR(xdst);
+				if (err == -EREMOTE)
+					goto nopol;
+
 				goto dropdst;
 			} else if (xdst == NULL) {
 				num_xfrms = 0;
@@ -2433,13 +2458,21 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star
 int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
 			  unsigned int family, int reverse)
 {
-	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	const struct xfrm_if_cb *ifcb = xfrm_if_get_cb();
+	struct xfrm_if *xi;
 	int err;
 
 	if (unlikely(afinfo == NULL))
 		return -EAFNOSUPPORT;
 
 	afinfo->decode_session(skb, fl, reverse);
+	if (ifcb) {
+		xi = ifcb->decode_session(skb);
+		if (xi)
+			fl->flowi_xfrm.if_id = xi->p.if_id;
+	}
+
 	err = security_xfrm_decode_session(skb, &fl->flowi_secid);
 	xfrm_policy_put_afinfo(afinfo);
 	return err;
@@ -2904,6 +2937,20 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void
 static struct notifier_block xfrm_dev_notifier = {
 	.notifier_call	= xfrm_dev_event,
 };
+void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb)
+{
+	spin_lock(&xfrm_if_cb_lock);
+	rcu_assign_pointer(xfrm_if_cb, ifcb);
+	spin_unlock(&xfrm_if_cb_lock);
+}
+EXPORT_SYMBOL(xfrm_if_register_cb);
+
+void xfrm_if_unregister_cb(void)
+{
+	RCU_INIT_POINTER(xfrm_if_cb, NULL);
+	synchronize_rcu();
+}
+EXPORT_SYMBOL(xfrm_if_unregister_cb);
 
 #ifdef CONFIG_XFRM_STATISTICS
 static int __net_init xfrm_statistics_init(struct net *net)
@@ -3083,6 +3130,9 @@ void __init xfrm_init(void)
 	register_pernet_subsys(&xfrm_net_ops);
 	seqcount_init(&xfrm_policy_hash_generation);
 	xfrm_input_init();
+
+	RCU_INIT_POINTER(xfrm_if_cb, NULL);
+	synchronize_rcu();
 }
 
 #ifdef CONFIG_AUDITSYSCALL

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 5306e97..a4a0bd7 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c

@@ -794,6 +794,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	int error = 0;
 	struct xfrm_state *best = NULL;
 	u32 mark = pol->mark.v & pol->mark.m;
+	u32 if_id = fl->flowi_xfrm.if_id;
 	unsigned short encap_family = tmpl->encap_family;
 	unsigned int sequence;
 	struct km_event c;
@@ -808,6 +809,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 		if (x->props.family == encap_family &&
 		    x->props.reqid == tmpl->reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
+		    x->if_id == if_id &&
 		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
 		    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
 		    tmpl->mode == x->props.mode &&
@@ -824,6 +826,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 		if (x->props.family == encap_family &&
 		    x->props.reqid == tmpl->reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
+		    x->if_id == if_id &&
 		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
 		    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
 		    tmpl->mode == x->props.mode &&
@@ -863,6 +866,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 		 * to current session. */
 		xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
 		memcpy(&x->mark, &pol->mark, sizeof(x->mark));
+		x->if_id = if_id;
 
 		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
 		if (error) {
@@ -920,7 +924,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 }
 
 struct xfrm_state *
-xfrm_stateonly_find(struct net *net, u32 mark,
+xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
 		    xfrm_address_t *daddr, xfrm_address_t *saddr,
 		    unsigned short family, u8 mode, u8 proto, u32 reqid)
 {
@@ -933,6 +937,7 @@ xfrm_stateonly_find(struct net *net, u32 mark,
 		if (x->props.family == family &&
 		    x->props.reqid == reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
+		    x->if_id == if_id &&
 		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
 		    xfrm_state_addr_check(x, daddr, saddr, family) &&
 		    mode == x->props.mode &&
@@ -1013,11 +1018,13 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
 	struct xfrm_state *x;
 	unsigned int h;
 	u32 mark = xnew->mark.v & xnew->mark.m;
+	u32 if_id = xnew->if_id;
 
 	h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
 	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
 		if (x->props.family	== family &&
 		    x->props.reqid	== reqid &&
+		    x->if_id		== if_id &&
 		    (mark & x->mark.m) == x->mark.v &&
 		    xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
 		    xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
@@ -1040,7 +1047,7 @@ EXPORT_SYMBOL(xfrm_state_insert);
 static struct xfrm_state *__find_acq_core(struct net *net,
 					  const struct xfrm_mark *m,
 					  unsigned short family, u8 mode,
-					  u32 reqid, u8 proto,
+					  u32 reqid, u32 if_id, u8 proto,
 					  const xfrm_address_t *daddr,
 					  const xfrm_address_t *saddr,
 					  int create)
@@ -1095,6 +1102,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
 		x->props.family = family;
 		x->props.mode = mode;
 		x->props.reqid = reqid;
+		x->if_id = if_id;
 		x->mark.v = m->v;
 		x->mark.m = m->m;
 		x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
@@ -1149,7 +1157,7 @@ int xfrm_state_add(struct xfrm_state *x)
 
 	if (use_spi && !x1)
 		x1 = __find_acq_core(net, &x->mark, family, x->props.mode,
-				     x->props.reqid, x->id.proto,
+				     x->props.reqid, x->if_id, x->id.proto,
 				     &x->id.daddr, &x->props.saddr, 0);
 
 	__xfrm_state_bump_genids(x);
@@ -1241,6 +1249,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig)
 	x->props.flags = orig->props.flags;
 	x->props.extra_flags = orig->props.extra_flags;
 
+	x->if_id = orig->if_id;
 	x->tfcpad = orig->tfcpad;
 	x->replay_maxdiff = orig->replay_maxdiff;
 	x->replay_maxage = orig->replay_maxage;
@@ -1395,6 +1404,19 @@ int xfrm_state_update(struct xfrm_state *x)
 		if (x1->curlft.use_time)
 			xfrm_state_check_expire(x1);
 
+		if (x->props.smark.m || x->props.smark.v || x->if_id) {
+			spin_lock_bh(&net->xfrm.xfrm_state_lock);
+
+			if (x->props.smark.m || x->props.smark.v)
+				x1->props.smark = x->props.smark;
+
+			if (x->if_id)
+				x1->if_id = x->if_id;
+
+			__xfrm_state_bump_genids(x1);
+			spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+		}
+
 		err = 0;
 		x->km.state = XFRM_STATE_DEAD;
 		__xfrm_state_put(x);
@@ -1458,13 +1480,13 @@ EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
 
 struct xfrm_state *
 xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
-	      u8 proto, const xfrm_address_t *daddr,
+	      u32 if_id, u8 proto, const xfrm_address_t *daddr,
 	      const xfrm_address_t *saddr, int create, unsigned short family)
 {
 	struct xfrm_state *x;
 
 	spin_lock_bh(&net->xfrm.xfrm_state_lock);
-	x = __find_acq_core(net, mark, family, mode, reqid, proto, daddr, saddr, create);
+	x = __find_acq_core(net, mark, family, mode, reqid, if_id, proto, daddr, saddr, create);
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 
 	return x;

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 2c92586..0d7dd77 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c

@@ -533,6 +533,19 @@ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs,
 		x->replay_maxdiff = nla_get_u32(rt);
 }
 
+static void xfrm_smark_init(struct nlattr **attrs, struct xfrm_mark *m)
+{
+	if (attrs[XFRMA_SET_MARK]) {
+		m->v = nla_get_u32(attrs[XFRMA_SET_MARK]);
+		if (attrs[XFRMA_SET_MARK_MASK])
+			m->m = nla_get_u32(attrs[XFRMA_SET_MARK_MASK]);
+		else
+			m->m = 0xffffffff;
+	} else {
+		m->v = m->m = 0;
+	}
+}
+
 static struct xfrm_state *xfrm_state_construct(struct net *net,
 					       struct xfrm_usersa_info *p,
 					       struct nlattr **attrs,
@@ -585,8 +598,10 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 
 	xfrm_mark_get(attrs, &x->mark);
 
-	if (attrs[XFRMA_OUTPUT_MARK])
-		x->props.output_mark = nla_get_u32(attrs[XFRMA_OUTPUT_MARK]);
+	xfrm_smark_init(attrs, &x->props.smark);
+
+	if (attrs[XFRMA_IF_ID])
+		x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
 
 	err = __xfrm_init_state(x, false);
 	if (err)
@@ -801,6 +816,18 @@ static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb)
 	return 0;
 }
 
+static int xfrm_smark_put(struct sk_buff *skb, struct xfrm_mark *m)
+{
+	int ret = 0;
+
+	if (m->v | m->m) {
+		ret = nla_put_u32(skb, XFRMA_SET_MARK, m->v);
+		if (!ret)
+			ret = nla_put_u32(skb, XFRMA_SET_MARK_MASK, m->m);
+	}
+	return ret;
+}
+
 /* Don't change this without updating xfrm_sa_len! */
 static int copy_to_user_state_extra(struct xfrm_state *x,
 				    struct xfrm_usersa_info *p,
@@ -864,6 +891,11 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
 	ret = xfrm_mark_put(skb, &x->mark);
 	if (ret)
 		goto out;
+
+	ret = xfrm_smark_put(skb, &x->props.smark);
+	if (ret)
+		goto out;
+
 	if (x->replay_esn)
 		ret = nla_put(skb, XFRMA_REPLAY_ESN_VAL,
 			      xfrm_replay_state_esn_len(x->replay_esn),
@@ -873,13 +905,15 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
 			      &x->replay);
 	if (ret)
 		goto out;
-	if (x->security)
-		ret = copy_sec_ctx(x->security, skb);
-	if (x->props.output_mark) {
-		ret = nla_put_u32(skb, XFRMA_OUTPUT_MARK, x->props.output_mark);
+
+	if (x->if_id) {
+		ret = nla_put_u32(skb, XFRMA_IF_ID, x->if_id);
 		if (ret)
 			goto out;
 	}
+
+	if (x->security)
+		ret = copy_sec_ctx(x->security, skb);
 out:
 	return ret;
 }
@@ -1226,6 +1260,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 	int err;
 	u32 mark;
 	struct xfrm_mark m;
+	u32 if_id = 0;
 
 	p = nlmsg_data(nlh);
 	err = verify_spi_info(p->info.id.proto, p->min, p->max);
@@ -1238,6 +1273,10 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 	x = NULL;
 
 	mark = xfrm_mark_get(attrs, &m);
+
+	if (attrs[XFRMA_IF_ID])
+		if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+
 	if (p->info.seq) {
 		x = xfrm_find_acq_byseq(net, mark, p->info.seq);
 		if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) {
@@ -1248,7 +1287,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	if (!x)
 		x = xfrm_find_acq(net, &m, p->info.mode, p->info.reqid,
-				  p->info.id.proto, daddr,
+				  if_id, p->info.id.proto, daddr,
 				  &p->info.saddr, 1,
 				  family);
 	err = -ENOENT;
@@ -1545,6 +1584,9 @@ static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_us
 
 	xfrm_mark_get(attrs, &xp->mark);
 
+	if (attrs[XFRMA_IF_ID])
+		xp->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+
 	return xp;
  error:
 	*errp = err;
@@ -1692,6 +1734,8 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
 		err = copy_to_user_policy_type(xp->type, skb);
 	if (!err)
 		err = xfrm_mark_put(skb, &xp->mark);
+	if (!err)
+		err = xfrm_if_id_put(skb, xp->if_id);
 	if (err) {
 		nlmsg_cancel(skb, nlh);
 		return err;
@@ -1743,10 +1787,6 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,
 	struct sk_buff *skb;
 	int err;
 
-	err = verify_policy_dir(dir);
-	if (err)
-		return ERR_PTR(err);
-
 	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
@@ -1777,6 +1817,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	int delete;
 	struct xfrm_mark m;
 	u32 mark = xfrm_mark_get(attrs, &m);
+	u32 if_id = 0;
 
 	p = nlmsg_data(nlh);
 	delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY;
@@ -1789,8 +1830,11 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (err)
 		return err;
 
+	if (attrs[XFRMA_IF_ID])
+		if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+
 	if (p->index)
-		xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, delete, &err);
+		xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, delete, &err);
 	else {
 		struct nlattr *rt = attrs[XFRMA_SEC_CTX];
 		struct xfrm_sec_ctx *ctx;
@@ -1807,7 +1851,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 			if (err)
 				return err;
 		}
-		xp = xfrm_policy_bysel_ctx(net, mark, type, p->dir, &p->sel,
+		xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir, &p->sel,
 					   ctx, delete, &err);
 		security_xfrm_policy_free(ctx);
 	}
@@ -1931,6 +1975,10 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
 	if (err)
 		goto out_cancel;
 
+	err = xfrm_if_id_put(skb, x->if_id);
+	if (err)
+		goto out_cancel;
+
 	nlmsg_end(skb, nlh);
 	return 0;
 
@@ -2072,6 +2120,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 	int err = -ENOENT;
 	struct xfrm_mark m;
 	u32 mark = xfrm_mark_get(attrs, &m);
+	u32 if_id = 0;
 
 	err = copy_from_user_policy_type(&type, attrs);
 	if (err)
@@ -2081,8 +2130,11 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (err)
 		return err;
 
+	if (attrs[XFRMA_IF_ID])
+		if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+
 	if (p->index)
-		xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, 0, &err);
+		xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, 0, &err);
 	else {
 		struct nlattr *rt = attrs[XFRMA_SEC_CTX];
 		struct xfrm_sec_ctx *ctx;
@@ -2099,7 +2151,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 			if (err)
 				return err;
 		}
-		xp = xfrm_policy_bysel_ctx(net, mark, type, p->dir,
+		xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir,
 					   &p->sel, ctx, 0, &err);
 		security_xfrm_policy_free(ctx);
 	}
@@ -2268,10 +2320,6 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
 	int n = 0;
 	struct net *net = sock_net(skb->sk);
 
-	err = verify_policy_dir(pi->dir);
-	if (err)
-		return err;
-
 	if (attrs[XFRMA_MIGRATE] == NULL)
 		return -EINVAL;
 
@@ -2387,11 +2435,6 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
 {
 	struct net *net = &init_net;
 	struct sk_buff *skb;
-	int err;
-
-	err = verify_policy_dir(dir);
-	if (err)
-		return err;
 
 	skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k), GFP_ATOMIC);
 	if (skb == NULL)
@@ -2467,7 +2510,9 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_SA_EXTRA_FLAGS]	= { .type = NLA_U32 },
 	[XFRMA_PROTO]		= { .type = NLA_U8 },
 	[XFRMA_ADDRESS_FILTER]	= { .len = sizeof(struct xfrm_address_filter) },
-	[XFRMA_OUTPUT_MARK]	= { .len = NLA_U32 },
+	[XFRMA_SET_MARK]	= { .type = NLA_U32 },
+	[XFRMA_SET_MARK_MASK]	= { .type = NLA_U32 },
+	[XFRMA_IF_ID]		= { .type = NLA_U32 },
 };
 
 static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
@@ -2596,6 +2641,10 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
 	if (err)
 		return err;
 
+	err = xfrm_if_id_put(skb, x->if_id);
+	if (err)
+		return err;
+
 	nlmsg_end(skb, nlh);
 	return 0;
 }
@@ -2687,8 +2736,12 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
 		l += nla_total_size(sizeof(*x->coaddr));
 	if (x->props.extra_flags)
 		l += nla_total_size(sizeof(x->props.extra_flags));
-	if (x->props.output_mark)
-		l += nla_total_size(sizeof(x->props.output_mark));
+	if (x->props.smark.v | x->props.smark.m) {
+		l += nla_total_size(sizeof(x->props.smark.v));
+		l += nla_total_size(sizeof(x->props.smark.m));
+	}
+	if (x->if_id)
+		l += nla_total_size(sizeof(x->if_id));
 
 	/* Must count x->lastused as it may become non-zero behind our back. */
 	l += nla_total_size_64bit(sizeof(u64));
@@ -2816,6 +2869,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
 		err = copy_to_user_policy_type(xp->type, skb);
 	if (!err)
 		err = xfrm_mark_put(skb, &xp->mark);
+	if (!err)
+		err = xfrm_if_id_put(skb, xp->if_id);
 	if (err) {
 		nlmsg_cancel(skb, nlh);
 		return err;
@@ -2931,6 +2986,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
 		err = copy_to_user_policy_type(xp->type, skb);
 	if (!err)
 		err = xfrm_mark_put(skb, &xp->mark);
+	if (!err)
+		err = xfrm_if_id_put(skb, xp->if_id);
 	if (err) {
 		nlmsg_cancel(skb, nlh);
 		return err;
@@ -3010,6 +3067,8 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e
 		err = copy_to_user_policy_type(xp->type, skb);
 	if (!err)
 		err = xfrm_mark_put(skb, &xp->mark);
+	if (!err)
+		err = xfrm_if_id_put(skb, xp->if_id);
 	if (err)
 		goto out_free_skb;
 
@@ -3052,11 +3111,6 @@ static int xfrm_notify_policy_flush(const struct km_event *c)
 
 static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
 {
-	int err;
-
-	err = verify_policy_dir(dir);
-	if (err)
-		return err;
 
 	switch (c->event) {
 	case XFRM_MSG_NEWPOLICY:

diff --git a/scripts/clang-android.sh b/scripts/clang-android.sh
new file mode 100755
index 0000000..9186c4f
--- /dev/null
+++ b/scripts/clang-android.sh

@@ -0,0 +1,4 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+$* -dM -E - </dev/null 2>&1 | grep -q __ANDROID__ && echo "y"

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index d560fd8..a339bea 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h

@@ -73,6 +73,8 @@ enum bpf_cmd {
 	BPF_PROG_LOAD,
 	BPF_OBJ_PIN,
 	BPF_OBJ_GET,
+	BPF_PROG_ATTACH,
+	BPF_PROG_DETACH,
 };
 
 enum bpf_map_type {
@@ -96,8 +98,23 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_TRACEPOINT,
 	BPF_PROG_TYPE_XDP,
 	BPF_PROG_TYPE_PERF_EVENT,
+	BPF_PROG_TYPE_CGROUP_SKB,
 };
 
+enum bpf_attach_type {
+	BPF_CGROUP_INET_INGRESS,
+	BPF_CGROUP_INET_EGRESS,
+	__MAX_BPF_ATTACH_TYPE
+};
+
+#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+
+/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
+ * to the given target_fd cgroup the descendent cgroup will be able to
+ * override effective bpf program that was inherited from this cgroup
+ */
+#define BPF_F_ALLOW_OVERRIDE	(1U << 0)
+
 #define BPF_PSEUDO_MAP_FD	1
 
 /* flags for BPF_MAP_UPDATE_ELEM command */
@@ -107,6 +124,10 @@ enum bpf_prog_type {
 
 #define BPF_F_NO_PREALLOC	(1U << 0)
 
+/* Flags for accessing BPF object */
+#define BPF_F_RDONLY		(1U << 3)
+#define BPF_F_WRONLY		(1U << 4)
+
 union bpf_attr {
 	struct { /* anonymous struct used by BPF_MAP_CREATE command */
 		__u32	map_type;	/* one of enum bpf_map_type */
@@ -140,6 +161,14 @@ union bpf_attr {
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
 		__aligned_u64	pathname;
 		__u32		bpf_fd;
+		__u32		file_flags;
+	};
+
+	struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
+		__u32		target_fd;	/* container object to attach to */
+		__u32		attach_bpf_fd;	/* eBPF program to attach */
+		__u32		attach_type;
+		__u32		attach_flags;
 	};
 } __attribute__((aligned(8)));
 
@@ -388,40 +417,41 @@ enum bpf_func_id {
 	BPF_FUNC_current_task_under_cgroup,
 
 	/**
-	 * int bpf_skb_change_tail(skb, len, flags)
-	 *     The helper will resize the skb to the given new size, to be used f.e.
-	 *     with control messages.
-	 *     @skb: pointer to skb
-	 *     @len: new skb length
-	 *     @flags: reserved
-	 *     Return: 0 on success or negative error
+	 * bpf_skb_change_tail(skb, len, flags)
+	 * The helper will resize the skb to the given new size,
+	 * to be used f.e. with control messages.
+	 * @skb: pointer to skb
+	 * @len: new skb length
+	 * @flags: reserved
+	 * Return: 0 on success or negative error
 	 */
 	BPF_FUNC_skb_change_tail,
 
 	/**
-	 * int bpf_skb_pull_data(skb, len)
-	 *     The helper will pull in non-linear data in case the skb is non-linear
-	 *     and not all of len are part of the linear section. Only needed for
-	 *     read/write with direct packet access.
-	 *     @skb: pointer to skb
-	 *     @len: len to make read/writeable
-	 *     Return: 0 on success or negative error
+	 * bpf_skb_pull_data(skb, len)
+	 * The helper will pull in non-linear data in case the
+	 * skb is non-linear and not all of len are part of the
+	 * linear section. Only needed for read/write with direct
+	 * packet access.
+	 * @skb: pointer to skb
+	 * @len: len to make read/writeable
+	 * Return: 0 on success or negative error
 	 */
 	BPF_FUNC_skb_pull_data,
 
 	/**
-	 * s64 bpf_csum_update(skb, csum)
-	 *     Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
-	 *     @skb: pointer to skb
-	 *     @csum: csum to add
-	 *     Return: csum on success or negative error
+	 * bpf_csum_update(skb, csum)
+	 * Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
+	 * @skb: pointer to skb
+	 * @csum: csum to add
+	 * Return: csum on success or negative error
 	 */
 	BPF_FUNC_csum_update,
 
 	/**
-	 * void bpf_set_hash_invalid(skb)
-	 *     Invalidate current skb->hash.
-	 *     @skb: pointer to skb
+	 * bpf_set_hash_invalid(skb)
+	 * Invalidate current skb>hash.
+	 * @skb: pointer to skb
 	 */
 	BPF_FUNC_set_hash_invalid,
 
@@ -469,12 +499,11 @@ enum bpf_func_id {
 	BPF_FUNC_probe_read_str,
 
 	/**
-	 * u64 bpf_get_socket_cookie(skb)
-	 * Get the cookie for the socket stored inside sk_buff.
-	 * @skb: pointer to skb
-	 * Return: 8 Bytes non-decreasing number on success or 0 if
-	 * the socket
-	 * field is missing inside sk_buff
+	 * u64 bpf_bpf_get_socket_cookie(skb)
+	 *     Get the cookie for the socket stored inside sk_buff.
+	 *     @skb: pointer to skb
+	 *     Return: 8 Bytes non-decreasing number on success or 0 if the socket
+	 *     field is missing inside sk_buff
 	 */
 	BPF_FUNC_get_socket_cookie,
 
@@ -482,7 +511,8 @@ enum bpf_func_id {
 	 * u32 bpf_get_socket_uid(skb)
 	 *     Get the owner uid of the socket stored inside sk_buff.
 	 *     @skb: pointer to skb
-	 *     Return: uid of the socket owner on success or overflowuid if failed.
+	 *     Return: uid of the socket owner on success or 0 if the socket pointer
+	 *     inside sk_buff is NULL
 	 */
 	BPF_FUNC_get_socket_uid,
 

diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
index 1fff237..3995735 100644
--- a/tools/objtool/Documentation/stack-validation.txt
+++ b/tools/objtool/Documentation/stack-validation.txt

@@ -289,21 +289,6 @@
       might be corrupt due to a gcc bug.  For more details, see:
       https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646
 
-2. If you're getting any other objtool error in a compiled .c file, it
-   may be because the file uses an asm() statement which has a "call"
-   instruction.  An asm() statement with a call instruction must declare
-   the use of the stack pointer in its output operand.  On x86_64, this
-   means adding the ASM_CALL_CONSTRAINT as an output constraint:
-
-     asm volatile("call func" : ASM_CALL_CONSTRAINT);
-
-   Otherwise the stack frame may not get created before the call.
-
-3. Another possible cause for errors in C code is if the Makefile removes
-   -fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options.
-
-Also see the above section for .S file errors for more information what
-the individual error messages mean.
 
 If the error doesn't seem to make sense, it could be a bug in objtool.
 Feel free to ask the objtool maintainer for help.

diff --git a/verity_dev_keys.x509 b/verity_dev_keys.x509
new file mode 100644
index 0000000..86399c3
--- /dev/null
+++ b/verity_dev_keys.x509

@@ -0,0 +1,24 @@
+-----BEGIN CERTIFICATE-----
+MIID/TCCAuWgAwIBAgIJAJcPmDkJqolJMA0GCSqGSIb3DQEBBQUAMIGUMQswCQYD
+VQQGEwJVUzETMBEGA1UECAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4g
+VmlldzEQMA4GA1UECgwHQW5kcm9pZDEQMA4GA1UECwwHQW5kcm9pZDEQMA4GA1UE
+AwwHQW5kcm9pZDEiMCAGCSqGSIb3DQEJARYTYW5kcm9pZEBhbmRyb2lkLmNvbTAe
+Fw0xNDExMDYxOTA3NDBaFw00MjAzMjQxOTA3NDBaMIGUMQswCQYDVQQGEwJVUzET
+MBEGA1UECAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4gVmlldzEQMA4G
+A1UECgwHQW5kcm9pZDEQMA4GA1UECwwHQW5kcm9pZDEQMA4GA1UEAwwHQW5kcm9p
+ZDEiMCAGCSqGSIb3DQEJARYTYW5kcm9pZEBhbmRyb2lkLmNvbTCCASIwDQYJKoZI
+hvcNAQEBBQADggEPADCCAQoCggEBAOjreE0vTVSRenuzO9vnaWfk0eQzYab0gqpi
+6xAzi6dmD+ugoEKJmbPiuE5Dwf21isZ9uhUUu0dQM46dK4ocKxMRrcnmGxydFn6o
+fs3ODJMXOkv2gKXL/FdbEPdDbxzdu8z3yk+W67udM/fW7WbaQ3DO0knu+izKak/3
+T41c5uoXmQ81UNtAzRGzGchNVXMmWuTGOkg6U+0I2Td7K8yvUMWhAWPPpKLtVH9r
+AL5TzjYNR92izdKcz3AjRsI3CTjtpiVABGeX0TcjRSuZB7K9EK56HV+OFNS6I1NP
+jdD7FIShyGlqqZdUOkAUZYanbpgeT5N7QL6uuqcGpoTOkalu6kkCAwEAAaNQME4w
+HQYDVR0OBBYEFH5DM/m7oArf4O3peeKO0ZIEkrQPMB8GA1UdIwQYMBaAFH5DM/m7
+oArf4O3peeKO0ZIEkrQPMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEFBQADggEB
+AHO3NSvDE5jFvMehGGtS8BnFYdFKRIglDMc4niWSzhzOVYRH4WajxdtBWc5fx0ix
+NF/+hVKVhP6AIOQa+++sk+HIi7RvioPPbhjcsVlZe7cUEGrLSSveGouQyc+j0+m6
+JF84kszIl5GGNMTnx0XRPO+g8t6h5LWfnVydgZfpGRRg+WHewk1U2HlvTjIceb0N
+dcoJ8WKJAFWdcuE7VIm4w+vF/DYX/A2Oyzr2+QRhmYSv1cusgAeC1tvH4ap+J1Lg
+UnOu5Kh/FqPLLSwNVQp4Bu7b9QFfqK8Moj84bj88NqRGZgDyqzuTrFxn6FW7dmyA
+yttuAJAEAymk1mipd9+zp38=
+-----END CERTIFICATE-----
commit	82fb8eb1d4f8b41cc76b59fc567f77b6e9c4ffd8	[log] [tgz]
author	Petri Gynther <pgynther@google.com>	Thu Jan 24 16:29:12 2019 -0800
committer	Petri Gynther <pgynther@google.com>	Thu Jan 24 23:35:45 2019 -0800
tree	a30c69324a01e9cf76542e2122235779c2a2750b
parent	ab281c5bf4753b1048d8a059342f89a0d24e9169 [diff]
parent	ca975794ea765c52ccf59f30197d9677ec036418 [diff]