Upgrade rust/crates/ring to 0.16.19 am: 4d02e64347

Original change: https://android-review.googlesource.com/c/platform/external/rust/crates/ring/+/1520475

MUST ONLY BE SUBMITTED BY AUTOMERGER

Change-Id: Ide38be3068bc2fc4a197253e7f33b841e0b225d5
diff --git a/Cargo.toml b/Cargo.toml
index 1d825f9..fef8f20 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,7 +13,7 @@
 [package]
 edition = "2018"
 name = "ring"
-version = "0.16.18"
+version = "0.16.19"
 authors = ["Brian Smith <brian@briansmith.org>"]
 build = "build.rs"
 links = "ring-asm"
@@ -74,7 +74,7 @@
 features = ["std"]
 optional = true
 default-features = false
-[target."cfg(any(target_os = \"freebsd\", target_os = \"illumos\", target_os = \"netbsd\", target_os = \"openbsd\", target_os = \"solaris\"))".dependencies.once_cell]
+[target."cfg(any(target_os = \"dragonfly\", target_os = \"freebsd\", target_os = \"illumos\", target_os = \"netbsd\", target_os = \"openbsd\", target_os = \"solaris\"))".dependencies.once_cell]
 version = "1.5.2"
 features = ["std"]
 default-features = false
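
The target cfg above has to stay in lockstep with the matching #[cfg(...)]
lists in src/rand.rs (last hunks of this change): once_cell is only declared
as a dependency on the listed systems, so a target_os added to the code gates
must also be added here, which is exactly what the dragonfly entries in both
files do. A minimal sketch of the pattern, assuming (as elsewhere in ring
0.16.x) that once_cell backs a lazily opened /dev/urandom handle; the module
and item names below are illustrative, not ring's actual code:

    // Illustrative only: the dependency and its use are gated on the same
    // target_os list that Cargo.toml declares above.
    #[cfg(any(
        target_os = "dragonfly",
        target_os = "freebsd",
        target_os = "illumos",
        target_os = "netbsd",
        target_os = "openbsd",
        target_os = "solaris"
    ))]
    mod urandom {
        use once_cell::sync::Lazy;
        use std::fs::File;
        use std::io::Read;

        // Opened once and reused for every fill; Lazy provides the
        // lazily-initialized static without any unsafe code.
        static FILE: Lazy<File> =
            Lazy::new(|| File::open("/dev/urandom").expect("open /dev/urandom"));

        pub fn fill(dest: &mut [u8]) -> std::io::Result<()> {
            let mut f: &File = &FILE; // &File implements Read
            f.read_exact(dest)
        }
    }
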
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
index 88d7d81..e847097 100644
--- a/Cargo.toml.orig
+++ b/Cargo.toml.orig
@@ -10,7 +10,7 @@
 name = "ring"
 readme = "doc/link-to-readme.md"
 repository = "https://github.com/briansmith/ring"
-version = "0.16.18"
+version = "0.16.19"
 
 # Prevent multiple versions of *ring* from being linked into the same program.
 links = "ring-asm"
@@ -306,7 +306,7 @@
 libc = { version = "0.2.69", default-features = false }
 once_cell = { version = "1.5.2", default-features = false, features=["std"], optional = true }
 
-[target.'cfg(any(target_os = "freebsd", target_os = "illumos", target_os = "netbsd", target_os = "openbsd", target_os = "solaris"))'.dependencies]
+[target.'cfg(any(target_os = "dragonfly", target_os = "freebsd", target_os = "illumos", target_os = "netbsd", target_os = "openbsd", target_os = "solaris"))'.dependencies]
 once_cell = { version = "1.5.2", default-features = false, features=["std"] }
 
 [target.'cfg(all(target_arch = "wasm32", target_vendor = "unknown", target_os = "unknown", target_env = ""))'.dependencies]
diff --git a/METADATA b/METADATA
index 36b9165..e605092 100644
--- a/METADATA
+++ b/METADATA
@@ -7,13 +7,13 @@
   }
   url {
     type: ARCHIVE
-    value: "https://static.crates.io/crates/ring/ring-0.16.18.crate"
+    value: "https://static.crates.io/crates/ring/ring-0.16.19.crate"
   }
-  version: "0.16.18"
+  version: "0.16.19"
   license_type: NOTICE
   last_upgrade_date {
     year: 2020
-    month: 11
-    day: 30
+    month: 12
+    day: 7
   }
 }
diff --git a/crypto/fipsmodule/aes/asm/aesv8-armx.pl b/crypto/fipsmodule/aes/asm/aesv8-armx.pl
index fa4847e..804df81 100644
--- a/crypto/fipsmodule/aes/asm/aesv8-armx.pl
+++ b/crypto/fipsmodule/aes/asm/aesv8-armx.pl
@@ -251,11 +251,7 @@
 .type	GFp_${prefix}_${dir}crypt,%function
 .align	5
 GFp_${prefix}_${dir}crypt:
-___
-$code.=<<___	if ($flavour =~ /64/);
 	AARCH64_VALID_CALL_TARGET
-___
-$code.=<<___;
 	ldr	$rounds,[$key,#240]
 	vld1.32	{$rndkey0},[$key],#16
 	vld1.8	{$inout},[$inp]
@@ -335,20 +331,34 @@
 	add		$key_,$key,#32
 	mov		$cnt,$rounds
 	cclr		$step,lo
+
+	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
+	// affected by silicon errata #1742098 [0] and #1655431 [1],
+	// respectively, where the second instruction of an aese/aesmc
+	// instruction pair may execute twice if an interrupt is taken right
+	// after the first instruction consumes an input register of which a
+	// single 32-bit lane has been updated the last time it was modified.
+	//
+	// This function uses a counter in one 32-bit lane. The vmov.32 lines
+	// could write to $dat1 and $dat2 directly, but that trips this bug.
+	// We write to $ivec and copy to the final register as a workaround.
+	//
+	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
+	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
 #ifndef __ARMEB__
 	rev		$ctr, $ctr
 #endif
-	vorr		$dat1,$dat0,$dat0
 	add		$tctr1, $ctr, #1
-	vorr		$dat2,$dat0,$dat0
-	add		$ctr, $ctr, #2
 	vorr		$ivec,$dat0,$dat0
 	rev		$tctr1, $tctr1
-	vmov.32		${dat1}[3],$tctr1
+	vmov.32		${ivec}[3],$tctr1
+	add		$ctr, $ctr, #2
+	vorr		$dat1,$ivec,$ivec
 	b.ls		.Lctr32_tail
 	rev		$tctr2, $ctr
+	vmov.32		${ivec}[3],$tctr2
 	sub		$len,$len,#3		// bias
-	vmov.32		${dat2}[3],$tctr2
+	vorr		$dat2,$ivec,$ivec
 	b		.Loop3x_ctr32
 
 .align	4
@@ -375,11 +385,11 @@
 	aese		$dat1,q8
 	aesmc		$tmp1,$dat1
 	 vld1.8		{$in0},[$inp],#16
-	 vorr		$dat0,$ivec,$ivec
+	 add		$tctr0,$ctr,#1
 	aese		$dat2,q8
 	aesmc		$dat2,$dat2
 	 vld1.8		{$in1},[$inp],#16
-	 vorr		$dat1,$ivec,$ivec
+	 rev		$tctr0,$tctr0
 	aese		$tmp0,q9
 	aesmc		$tmp0,$tmp0
 	aese		$tmp1,q9
@@ -388,8 +398,6 @@
 	 mov		$key_,$key
 	aese		$dat2,q9
 	aesmc		$tmp2,$dat2
-	 vorr		$dat2,$ivec,$ivec
-	 add		$tctr0,$ctr,#1
 	aese		$tmp0,q12
 	aesmc		$tmp0,$tmp0
 	aese		$tmp1,q12
@@ -404,21 +412,26 @@
 	aesmc		$tmp0,$tmp0
 	aese		$tmp1,q13
 	aesmc		$tmp1,$tmp1
+	 // Note the logic to update $dat0, $dat1, and $dat2 is written to work
+	 // around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
+	 // 32-bit mode. See the comment above.
 	 veor		$in2,$in2,$rndlast
-	 rev		$tctr0,$tctr0
+	 vmov.32	${ivec}[3], $tctr0
 	aese		$tmp2,q13
 	aesmc		$tmp2,$tmp2
-	 vmov.32	${dat0}[3], $tctr0
+	 vorr		$dat0,$ivec,$ivec
 	 rev		$tctr1,$tctr1
 	aese		$tmp0,q14
 	aesmc		$tmp0,$tmp0
+	 vmov.32	${ivec}[3], $tctr1
+	 rev		$tctr2,$ctr
 	aese		$tmp1,q14
 	aesmc		$tmp1,$tmp1
-	 vmov.32	${dat1}[3], $tctr1
-	 rev		$tctr2,$ctr
+	 vorr		$dat1,$ivec,$ivec
+	 vmov.32	${ivec}[3], $tctr2
 	aese		$tmp2,q14
 	aesmc		$tmp2,$tmp2
-	 vmov.32	${dat2}[3], $tctr2
+	 vorr		$dat2,$ivec,$ivec
 	 subs		$len,$len,#3
 	aese		$tmp0,q15
 	aese		$tmp1,q15
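
The register shuffle in this hunk (propagated to every pregenerated
aesv8-armx file below) is easier to follow as data flow than as instruction
scheduling. Before, each counter block was built by copying $ivec and then
inserting the incremented 32-bit counter into one lane, so the last write to
a register about to be consumed by aese/aesmc was a single-lane insert, the
exact trigger the errata describe. After, the lane insert always targets the
scratch $ivec, and the register the AES rounds consume is produced by a
whole-register copy. A schematic model in Rust, purely illustrative
(next_ctr_block and its types are invented for this note; swap_bytes stands
in for the rev instruction):

    // Errata-safe ordering: the single-lane write only ever lands in the
    // scratch vector, and the value the AES rounds consume is last written
    // by a full-register copy.
    fn next_ctr_block(ivec: &mut [u32; 4], ctr: u32) -> [u32; 4] {
        ivec[3] = ctr.swap_bytes(); // lane insert, scratch register only
        *ivec                       // whole-register copy feeds aese/aesmc
    }
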
diff --git a/crypto/fipsmodule/modes/asm/ghashv8-armx.pl b/crypto/fipsmodule/modes/asm/ghashv8-armx.pl
index 3c1846c..3a551c2 100644
--- a/crypto/fipsmodule/modes/asm/ghashv8-armx.pl
+++ b/crypto/fipsmodule/modes/asm/ghashv8-armx.pl
@@ -86,11 +86,7 @@
 .type	GFp_gcm_init_clmul,%function
 .align	4
 GFp_gcm_init_clmul:
-___
-$code.=<<___	if ($flavour =~ /64/);
 	AARCH64_VALID_CALL_TARGET
-___
-$code.=<<___;
 	vld1.64		{$t1},[x1]		@ load input H
 	vmov.i8		$xC2,#0xe1
 	vshl.i64	$xC2,$xC2,#57		@ 0xc2.0
@@ -150,11 +146,7 @@
 .type	GFp_gcm_gmult_clmul,%function
 .align	4
 GFp_gcm_gmult_clmul:
-___
-$code.=<<___	if ($flavour =~ /64/);
 	AARCH64_VALID_CALL_TARGET
-___
-$code.=<<___;
 	vld1.64		{$t1},[$Xi]		@ load Xi
 	vmov.i8		$xC2,#0xe1
 	vld1.64		{$H-$Hhl},[$Htbl]	@ load twisted H, ...
@@ -208,8 +200,6 @@
 .type	GFp_gcm_ghash_clmul,%function
 .align	4
 GFp_gcm_ghash_clmul:
-___
-$code.=<<___	if ($flavour =~ /64/);
 	AARCH64_VALID_CALL_TARGET
 ___
 $code.=<<___		if ($flavour !~ /64/);
diff --git a/include/GFp/arm_arch.h b/include/GFp/arm_arch.h
index d215e4a..2e64aa9 100644
--- a/include/GFp/arm_arch.h
+++ b/include/GFp/arm_arch.h
@@ -155,7 +155,7 @@
 #endif
 
 #if GNU_PROPERTY_AARCH64_POINTER_AUTH != 0 || GNU_PROPERTY_AARCH64_BTI != 0
-.pushsection note.gnu.property, "a";
+.pushsection .note.gnu.property, "a";
 .balign 8;
 .long 4;
 .long 0x10;
@@ -164,8 +164,8 @@
 .long 0xc0000000; /* GNU_PROPERTY_AARCH64_FEATURE_1_AND */
 .long 4;
 .long (GNU_PROPERTY_AARCH64_POINTER_AUTH | GNU_PROPERTY_AARCH64_BTI);
-.long 0
-.popsection
+.long 0;
+.popsection;
 #endif
 
 #if defined(__GNUC__)
diff --git a/pregenerated/aesni-gcm-x86_64-nasm.obj b/pregenerated/aesni-gcm-x86_64-nasm.obj
index 7811fb6..73fc99b 100644
--- a/pregenerated/aesni-gcm-x86_64-nasm.obj
+++ b/pregenerated/aesni-gcm-x86_64-nasm.obj
Binary files differ
diff --git a/pregenerated/aesni-x86-win32n.obj b/pregenerated/aesni-x86-win32n.obj
index 2c08071..a2f616e 100644
--- a/pregenerated/aesni-x86-win32n.obj
+++ b/pregenerated/aesni-x86-win32n.obj
Binary files differ
diff --git a/pregenerated/aesni-x86_64-nasm.obj b/pregenerated/aesni-x86_64-nasm.obj
index ad30d5e..9307829 100644
--- a/pregenerated/aesni-x86_64-nasm.obj
+++ b/pregenerated/aesni-x86_64-nasm.obj
Binary files differ
diff --git a/pregenerated/aesv8-armx-ios32.S b/pregenerated/aesv8-armx-ios32.S
index b958d8a..f592853 100644
--- a/pregenerated/aesv8-armx-ios32.S
+++ b/pregenerated/aesv8-armx-ios32.S
@@ -170,6 +170,7 @@
 #endif
 .align	5
 _GFp_aes_hw_encrypt:
+	AARCH64_VALID_CALL_TARGET
 	ldr	r3,[r2,#240]
 	vld1.32	{q0},[r2]!
 	vld1.8	{q2},[r0]
@@ -202,6 +203,7 @@
 #endif
 .align	5
 _GFp_aes_hw_decrypt:
+	AARCH64_VALID_CALL_TARGET
 	ldr	r3,[r2,#240]
 	vld1.32	{q0},[r2]!
 	vld1.8	{q2},[r0]
@@ -255,20 +257,34 @@
 	add	r7,r3,#32
 	mov	r6,r5
 	movlo	r12,#0
+
+	@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
+	@ affected by silicon errata #1742098 [0] and #1655431 [1],
+	@ respectively, where the second instruction of an aese/aesmc
+	@ instruction pair may execute twice if an interrupt is taken right
+	@ after the first instruction consumes an input register of which a
+	@ single 32-bit lane has been updated the last time it was modified.
+	@
+	@ This function uses a counter in one 32-bit lane. The vmov.32 lines
+	@ could write to q1 and q10 directly, but that trips this bug.
+	@ We write to q6 and copy to the final register as a workaround.
+	@
+	@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
+	@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
 #ifndef __ARMEB__
 	rev	r8, r8
 #endif
-	vorr	q1,q0,q0
 	add	r10, r8, #1
-	vorr	q10,q0,q0
-	add	r8, r8, #2
 	vorr	q6,q0,q0
 	rev	r10, r10
-	vmov.32	d3[1],r10
+	vmov.32	d13[1],r10
+	add	r8, r8, #2
+	vorr	q1,q6,q6
 	bls	Lctr32_tail
 	rev	r12, r8
+	vmov.32	d13[1],r12
 	sub	r2,r2,#3		@ bias
-	vmov.32	d21[1],r12
+	vorr	q10,q6,q6
 	b	Loop3x_ctr32
 
 .align	4
@@ -295,11 +311,11 @@
 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
 .byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
 	vld1.8	{q2},[r0]!
-	vorr	q0,q6,q6
+	add	r9,r8,#1
 .byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
 .byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
 	vld1.8	{q3},[r0]!
-	vorr	q1,q6,q6
+	rev	r9,r9
 .byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
 .byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
@@ -308,8 +324,6 @@
 	mov	r7,r3
 .byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
 .byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
-	vorr	q10,q6,q6
-	add	r9,r8,#1
 .byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
 .byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
@@ -324,21 +338,26 @@
 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
 .byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
+	 @ Note the logic to update q0, q1, and q10 is written to work
+	 @ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
+	 @ 32-bit mode. See the comment above.
 	veor	q11,q11,q7
-	rev	r9,r9
+	vmov.32	d13[1], r9
 .byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
 .byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
-	vmov.32	d1[1], r9
+	vorr	q0,q6,q6
 	rev	r10,r10
 .byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
+	vmov.32	d13[1], r10
+	rev	r12,r8
 .byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
-	vmov.32	d3[1], r10
-	rev	r12,r8
+	vorr	q1,q6,q6
+	vmov.32	d13[1], r12
 .byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
 .byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
-	vmov.32	d21[1], r12
+	vorr	q10,q6,q6
 	subs	r2,r2,#3
 .byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
 .byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
diff --git a/pregenerated/aesv8-armx-ios64.S b/pregenerated/aesv8-armx-ios64.S
index bbbc573..053fac5 100644
--- a/pregenerated/aesv8-armx-ios64.S
+++ b/pregenerated/aesv8-armx-ios64.S
@@ -248,20 +248,34 @@
 	add	x7,x3,#32
 	mov	w6,w5
 	csel	x12,xzr,x12,lo
+
+	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
+	// affected by silicon errata #1742098 [0] and #1655431 [1],
+	// respectively, where the second instruction of an aese/aesmc
+	// instruction pair may execute twice if an interrupt is taken right
+	// after the first instruction consumes an input register of which a
+	// single 32-bit lane has been updated the last time it was modified.
+	//
+	// This function uses a counter in one 32-bit lane. The vmov lines
+	// could write to v1.16b and v18.16b directly, but that trips this bug.
+	// We write to v6.16b and copy to the final register as a workaround.
+	//
+	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
+	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
 #ifndef __ARMEB__
 	rev	w8, w8
 #endif
-	orr	v1.16b,v0.16b,v0.16b
 	add	w10, w8, #1
-	orr	v18.16b,v0.16b,v0.16b
-	add	w8, w8, #2
 	orr	v6.16b,v0.16b,v0.16b
 	rev	w10, w10
-	mov	v1.s[3],w10
+	mov	v6.s[3],w10
+	add	w8, w8, #2
+	orr	v1.16b,v6.16b,v6.16b
 	b.ls	Lctr32_tail
 	rev	w12, w8
+	mov	v6.s[3],w12
 	sub	x2,x2,#3		// bias
-	mov	v18.s[3],w12
+	orr	v18.16b,v6.16b,v6.16b
 	b	Loop3x_ctr32
 
 .align	4
@@ -288,11 +302,11 @@
 	aese	v1.16b,v16.16b
 	aesmc	v5.16b,v1.16b
 	ld1	{v2.16b},[x0],#16
-	orr	v0.16b,v6.16b,v6.16b
+	add	w9,w8,#1
 	aese	v18.16b,v16.16b
 	aesmc	v18.16b,v18.16b
 	ld1	{v3.16b},[x0],#16
-	orr	v1.16b,v6.16b,v6.16b
+	rev	w9,w9
 	aese	v4.16b,v17.16b
 	aesmc	v4.16b,v4.16b
 	aese	v5.16b,v17.16b
@@ -301,8 +315,6 @@
 	mov	x7,x3
 	aese	v18.16b,v17.16b
 	aesmc	v17.16b,v18.16b
-	orr	v18.16b,v6.16b,v6.16b
-	add	w9,w8,#1
 	aese	v4.16b,v20.16b
 	aesmc	v4.16b,v4.16b
 	aese	v5.16b,v20.16b
@@ -317,21 +329,26 @@
 	aesmc	v4.16b,v4.16b
 	aese	v5.16b,v21.16b
 	aesmc	v5.16b,v5.16b
+	 // Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
+	 // around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
+	 // 32-bit mode. See the comment above.
 	eor	v19.16b,v19.16b,v7.16b
-	rev	w9,w9
+	mov	v6.s[3], w9
 	aese	v17.16b,v21.16b
 	aesmc	v17.16b,v17.16b
-	mov	v0.s[3], w9
+	orr	v0.16b,v6.16b,v6.16b
 	rev	w10,w10
 	aese	v4.16b,v22.16b
 	aesmc	v4.16b,v4.16b
+	mov	v6.s[3], w10
+	rev	w12,w8
 	aese	v5.16b,v22.16b
 	aesmc	v5.16b,v5.16b
-	mov	v1.s[3], w10
-	rev	w12,w8
+	orr	v1.16b,v6.16b,v6.16b
+	mov	v6.s[3], w12
 	aese	v17.16b,v22.16b
 	aesmc	v17.16b,v17.16b
-	mov	v18.s[3], w12
+	orr	v18.16b,v6.16b,v6.16b
 	subs	x2,x2,#3
 	aese	v4.16b,v23.16b
 	aese	v5.16b,v23.16b
diff --git a/pregenerated/aesv8-armx-linux32.S b/pregenerated/aesv8-armx-linux32.S
index 1b9037a..7b46da6 100644
--- a/pregenerated/aesv8-armx-linux32.S
+++ b/pregenerated/aesv8-armx-linux32.S
@@ -167,6 +167,7 @@
 .type	GFp_aes_hw_encrypt,%function
 .align	5
 GFp_aes_hw_encrypt:
+	AARCH64_VALID_CALL_TARGET
 	ldr	r3,[r2,#240]
 	vld1.32	{q0},[r2]!
 	vld1.8	{q2},[r0]
@@ -197,6 +198,7 @@
 .type	GFp_aes_hw_decrypt,%function
 .align	5
 GFp_aes_hw_decrypt:
+	AARCH64_VALID_CALL_TARGET
 	ldr	r3,[r2,#240]
 	vld1.32	{q0},[r2]!
 	vld1.8	{q2},[r0]
@@ -248,20 +250,34 @@
 	add	r7,r3,#32
 	mov	r6,r5
 	movlo	r12,#0
+
+	@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
+	@ affected by silicon errata #1742098 [0] and #1655431 [1],
+	@ respectively, where the second instruction of an aese/aesmc
+	@ instruction pair may execute twice if an interrupt is taken right
+	@ after the first instruction consumes an input register of which a
+	@ single 32-bit lane has been updated the last time it was modified.
+	@
+	@ This function uses a counter in one 32-bit lane. The vmov.32 lines
+	@ could write to q1 and q10 directly, but that trips this bug.
+	@ We write to q6 and copy to the final register as a workaround.
+	@
+	@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
+	@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
 #ifndef __ARMEB__
 	rev	r8, r8
 #endif
-	vorr	q1,q0,q0
 	add	r10, r8, #1
-	vorr	q10,q0,q0
-	add	r8, r8, #2
 	vorr	q6,q0,q0
 	rev	r10, r10
-	vmov.32	d3[1],r10
+	vmov.32	d13[1],r10
+	add	r8, r8, #2
+	vorr	q1,q6,q6
 	bls	.Lctr32_tail
 	rev	r12, r8
+	vmov.32	d13[1],r12
 	sub	r2,r2,#3		@ bias
-	vmov.32	d21[1],r12
+	vorr	q10,q6,q6
 	b	.Loop3x_ctr32
 
 .align	4
@@ -288,11 +304,11 @@
 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
 .byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
 	vld1.8	{q2},[r0]!
-	vorr	q0,q6,q6
+	add	r9,r8,#1
 .byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
 .byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
 	vld1.8	{q3},[r0]!
-	vorr	q1,q6,q6
+	rev	r9,r9
 .byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
 .byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
@@ -301,8 +317,6 @@
 	mov	r7,r3
 .byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
 .byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
-	vorr	q10,q6,q6
-	add	r9,r8,#1
 .byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
 .byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
@@ -317,21 +331,26 @@
 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
 .byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
+	 @ Note the logic to update q0, q1, and q10 is written to work
+	 @ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
+	 @ 32-bit mode. See the comment above.
 	veor	q11,q11,q7
-	rev	r9,r9
+	vmov.32	d13[1], r9
 .byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
 .byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
-	vmov.32	d1[1], r9
+	vorr	q0,q6,q6
 	rev	r10,r10
 .byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
+	vmov.32	d13[1], r10
+	rev	r12,r8
 .byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
-	vmov.32	d3[1], r10
-	rev	r12,r8
+	vorr	q1,q6,q6
+	vmov.32	d13[1], r12
 .byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
 .byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
-	vmov.32	d21[1], r12
+	vorr	q10,q6,q6
 	subs	r2,r2,#3
 .byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
 .byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
diff --git a/pregenerated/aesv8-armx-linux64.S b/pregenerated/aesv8-armx-linux64.S
index 4428fd5..fd6987a 100644
--- a/pregenerated/aesv8-armx-linux64.S
+++ b/pregenerated/aesv8-armx-linux64.S
@@ -249,20 +249,34 @@
 	add	x7,x3,#32
 	mov	w6,w5
 	csel	x12,xzr,x12,lo
+
+	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
+	// affected by silicon errata #1742098 [0] and #1655431 [1],
+	// respectively, where the second instruction of an aese/aesmc
+	// instruction pair may execute twice if an interrupt is taken right
+	// after the first instruction consumes an input register of which a
+	// single 32-bit lane has been updated the last time it was modified.
+	//
+	// This function uses a counter in one 32-bit lane. The vmov lines
+	// could write to v1.16b and v18.16b directly, but that trips this bug.
+	// We write to v6.16b and copy to the final register as a workaround.
+	//
+	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
+	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
 #ifndef __ARMEB__
 	rev	w8, w8
 #endif
-	orr	v1.16b,v0.16b,v0.16b
 	add	w10, w8, #1
-	orr	v18.16b,v0.16b,v0.16b
-	add	w8, w8, #2
 	orr	v6.16b,v0.16b,v0.16b
 	rev	w10, w10
-	mov	v1.s[3],w10
+	mov	v6.s[3],w10
+	add	w8, w8, #2
+	orr	v1.16b,v6.16b,v6.16b
 	b.ls	.Lctr32_tail
 	rev	w12, w8
+	mov	v6.s[3],w12
 	sub	x2,x2,#3		// bias
-	mov	v18.s[3],w12
+	orr	v18.16b,v6.16b,v6.16b
 	b	.Loop3x_ctr32
 
 .align	4
@@ -289,11 +303,11 @@
 	aese	v1.16b,v16.16b
 	aesmc	v5.16b,v1.16b
 	ld1	{v2.16b},[x0],#16
-	orr	v0.16b,v6.16b,v6.16b
+	add	w9,w8,#1
 	aese	v18.16b,v16.16b
 	aesmc	v18.16b,v18.16b
 	ld1	{v3.16b},[x0],#16
-	orr	v1.16b,v6.16b,v6.16b
+	rev	w9,w9
 	aese	v4.16b,v17.16b
 	aesmc	v4.16b,v4.16b
 	aese	v5.16b,v17.16b
@@ -302,8 +316,6 @@
 	mov	x7,x3
 	aese	v18.16b,v17.16b
 	aesmc	v17.16b,v18.16b
-	orr	v18.16b,v6.16b,v6.16b
-	add	w9,w8,#1
 	aese	v4.16b,v20.16b
 	aesmc	v4.16b,v4.16b
 	aese	v5.16b,v20.16b
@@ -318,21 +330,26 @@
 	aesmc	v4.16b,v4.16b
 	aese	v5.16b,v21.16b
 	aesmc	v5.16b,v5.16b
+	 // Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
+	 // around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
+	 // 32-bit mode. See the comment above.
 	eor	v19.16b,v19.16b,v7.16b
-	rev	w9,w9
+	mov	v6.s[3], w9
 	aese	v17.16b,v21.16b
 	aesmc	v17.16b,v17.16b
-	mov	v0.s[3], w9
+	orr	v0.16b,v6.16b,v6.16b
 	rev	w10,w10
 	aese	v4.16b,v22.16b
 	aesmc	v4.16b,v4.16b
+	mov	v6.s[3], w10
+	rev	w12,w8
 	aese	v5.16b,v22.16b
 	aesmc	v5.16b,v5.16b
-	mov	v1.s[3], w10
-	rev	w12,w8
+	orr	v1.16b,v6.16b,v6.16b
+	mov	v6.s[3], w12
 	aese	v17.16b,v22.16b
 	aesmc	v17.16b,v17.16b
-	mov	v18.s[3], w12
+	orr	v18.16b,v6.16b,v6.16b
 	subs	x2,x2,#3
 	aese	v4.16b,v23.16b
 	aese	v5.16b,v23.16b
diff --git a/pregenerated/chacha-x86-win32n.obj b/pregenerated/chacha-x86-win32n.obj
index 7672e5f..4054d28 100644
--- a/pregenerated/chacha-x86-win32n.obj
+++ b/pregenerated/chacha-x86-win32n.obj
Binary files differ
diff --git a/pregenerated/chacha-x86_64-nasm.obj b/pregenerated/chacha-x86_64-nasm.obj
index f4361f8..a1d7048 100644
--- a/pregenerated/chacha-x86_64-nasm.obj
+++ b/pregenerated/chacha-x86_64-nasm.obj
Binary files differ
diff --git a/pregenerated/ecp_nistz256-x86-win32n.obj b/pregenerated/ecp_nistz256-x86-win32n.obj
index d96624d..595488f 100644
--- a/pregenerated/ecp_nistz256-x86-win32n.obj
+++ b/pregenerated/ecp_nistz256-x86-win32n.obj
Binary files differ
diff --git a/pregenerated/ghash-x86-win32n.obj b/pregenerated/ghash-x86-win32n.obj
index 66dfb89..4a2380b 100644
--- a/pregenerated/ghash-x86-win32n.obj
+++ b/pregenerated/ghash-x86-win32n.obj
Binary files differ
diff --git a/pregenerated/ghash-x86_64-nasm.obj b/pregenerated/ghash-x86_64-nasm.obj
index 816ff57..7e68530 100644
--- a/pregenerated/ghash-x86_64-nasm.obj
+++ b/pregenerated/ghash-x86_64-nasm.obj
Binary files differ
diff --git a/pregenerated/ghashv8-armx-ios32.S b/pregenerated/ghashv8-armx-ios32.S
index 71efeea..ac25245 100644
--- a/pregenerated/ghashv8-armx-ios32.S
+++ b/pregenerated/ghashv8-armx-ios32.S
@@ -22,6 +22,7 @@
 #endif
 .align	4
 _GFp_gcm_init_clmul:
+	AARCH64_VALID_CALL_TARGET
 	vld1.64	{q9},[r1]		@ load input H
 	vmov.i8	q11,#0xe1
 	vshl.i64	q11,q11,#57		@ 0xc2.0
@@ -75,6 +76,7 @@
 #endif
 .align	4
 _GFp_gcm_gmult_clmul:
+	AARCH64_VALID_CALL_TARGET
 	vld1.64	{q9},[r0]		@ load Xi
 	vmov.i8	q11,#0xe1
 	vld1.64	{q12,q13},[r1]	@ load twisted H, ...
@@ -119,6 +121,7 @@
 #endif
 .align	4
 _GFp_gcm_ghash_clmul:
+	AARCH64_VALID_CALL_TARGET
 	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}		@ 32-bit ABI says so
 	vld1.64	{q0},[r0]		@ load [rotated] Xi
 						@ "[rotated]" means that
diff --git a/pregenerated/ghashv8-armx-linux32.S b/pregenerated/ghashv8-armx-linux32.S
index 0ece407..fe947e2 100644
--- a/pregenerated/ghashv8-armx-linux32.S
+++ b/pregenerated/ghashv8-armx-linux32.S
@@ -21,6 +21,7 @@
 .type	GFp_gcm_init_clmul,%function
 .align	4
 GFp_gcm_init_clmul:
+	AARCH64_VALID_CALL_TARGET
 	vld1.64	{q9},[r1]		@ load input H
 	vmov.i8	q11,#0xe1
 	vshl.i64	q11,q11,#57		@ 0xc2.0
@@ -72,6 +73,7 @@
 .type	GFp_gcm_gmult_clmul,%function
 .align	4
 GFp_gcm_gmult_clmul:
+	AARCH64_VALID_CALL_TARGET
 	vld1.64	{q9},[r0]		@ load Xi
 	vmov.i8	q11,#0xe1
 	vld1.64	{q12,q13},[r1]	@ load twisted H, ...
@@ -114,6 +116,7 @@
 .type	GFp_gcm_ghash_clmul,%function
 .align	4
 GFp_gcm_ghash_clmul:
+	AARCH64_VALID_CALL_TARGET
 	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}		@ 32-bit ABI says so
 	vld1.64	{q0},[r0]		@ load [rotated] Xi
 						@ "[rotated]" means that
diff --git a/pregenerated/p256-x86_64-asm-nasm.obj b/pregenerated/p256-x86_64-asm-nasm.obj
index 9a909e5..a76a4ac 100644
--- a/pregenerated/p256-x86_64-asm-nasm.obj
+++ b/pregenerated/p256-x86_64-asm-nasm.obj
Binary files differ
diff --git a/pregenerated/sha256-x86_64-nasm.obj b/pregenerated/sha256-x86_64-nasm.obj
index afca937..8f54357 100644
--- a/pregenerated/sha256-x86_64-nasm.obj
+++ b/pregenerated/sha256-x86_64-nasm.obj
Binary files differ
diff --git a/pregenerated/sha512-x86_64-nasm.obj b/pregenerated/sha512-x86_64-nasm.obj
index b8ef5ed..31f17f1 100644
--- a/pregenerated/sha512-x86_64-nasm.obj
+++ b/pregenerated/sha512-x86_64-nasm.obj
Binary files differ
diff --git a/pregenerated/vpaes-x86-win32n.obj b/pregenerated/vpaes-x86-win32n.obj
index a9adfba..882b83e 100644
--- a/pregenerated/vpaes-x86-win32n.obj
+++ b/pregenerated/vpaes-x86-win32n.obj
Binary files differ
diff --git a/pregenerated/vpaes-x86_64-nasm.obj b/pregenerated/vpaes-x86_64-nasm.obj
index 8c7b384..a01a006 100644
--- a/pregenerated/vpaes-x86_64-nasm.obj
+++ b/pregenerated/vpaes-x86_64-nasm.obj
Binary files differ
diff --git a/pregenerated/x86-mont-win32n.obj b/pregenerated/x86-mont-win32n.obj
index 6d0e2b6..a1e10aa 100644
--- a/pregenerated/x86-mont-win32n.obj
+++ b/pregenerated/x86-mont-win32n.obj
Binary files differ
diff --git a/pregenerated/x86_64-mont-nasm.obj b/pregenerated/x86_64-mont-nasm.obj
index de83c2c..c9be599 100644
--- a/pregenerated/x86_64-mont-nasm.obj
+++ b/pregenerated/x86_64-mont-nasm.obj
Binary files differ
diff --git a/pregenerated/x86_64-mont5-nasm.obj b/pregenerated/x86_64-mont5-nasm.obj
index 8b060cf..36320e6 100644
--- a/pregenerated/x86_64-mont5-nasm.obj
+++ b/pregenerated/x86_64-mont5-nasm.obj
Binary files differ
diff --git a/src/ec/suite_b/ops.rs b/src/ec/suite_b/ops.rs
index c616aca..13d80c0 100644
--- a/src/ec/suite_b/ops.rs
+++ b/src/ec/suite_b/ops.rs
@@ -1176,84 +1176,6 @@
     }
 }
 
-#[cfg(feature = "internal_benches")]
-mod internal_benches {
-    use super::{Limb, MAX_LIMBS};
-
-    pub const LIMBS_1: [Limb; MAX_LIMBS] = limbs![1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-
-    pub const LIMBS_ALTERNATING_10: [Limb; MAX_LIMBS] = limbs![
-        0b10101010_10101010_10101010_10101010,
-        0b10101010_10101010_10101010_10101010,
-        0b10101010_10101010_10101010_10101010,
-        0b10101010_10101010_10101010_10101010,
-        0b10101010_10101010_10101010_10101010,
-        0b10101010_10101010_10101010_10101010,
-        0b10101010_10101010_10101010_10101010,
-        0b10101010_10101010_10101010_10101010,
-        0b10101010_10101010_10101010_10101010,
-        0b10101010_10101010_10101010_10101010,
-        0b10101010_10101010_10101010_10101010,
-        0b10101010_10101010_10101010_10101010
-    ];
-}
-
-#[cfg(feature = "internal_benches")]
-macro_rules! bench_curve {
-    ( $vectors:expr ) => {
-        use super::super::{Elem, Scalar};
-        extern crate test;
-
-        #[bench]
-        fn elem_inverse_squared_bench(bench: &mut test::Bencher) {
-            // This benchmark assumes that `elem_inverse_squared()` is
-            // constant-time so inverting 1 mod q is as good of a choice as
-            // anything.
-            let mut a = Elem::zero();
-            a.limbs[0] = 1;
-            bench.iter(|| {
-                let _ = PRIVATE_KEY_OPS.elem_inverse_squared(&a);
-            });
-        }
-
-        #[bench]
-        fn elem_product_bench(bench: &mut test::Bencher) {
-            // This benchmark assumes that the multiplication is constant-time
-            // so 0 * 0 is as good of a choice as anything.
-            let a: Elem<R> = Elem::zero();
-            let b: Elem<R> = Elem::zero();
-            bench.iter(|| {
-                let _ = COMMON_OPS.elem_product(&a, &b);
-            });
-        }
-
-        #[bench]
-        fn elem_squared_bench(bench: &mut test::Bencher) {
-            // This benchmark assumes that the squaring is constant-time so
-            // 0**2 * 0 is as good of a choice as anything.
-            let a = Elem::zero();
-            bench.iter(|| {
-                let _ = COMMON_OPS.elem_squared(&a);
-            });
-        }
-
-        #[bench]
-        fn scalar_inv_to_mont_bench(bench: &mut test::Bencher) {
-            const VECTORS: &[Scalar] = $vectors;
-            let vectors_len = VECTORS.len();
-            let mut i = 0;
-            bench.iter(|| {
-                let _ = SCALAR_OPS.scalar_inv_to_mont(&VECTORS[i]);
-
-                i += 1;
-                if i == vectors_len {
-                    i = 0;
-                }
-            });
-        }
-    };
-}
-
 mod elem;
 pub mod p256;
 pub mod p384;
diff --git a/src/ec/suite_b/ops/p256.rs b/src/ec/suite_b/ops/p256.rs
index 69c89eb..4c54f5c 100644
--- a/src/ec/suite_b/ops/p256.rs
+++ b/src/ec/suite_b/ops/p256.rs
@@ -380,36 +380,3 @@
         rep: Limb,
     );
 }
-
-#[cfg(feature = "internal_benches")]
-mod internal_benches {
-    use super::{super::internal_benches::*, *};
-
-    bench_curve!(&[
-        Scalar {
-            limbs: LIMBS_1,
-            m: PhantomData,
-            encoding: PhantomData,
-        },
-        Scalar {
-            limbs: LIMBS_ALTERNATING_10,
-            m: PhantomData,
-            encoding: PhantomData,
-        },
-        Scalar {
-            // n - 1
-            limbs: p256_limbs![
-                0xfc632551 - 1,
-                0xf3b9cac2,
-                0xa7179e84,
-                0xbce6faad,
-                0xffffffff,
-                0xffffffff,
-                0x00000000,
-                0xffffffff
-            ],
-            m: PhantomData,
-            encoding: PhantomData,
-        },
-    ]);
-}
diff --git a/src/ec/suite_b/ops/p384.rs b/src/ec/suite_b/ops/p384.rs
index 4b2ecb8..7ecba1f 100644
--- a/src/ec/suite_b/ops/p384.rs
+++ b/src/ec/suite_b/ops/p384.rs
@@ -368,40 +368,3 @@
         b: *const Limb, // [COMMON_OPS.num_limbs]
     );
 }
-
-#[cfg(feature = "internal_benches")]
-mod internal_benches {
-    use super::{super::internal_benches::*, *};
-
-    bench_curve!(&[
-        Scalar {
-            limbs: LIMBS_1,
-            encoding: PhantomData,
-            m: PhantomData
-        },
-        Scalar {
-            limbs: LIMBS_ALTERNATING_10,
-            encoding: PhantomData,
-            m: PhantomData
-        },
-        Scalar {
-            // n - 1
-            limbs: p384_limbs![
-                0xccc52973 - 1,
-                0xecec196a,
-                0x48b0a77a,
-                0x581a0db2,
-                0xf4372ddf,
-                0xc7634d81,
-                0xffffffff,
-                0xffffffff,
-                0xffffffff,
-                0xffffffff,
-                0xffffffff,
-                0xffffffff
-            ],
-            encoding: PhantomData,
-            m: PhantomData,
-        },
-    ]);
-}
diff --git a/src/lib.rs b/src/lib.rs
index 30c2bf8..86a6099 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -70,23 +70,18 @@
 )]
 // `#[derive(...)]` uses `trivial_numeric_casts` and `unused_qualifications`
 // internally.
-#![deny(
-    missing_docs,
-    unstable_features, // Used by `internal_benches`
-    unused_qualifications,
-    variant_size_differences,
-)]
+#![deny(missing_docs, unused_qualifications, variant_size_differences)]
 #![forbid(
     anonymous_parameters,
     trivial_casts,
     trivial_numeric_casts,
+    unstable_features,
     unused_extern_crates,
     unused_import_braces,
     unused_results,
     warnings
 )]
 #![no_std]
-#![cfg_attr(feature = "internal_benches", allow(unstable_features), feature(test))]
 
 #[cfg(feature = "alloc")]
 extern crate alloc;
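
Dropping the benches also drops the crate's only use of the nightly test
crate, which is why unstable_features can move from the deny list (where it
carried the "Used by internal_benches" caveat and the cfg_attr escape hatch
deleted above) into forbid. Unlike deny, forbid cannot be overridden by a
later allow anywhere in the crate, so building on stable is now enforced
rather than merely the default. A minimal illustration, not taken from the
crate:

    // With forbid, no inner attribute can opt back in:
    #![forbid(unstable_features)]

    // The next two lines would now fail to compile, even together; under
    // deny, the allow would have silenced the lint.
    // #![allow(unstable_features)]
    // #![feature(test)]
    fn main() {}
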
diff --git a/src/rand.rs b/src/rand.rs
index 6957952..9d1864f 100644
--- a/src/rand.rs
+++ b/src/rand.rs
@@ -180,6 +180,7 @@
 use self::sysrand_or_urandom::fill as fill_impl;
 
 #[cfg(any(
+    target_os = "dragonfly",
     target_os = "freebsd",
     target_os = "illumos",
     target_os = "netbsd",
@@ -352,6 +353,7 @@
         any(target_os = "android", target_os = "linux"),
         feature = "dev_urandom_fallback"
     ),
+    target_os = "dragonfly",
     target_os = "freebsd",
     target_os = "netbsd",
     target_os = "openbsd",