Merge "Update the scudo wrapper for bionic changes."
diff --git a/libc/Android.bp b/libc/Android.bp
index 80b157a..1f54a0a 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -777,6 +777,7 @@
         "bionic/android_set_abort_message.cpp",
 
         "bionic/strchr.cpp",
+        "bionic/strchrnul.cpp",
         "bionic/strnlen.c",
         "bionic/strrchr.cpp",
     ],
@@ -837,25 +838,17 @@
         },
         arm64: {
             srcs: [
-                "arch-arm64/generic/bionic/memcmp.S",
                 "arch-arm64/generic/bionic/memcpy.S",
                 "arch-arm64/generic/bionic/memmove.S",
                 "arch-arm64/generic/bionic/memset.S",
-                "arch-arm64/generic/bionic/stpcpy.S",
-                "arch-arm64/generic/bionic/strcpy.S",
                 "arch-arm64/generic/bionic/wmemmove.S",
 
-                "arch-arm64/default/bionic/memchr.S",
-                "arch-arm64/default/bionic/strchr.S",
-                "arch-arm64/default/bionic/strcmp.S",
-                "arch-arm64/default/bionic/strlen.S",
-                "arch-arm64/default/bionic/strncmp.S",
-                "arch-arm64/default/bionic/strnlen.S",
-
                 "arch-arm64/mte/bionic/memchr.c",
-                "arch-arm64/mte/bionic/strchr.cpp",
+                "arch-arm64/mte/bionic/stpcpy.S",
+                "arch-arm64/mte/bionic/strchrnul.cpp",
+                "arch-arm64/mte/bionic/strrchr.cpp",
                 "arch-arm64/mte/bionic/strcmp.c",
-                "arch-arm64/mte/bionic/strlen.c",
+                "arch-arm64/mte/bionic/strcpy.S",
                 "arch-arm64/mte/bionic/strncmp.c",
                 "arch-arm64/mte/bionic/strnlen.c",
 
@@ -868,7 +861,9 @@
             exclude_srcs: [
                 "bionic/__memcpy_chk.cpp",
                 "bionic/strchr.cpp",
+                "bionic/strchrnul.cpp",
                 "bionic/strnlen.c",
+                "bionic/strrchr.cpp",
             ],
         },
 
@@ -1121,7 +1116,6 @@
         "bionic/spawn.cpp",
         "bionic/stat.cpp",
         "bionic/stdlib_l.cpp",
-        "bionic/strchrnul.cpp",
         "bionic/strerror.cpp",
         "bionic/string_l.cpp",
         "bionic/strings_l.cpp",
@@ -1358,6 +1352,7 @@
 
     whole_static_libs: [
         "gwp_asan",
+        "libarm-optimized-routines-string",
         "libc_bionic_ndk",
         "libc_bootstrap",
         "libc_fortify",
@@ -1388,6 +1383,7 @@
     name: "libc_nopthread",
 
     whole_static_libs: [
+        "libarm-optimized-routines-string",
         "libc_bionic",
         "libc_bionic_ndk",
         "libc_bootstrap",
diff --git a/libc/arch-arm64/default/bionic/memchr.S b/libc/arch-arm64/default/bionic/memchr.S
deleted file mode 100644
index 7fbcc8f..0000000
--- a/libc/arch-arm64/default/bionic/memchr.S
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- *
-   Copyright (c) 2014, ARM Limited
-   All rights Reserved.
-   Copyright (c) 2014, Linaro Ltd.
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are met:
-       * Redistributions of source code must retain the above copyright
-         notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above copyright
-         notice, this list of conditions and the following disclaimer in the
-         documentation and/or other materials provided with the distribution.
-       * Neither the name of the company nor the names of its contributors
-         may be used to endorse or promote products derived from this
-         software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- * Neon Available.
- */
-
-#include <private/bionic_asm.h>
-
-/* Arguments and results.  */
-#define srcin		x0
-#define chrin		w1
-#define cntin		x2
-
-#define result		x0
-
-#define src		x3
-#define	tmp		x4
-#define wtmp2		w5
-#define synd		x6
-#define soff		x9
-#define cntrem		x10
-
-#define vrepchr		v0
-#define vdata1		v1
-#define vdata2		v2
-#define vhas_chr1	v3
-#define vhas_chr2	v4
-#define vrepmask	v5
-#define vend		v6
-
-/*
- * Core algorithm:
- *
- * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
- * per byte. For each tuple, bit 0 is set if the relevant byte matched the
- * requested character and bit 1 is not used (faster than using a 32-bit
- * syndrome). Since the bits in the syndrome reflect exactly the order in which
- * things occur in the original string, counting trailing zeros allows us to
- * identify exactly which byte has matched.
- */
-
-ENTRY(memchr_default)
-	/*
-	 * Magic constant 0x40100401 allows us to identify which lane matches
-	 * the requested byte.
-	 */
-	cbz	cntin, .Lzero_length
-	mov	wtmp2, #0x0401
-	movk	wtmp2, #0x4010, lsl #16
-	dup	vrepchr.16b, chrin
-	/* Work with aligned 32-byte chunks */
-	bic	src, srcin, #31
-	dup	vrepmask.4s, wtmp2
-	ands	soff, srcin, #31
-	and	cntrem, cntin, #31
-	b.eq	.Lloop
-
-	/*
-	 * Input string is not 32-byte aligned. We calculate the syndrome
-	 * value for the aligned 32 bytes block containing the first bytes
-	 * and mask the irrelevant part.
-	 */
-
-	ld1	{vdata1.16b, vdata2.16b}, [src], #32
-	sub	tmp, soff, #32
-	adds	cntin, cntin, tmp
-	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
-	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
-	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
-	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
-	addp	vend.16b, vhas_chr1.16b, vhas_chr2.16b		/* 256->128 */
-	addp	vend.16b, vend.16b, vend.16b			/* 128->64 */
-	mov	synd, vend.d[0]
-	/* Clear the soff*2 lower bits */
-	lsl	tmp, soff, #1
-	lsr	synd, synd, tmp
-	lsl	synd, synd, tmp
-	/* The first block can also be the last */
-	b.ls	.Lmasklast
-	/* Have we found something already? */
-	cbnz	synd, .Ltail
-
-.Lloop:
-	ld1	{vdata1.16b, vdata2.16b}, [src], #32
-	subs	cntin, cntin, #32
-	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
-	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
-	/* If we're out of data we finish regardless of the result */
-	b.ls	.Lend
-	/* Use a fast check for the termination condition */
-	orr	vend.16b, vhas_chr1.16b, vhas_chr2.16b
-	addp	vend.2d, vend.2d, vend.2d
-	mov	synd, vend.d[0]
-	/* We're not out of data, loop if we haven't found the character */
-	cbz	synd, .Lloop
-
-.Lend:
-	/* Termination condition found, let's calculate the syndrome value */
-	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
-	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
-	addp	vend.16b, vhas_chr1.16b, vhas_chr2.16b		/* 256->128 */
-	addp	vend.16b, vend.16b, vend.16b			/* 128->64 */
-	mov	synd, vend.d[0]
-	/* Only do the clear for the last possible block */
-	b.hi	.Ltail
-
-.Lmasklast:
-	/* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */
-	add	tmp, cntrem, soff
-	and	tmp, tmp, #31
-	sub	tmp, tmp, #32
-	neg	tmp, tmp, lsl #1
-	lsl	synd, synd, tmp
-	lsr	synd, synd, tmp
-
-.Ltail:
-	/* Count the trailing zeros using bit reversing */
-	rbit	synd, synd
-	/* Compensate the last post-increment */
-	sub	src, src, #32
-	/* Check that we have found a character */
-	cmp	synd, #0
-	/* And count the leading zeros */
-	clz	synd, synd
-	/* Compute the potential result */
-	add	result, src, synd, lsr #1
-	/* Select result or NULL */
-	csel	result, xzr, result, eq
-	ret
-
-.Lzero_length:
-	mov	result, xzr
-	ret
-END(memchr_default)
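
The syndrome scheme described in the header comment above is shared by all of the deleted NEON routines: two syndrome bits per input byte, with bit 0 set on a match, so counting trailing zeros and halving the result gives the index of the first match (the assembly uses RBIT+CLZ because AArch64 has no trailing-zero-count instruction). A scalar C++ sketch of the idea, for illustration only -- the helper name is invented, and the real routine builds the syndrome with NEON CMEQ/AND/ADDP:

    #include <cstdint>

    // Scalar model of the 2-bits-per-byte syndrome from the deleted memchr:
    // bit 0 of each 2-bit lane is set when the corresponding byte matches,
    // so ctz(syndrome) / 2 is the index of the first matching byte.
    static int first_match_in_chunk(const unsigned char* chunk, unsigned char c) {
      uint64_t syndrome = 0;
      for (int i = 0; i < 32; ++i) {
        if (chunk[i] == c) syndrome |= 1ULL << (2 * i);
      }
      if (syndrome == 0) return -1;          // no match in this 32-byte chunk
      return __builtin_ctzll(syndrome) / 2;  // position of the first match
    }
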
diff --git a/libc/arch-arm64/default/bionic/strchr.S b/libc/arch-arm64/default/bionic/strchr.S
deleted file mode 100644
index f8cb724..0000000
--- a/libc/arch-arm64/default/bionic/strchr.S
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- *
-   Copyright (c) 2014, ARM Limited
-   All rights Reserved.
-   Copyright (c) 2014, Linaro Ltd.
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are met:
-       * Redistributions of source code must retain the above copyright
-         notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above copyright
-         notice, this list of conditions and the following disclaimer in the
-         documentation and/or other materials provided with the distribution.
-       * Neither the name of the company nor the names of its contributors
-         may be used to endorse or promote products derived from this
-         software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- * Neon Available.
- */
-
-#include <private/bionic_asm.h>
-
-/* Arguments and results.  */
-#define srcin		x0
-#define chrin		w1
-
-#define result		x0
-
-#define src		x2
-#define	tmp1		x3
-#define wtmp2		w4
-#define tmp3		x5
-
-#define vrepchr		v0
-#define vdata1		v1
-#define vdata2		v2
-#define vhas_nul1	v3
-#define vhas_nul2	v4
-#define vhas_chr1	v5
-#define vhas_chr2	v6
-#define vrepmask_0	v7
-#define vrepmask_c	v16
-#define vend1		v17
-#define vend2		v18
-
-/* Core algorithm.
-
-   For each 32-byte hunk we calculate a 64-bit syndrome value, with
-   two bits per byte (LSB is always in bits 0 and 1, for both big
-   and little-endian systems).  For each tuple, bit 0 is set iff
-   the relevant byte matched the requested character; bit 1 is set
-   iff the relevant byte matched the NUL end of string (we trigger
-   off bit0 for the special case of looking for NUL).  Since the bits
-   in the syndrome reflect exactly the order in which things occur
-   in the original string a count_trailing_zeros() operation will
-   identify exactly which byte is causing the termination, and why.  */
-
-/* Locals and temporaries.  */
-
-ENTRY(strchr_default)
-	/* Magic constant 0x40100401 to allow us to identify which lane
-	   matches the requested byte.  Magic constant 0x80200802 used
-	   similarly for NUL termination.  */
-	mov	wtmp2, #0x0401
-	movk	wtmp2, #0x4010, lsl #16
-	dup	vrepchr.16b, chrin
-	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
-	dup	vrepmask_c.4s, wtmp2
-	ands	tmp1, srcin, #31
-	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
-	b.eq	.Lloop
-
-	/* Input string is not 32-byte aligned.  Rather than forcing
-	   the padding bytes to a safe value, we calculate the syndrome
-	   for all the bytes, but then mask off those bits of the
-	   syndrome that are related to the padding.  */
-	ld1	{vdata1.16b, vdata2.16b}, [src], #32
-	neg	tmp1, tmp1
-	cmeq	vhas_nul1.16b, vdata1.16b, #0
-	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
-	cmeq	vhas_nul2.16b, vdata2.16b, #0
-	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
-	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
-	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
-	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
-	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
-	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b
-	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
-	lsl	tmp1, tmp1, #1
-	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
-	mov	tmp3, #~0
-	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
-	lsr	tmp1, tmp3, tmp1
-
-	mov	tmp3, vend1.d[0]
-	bic	tmp1, tmp3, tmp1	// Mask padding bits.
-	cbnz	tmp1, .Ltail
-
-.Lloop:
-	ld1	{vdata1.16b, vdata2.16b}, [src], #32
-	cmeq	vhas_nul1.16b, vdata1.16b, #0
-	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
-	cmeq	vhas_nul2.16b, vdata2.16b, #0
-	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
-	/* Use a fast check for the termination condition.  */
-	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b
-	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
-	orr	vend1.16b, vend1.16b, vend2.16b
-	addp	vend1.2d, vend1.2d, vend1.2d
-	mov	tmp1, vend1.d[0]
-	cbz	tmp1, .Lloop
-
-	/* Termination condition found.  Now need to establish exactly why
-	   we terminated.  */
-	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
-	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
-	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
-	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
-	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b
-	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
-	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
-	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
-
-	mov	tmp1, vend1.d[0]
-.Ltail:
-	/* Count the trailing zeros, by bit reversing...  */
-	rbit	tmp1, tmp1
-	/* Re-bias source.  */
-	sub	src, src, #32
-	clz	tmp1, tmp1	/* And counting the leading zeros.  */
-	/* Tmp1 is even if the target character was found first.  Otherwise
-	   we've found the end of string and we weren't looking for NUL.  */
-	tst	tmp1, #1
-	add	result, src, tmp1, lsr #1
-	csel	result, result, xzr, eq
-	ret
-END(strchr_default)
diff --git a/libc/arch-arm64/default/bionic/strcmp.S b/libc/arch-arm64/default/bionic/strcmp.S
deleted file mode 100644
index dfac7c4..0000000
--- a/libc/arch-arm64/default/bionic/strcmp.S
+++ /dev/null
@@ -1,192 +0,0 @@
-/* Copyright (c) 2012, Linaro Limited
-   All rights reserved.
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are met:
-       * Redistributions of source code must retain the above copyright
-         notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above copyright
-         notice, this list of conditions and the following disclaimer in the
-         documentation and/or other materials provided with the distribution.
-       * Neither the name of the Linaro nor the
-         names of its contributors may be used to endorse or promote products
-         derived from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- */
-
-#include <private/bionic_asm.h>
-
-#define L(label) .L ## label
-
-#define REP8_01 0x0101010101010101
-#define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
-
-/* Parameters and result.  */
-#define src1		x0
-#define src2		x1
-#define result		x0
-
-/* Internal variables.  */
-#define data1		x2
-#define data1w		w2
-#define data2		x3
-#define data2w		w3
-#define has_nul		x4
-#define diff		x5
-#define syndrome	x6
-#define tmp1		x7
-#define tmp2		x8
-#define tmp3		x9
-#define zeroones	x10
-#define pos		x11
-
-	/* Start of performance-critical section  -- one 64B cache line.  */
-ENTRY(strcmp_default)
-.p2align  6
-	eor	tmp1, src1, src2
-	mov	zeroones, #REP8_01
-	tst	tmp1, #7
-	b.ne	L(misaligned8)
-	ands	tmp1, src1, #7
-	b.ne	L(mutual_align)
-	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
-	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
-	   can be done in parallel across the entire word.  */
-L(loop_aligned):
-	ldr	data1, [src1], #8
-	ldr	data2, [src2], #8
-L(start_realigned):
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, #REP8_7f
-	eor	diff, data1, data2	/* Non-zero if differences found.  */
-	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
-	orr	syndrome, diff, has_nul
-	cbz	syndrome, L(loop_aligned)
-	/* End of performance-critical section  -- one 64B cache line.  */
-
-L(end):
-#ifndef	__AARCH64EB__
-	rev	syndrome, syndrome
-	rev	data1, data1
-	/* The MS-non-zero bit of the syndrome marks either the first bit
-	   that is different, or the top bit of the first zero byte.
-	   Shifting left now will bring the critical information into the
-	   top bits.  */
-	clz	pos, syndrome
-	rev	data2, data2
-	lsl	data1, data1, pos
-	lsl	data2, data2, pos
-	/* But we need to zero-extend (char is unsigned) the value and then
-	   perform a signed 32-bit subtraction.  */
-	lsr	data1, data1, #56
-	sub	result, data1, data2, lsr #56
-	ret
-#else
-	/* For big-endian we cannot use the trick with the syndrome value
-	   as carry-propagation can corrupt the upper bits if the trailing
-	   bytes in the string contain 0x01.  */
-	/* However, if there is no NUL byte in the dword, we can generate
-	   the result directly.  We can't just subtract the bytes as the
-	   MSB might be significant.  */
-	cbnz	has_nul, 1f
-	cmp	data1, data2
-	cset	result, ne
-	cneg	result, result, lo
-	ret
-1:
-	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */
-	rev	tmp3, data1
-	sub	tmp1, tmp3, zeroones
-	orr	tmp2, tmp3, #REP8_7f
-	bic	has_nul, tmp1, tmp2
-	rev	has_nul, has_nul
-	orr	syndrome, diff, has_nul
-	clz	pos, syndrome
-	/* The MS-non-zero bit of the syndrome marks either the first bit
-	   that is different, or the top bit of the first zero byte.
-	   Shifting left now will bring the critical information into the
-	   top bits.  */
-	lsl	data1, data1, pos
-	lsl	data2, data2, pos
-	/* But we need to zero-extend (char is unsigned) the value and then
-	   perform a signed 32-bit subtraction.  */
-	lsr	data1, data1, #56
-	sub	result, data1, data2, lsr #56
-	ret
-#endif
-
-L(mutual_align):
-	/* Sources are mutually aligned, but are not currently at an
-	   alignment boundary.  Round down the addresses and then mask off
-	   the bytes that precede the start point.  */
-	bic	src1, src1, #7
-	bic	src2, src2, #7
-	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
-	ldr	data1, [src1], #8
-	neg	tmp1, tmp1		/* Bits to alignment -64.  */
-	ldr	data2, [src2], #8
-	mov	tmp2, #~0
-#ifdef __AARCH64EB__
-	/* Big-endian.  Early bytes are at MSB.  */
-	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
-#else
-	/* Little-endian.  Early bytes are at LSB.  */
-	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
-#endif
-	orr	data1, data1, tmp2
-	orr	data2, data2, tmp2
-	b	L(start_realigned)
-
-L(misaligned8):
-	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
-	   checking to make sure that we don't access beyond page boundary in
-	   SRC2.  */
-	tst	src1, #7
-	b.eq	L(loop_misaligned)
-L(do_misaligned):
-	ldrb	data1w, [src1], #1
-	ldrb	data2w, [src2], #1
-	cmp	data1w, #1
-	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
-	b.ne	L(done)
-	tst	src1, #7
-	b.ne	L(do_misaligned)
-
-L(loop_misaligned):
-	/* Test if we are within the last dword of the end of a 4K page.  If
-	   yes then jump back to the misaligned loop to copy a byte at a time.  */
-	and	tmp1, src2, #0xff8
-	eor	tmp1, tmp1, #0xff8
-	cbz	tmp1, L(do_misaligned)
-	ldr	data1, [src1], #8
-	ldr	data2, [src2], #8
-
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, #REP8_7f
-	eor	diff, data1, data2	/* Non-zero if differences found.  */
-	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
-	orr	syndrome, diff, has_nul
-	cbz	syndrome, L(loop_misaligned)
-	b	L(end)
-
-L(done):
-	sub	result, data1, data2
-	ret
-END(strcmp_default)
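
The "(X - 1) & (~X) & 0x80" NUL test cited throughout these comments checks all eight bytes of a word at once; the assembly computes it as (X - REP8_01) & ~(X | REP8_7f) with a SUB/ORR/BIC sequence. A direct C++ transcription (sketch; the helper name is invented):

    #include <cstdint>

    // Non-zero iff some byte of x is zero: subtracting 1 borrows through a
    // zero byte and sets its top bit, which ~(x | 0x7f...) then isolates.
    // Matches the SUB/ORR/BIC sequence in the deleted code.
    static bool word_has_nul_byte(uint64_t x) {
      const uint64_t kRep8_01 = 0x0101010101010101ULL;  // REP8_01
      const uint64_t kRep8_7f = 0x7f7f7f7f7f7f7f7fULL;  // REP8_7f
      return ((x - kRep8_01) & ~(x | kRep8_7f)) != 0;
    }
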
diff --git a/libc/arch-arm64/default/bionic/strlen.S b/libc/arch-arm64/default/bionic/strlen.S
deleted file mode 100644
index 07c5294..0000000
--- a/libc/arch-arm64/default/bionic/strlen.S
+++ /dev/null
@@ -1,227 +0,0 @@
-/* Copyright (c) 2013-2015, Linaro Limited
-   All rights reserved.
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are met:
-       * Redistributions of source code must retain the above copyright
-	 notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above copyright
-	 notice, this list of conditions and the following disclaimer in the
-	 documentation and/or other materials provided with the distribution.
-       * Neither the name of the Linaro nor the
-	 names of its contributors may be used to endorse or promote products
-	 derived from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64, unaligned accesses, min page size 4k.
- */
-
-#include <private/bionic_asm.h>
-
-/* To test the page crossing code path more thoroughly, compile with
-   -DTEST_PAGE_CROSS - this will force all calls through the slower
-   entry path.  This option is not intended for production use.	 */
-
-/* Arguments and results.  */
-#define srcin		x0
-#define len		x0
-
-/* Locals and temporaries.  */
-#define src		x1
-#define data1		x2
-#define data2		x3
-#define has_nul1	x4
-#define has_nul2	x5
-#define tmp1		x4
-#define tmp2		x5
-#define tmp3		x6
-#define tmp4		x7
-#define zeroones	x8
-
-#define L(l) .L ## l
-
-	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
-	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
-	   can be done in parallel across the entire word. A faster check
-	   (X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives
-	   false hits for characters 129..255.	*/
-
-#define REP8_01 0x0101010101010101
-#define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
-
-#ifdef TEST_PAGE_CROSS
-# define MIN_PAGE_SIZE 15
-#else
-# define MIN_PAGE_SIZE 4096
-#endif
-
-	/* Since strings are short on average, we check the first 16 bytes
-	   of the string for a NUL character.  In order to do an unaligned ldp
-	   safely we have to do a page cross check first.  If there is a NUL
-	   byte we calculate the length from the 2 8-byte words using
-	   conditional select to reduce branch mispredictions (it is unlikely
-	   strlen will be repeatedly called on strings with the same length).
-
-	   If the string is longer than 16 bytes, we align src so don't need
-	   further page cross checks, and process 32 bytes per iteration
-	   using the fast NUL check.  If we encounter non-ASCII characters,
-	   fallback to a second loop using the full NUL check.
-
-	   If the page cross check fails, we read 16 bytes from an aligned
-	   address, remove any characters before the string, and continue
-	   in the main loop using aligned loads.  Since strings crossing a
-	   page in the first 16 bytes are rare (probability of
-	   16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized.
-
-	   AArch64 systems have a minimum page size of 4k.  We don't bother
-	   checking for larger page sizes - the cost of setting up the correct
-	   page size is just not worth the extra gain from a small reduction in
-	   the cases taking the slow path.  Note that we only care about
-	   whether the first fetch, which may be misaligned, crosses a page
-	   boundary.  */
-
-ENTRY(strlen_default)
-	and	tmp1, srcin, MIN_PAGE_SIZE - 1
-	mov	zeroones, REP8_01
-	cmp	tmp1, MIN_PAGE_SIZE - 16
-	b.gt	L(page_cross)
-	ldp	data1, data2, [srcin]
-#ifdef __AARCH64EB__
-	/* For big-endian, carry propagation (if the final byte in the
-	   string is 0x01) means we cannot use has_nul1/2 directly.
-	   Since we expect strings to be small and early-exit,
-	   byte-swap the data now so has_nul1/2 will be correct.  */
-	rev	data1, data1
-	rev	data2, data2
-#endif
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, REP8_7f
-	sub	tmp3, data2, zeroones
-	orr	tmp4, data2, REP8_7f
-	bics	has_nul1, tmp1, tmp2
-	bic	has_nul2, tmp3, tmp4
-	ccmp	has_nul2, 0, 0, eq
-	beq	L(main_loop_entry)
-
-	/* Enter with C = has_nul1 == 0.  */
-	csel	has_nul1, has_nul1, has_nul2, cc
-	mov	len, 8
-	rev	has_nul1, has_nul1
-	clz	tmp1, has_nul1
-	csel	len, xzr, len, cc
-	add	len, len, tmp1, lsr 3
-	ret
-
-	/* The inner loop processes 32 bytes per iteration and uses the fast
-	   NUL check.  If we encounter non-ASCII characters, use a second
-	   loop with the accurate NUL check.  */
-	.p2align 4
-L(main_loop_entry):
-	bic	src, srcin, 15
-	sub	src, src, 16
-L(main_loop):
-	ldp	data1, data2, [src, 32]!
-.Lpage_cross_entry:
-	sub	tmp1, data1, zeroones
-	sub	tmp3, data2, zeroones
-	orr	tmp2, tmp1, tmp3
-	tst	tmp2, zeroones, lsl 7
-	bne	1f
-	ldp	data1, data2, [src, 16]
-	sub	tmp1, data1, zeroones
-	sub	tmp3, data2, zeroones
-	orr	tmp2, tmp1, tmp3
-	tst	tmp2, zeroones, lsl 7
-	beq	L(main_loop)
-	add	src, src, 16
-1:
-	/* The fast check failed, so do the slower, accurate NUL check.	 */
-	orr	tmp2, data1, REP8_7f
-	orr	tmp4, data2, REP8_7f
-	bics	has_nul1, tmp1, tmp2
-	bic	has_nul2, tmp3, tmp4
-	ccmp	has_nul2, 0, 0, eq
-	beq	L(nonascii_loop)
-
-	/* Enter with C = has_nul1 == 0.  */
-L(tail):
-#ifdef __AARCH64EB__
-	/* For big-endian, carry propagation (if the final byte in the
-	   string is 0x01) means we cannot use has_nul1/2 directly.  The
-	   easiest way to get the correct byte is to byte-swap the data
-	   and calculate the syndrome a second time.  */
-	csel	data1, data1, data2, cc
-	rev	data1, data1
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, REP8_7f
-	bic	has_nul1, tmp1, tmp2
-#else
-	csel	has_nul1, has_nul1, has_nul2, cc
-#endif
-	sub	len, src, srcin
-	rev	has_nul1, has_nul1
-	add	tmp2, len, 8
-	clz	tmp1, has_nul1
-	csel	len, len, tmp2, cc
-	add	len, len, tmp1, lsr 3
-	ret
-
-L(nonascii_loop):
-	ldp	data1, data2, [src, 16]!
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, REP8_7f
-	sub	tmp3, data2, zeroones
-	orr	tmp4, data2, REP8_7f
-	bics	has_nul1, tmp1, tmp2
-	bic	has_nul2, tmp3, tmp4
-	ccmp	has_nul2, 0, 0, eq
-	bne	L(tail)
-	ldp	data1, data2, [src, 16]!
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, REP8_7f
-	sub	tmp3, data2, zeroones
-	orr	tmp4, data2, REP8_7f
-	bics	has_nul1, tmp1, tmp2
-	bic	has_nul2, tmp3, tmp4
-	ccmp	has_nul2, 0, 0, eq
-	beq	L(nonascii_loop)
-	b	L(tail)
-
-	/* Load 16 bytes from [srcin & ~15] and force the bytes that precede
-	   srcin to 0x7f, so we ignore any NUL bytes before the string.
-	   Then continue in the aligned loop.  */
-L(page_cross):
-	bic	src, srcin, 15
-	ldp	data1, data2, [src]
-	lsl	tmp1, srcin, 3
-	mov	tmp4, -1
-#ifdef __AARCH64EB__
-	/* Big-endian.	Early bytes are at MSB.	 */
-	lsr	tmp1, tmp4, tmp1	/* Shift (tmp1 & 63).  */
-#else
-	/* Little-endian.  Early bytes are at LSB.  */
-	lsl	tmp1, tmp4, tmp1	/* Shift (tmp1 & 63).  */
-#endif
-	orr	tmp1, tmp1, REP8_80
-	orn	data1, data1, tmp1
-	orn	tmp2, data2, tmp1
-	tst	srcin, 8
-	csel	data1, data1, tmp4, eq
-	csel	data2, data2, tmp2, eq
-	b	L(page_cross_entry)
-
-END(strlen_default)
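
The fast path of the deleted strlen hinges on the page-cross test its header comment describes: the first unaligned 16-byte LDP is safe unless the address sits in the last 15 bytes of a minimum-size (4 KiB) page. The test reduces to the following (sketch; the helper name is invented):

    #include <cstdint>

    // Mirrors "and tmp1, srcin, MIN_PAGE_SIZE - 1" followed by
    // "cmp tmp1, MIN_PAGE_SIZE - 16" in the deleted code: the initial
    // 16-byte unaligned load may fault only when srcin falls within the
    // last 15 bytes of a 4 KiB page.
    static bool first_ldp_may_cross_page(uintptr_t srcin) {
      const uintptr_t kMinPageSize = 4096;
      return (srcin & (kMinPageSize - 1)) > kMinPageSize - 16;
    }
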
diff --git a/libc/arch-arm64/default/bionic/strncmp.S b/libc/arch-arm64/default/bionic/strncmp.S
deleted file mode 100644
index 5432b73..0000000
--- a/libc/arch-arm64/default/bionic/strncmp.S
+++ /dev/null
@@ -1,280 +0,0 @@
-/* Copyright (c) 2014, Linaro Limited
-   All rights reserved.
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are met:
-       * Redistributions of source code must retain the above copyright
-         notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above copyright
-         notice, this list of conditions and the following disclaimer in the
-         documentation and/or other materials provided with the distribution.
-       * Neither the name of the Linaro nor the
-         names of its contributors may be used to endorse or promote products
-         derived from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- */
-
-#include <private/bionic_asm.h>
-
-#define REP8_01 0x0101010101010101
-#define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
-
-/* Parameters and result.  */
-#define src1		x0
-#define src2		x1
-#define limit		x2
-#define result		x0
-
-/* Internal variables.  */
-#define data1		x3
-#define data1w		w3
-#define data2		x4
-#define data2w		w4
-#define has_nul		x5
-#define diff		x6
-#define syndrome	x7
-#define tmp1		x8
-#define tmp2		x9
-#define tmp3		x10
-#define zeroones	x11
-#define pos		x12
-#define limit_wd	x13
-#define mask		x14
-#define endloop		x15
-#define count		mask
-
-	.text
-	.p2align 6
-	.rep 7
-	nop	/* Pad so that the loop below fits a cache line.  */
-	.endr
-ENTRY(strncmp_default)
-	cbz	limit, .Lret0
-	eor	tmp1, src1, src2
-	mov	zeroones, #REP8_01
-	tst	tmp1, #7
-	and	count, src1, #7
-	b.ne	.Lmisaligned8
-	cbnz	count, .Lmutual_align
-	/* Calculate the number of full and partial words -1.  */
-	sub	limit_wd, limit, #1	/* limit != 0, so no underflow.  */
-	lsr	limit_wd, limit_wd, #3	/* Convert to Dwords.  */
-
-	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
-	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
-	   can be done in parallel across the entire word.  */
-	/* Start of performance-critical section  -- one 64B cache line.  */
-.Lloop_aligned:
-	ldr	data1, [src1], #8
-	ldr	data2, [src2], #8
-.Lstart_realigned:
-	subs	limit_wd, limit_wd, #1
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, #REP8_7f
-	eor	diff, data1, data2	/* Non-zero if differences found.  */
-	csinv	endloop, diff, xzr, pl	/* Last Dword or differences.  */
-	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
-	ccmp	endloop, #0, #0, eq
-	b.eq	.Lloop_aligned
-	/* End of performance-critical section  -- one 64B cache line.  */
-
-	/* Not reached the limit, must have found the end or a diff.  */
-	tbz	limit_wd, #63, .Lnot_limit
-
-	/* Limit % 8 == 0 => all bytes significant.  */
-	ands	limit, limit, #7
-	b.eq	.Lnot_limit
-
-	lsl	limit, limit, #3	/* Bits -> bytes.  */
-	mov	mask, #~0
-#ifdef __AARCH64EB__
-	lsr	mask, mask, limit
-#else
-	lsl	mask, mask, limit
-#endif
-	bic	data1, data1, mask
-	bic	data2, data2, mask
-
-	/* Make sure that the NUL byte is marked in the syndrome.  */
-	orr	has_nul, has_nul, mask
-
-.Lnot_limit:
-	orr	syndrome, diff, has_nul
-
-#ifndef	__AARCH64EB__
-	rev	syndrome, syndrome
-	rev	data1, data1
-	/* The MS-non-zero bit of the syndrome marks either the first bit
-	   that is different, or the top bit of the first zero byte.
-	   Shifting left now will bring the critical information into the
-	   top bits.  */
-	clz	pos, syndrome
-	rev	data2, data2
-	lsl	data1, data1, pos
-	lsl	data2, data2, pos
-	/* But we need to zero-extend (char is unsigned) the value and then
-	   perform a signed 32-bit subtraction.  */
-	lsr	data1, data1, #56
-	sub	result, data1, data2, lsr #56
-	ret
-#else
-	/* For big-endian we cannot use the trick with the syndrome value
-	   as carry-propagation can corrupt the upper bits if the trailing
-	   bytes in the string contain 0x01.  */
-	/* However, if there is no NUL byte in the dword, we can generate
-	   the result directly.  We can't just subtract the bytes as the
-	   MSB might be significant.  */
-	cbnz	has_nul, 1f
-	cmp	data1, data2
-	cset	result, ne
-	cneg	result, result, lo
-	ret
-1:
-	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */
-	rev	tmp3, data1
-	sub	tmp1, tmp3, zeroones
-	orr	tmp2, tmp3, #REP8_7f
-	bic	has_nul, tmp1, tmp2
-	rev	has_nul, has_nul
-	orr	syndrome, diff, has_nul
-	clz	pos, syndrome
-	/* The MS-non-zero bit of the syndrome marks either the first bit
-	   that is different, or the top bit of the first zero byte.
-	   Shifting left now will bring the critical information into the
-	   top bits.  */
-	lsl	data1, data1, pos
-	lsl	data2, data2, pos
-	/* But we need to zero-extend (char is unsigned) the value and then
-	   perform a signed 32-bit subtraction.  */
-	lsr	data1, data1, #56
-	sub	result, data1, data2, lsr #56
-	ret
-#endif
-
-.Lmutual_align:
-	/* Sources are mutually aligned, but are not currently at an
-	   alignment boundary.  Round down the addresses and then mask off
-	   the bytes that precede the start point.
-	   We also need to adjust the limit calculations, but without
-	   overflowing if the limit is near ULONG_MAX.  */
-	bic	src1, src1, #7
-	bic	src2, src2, #7
-	ldr	data1, [src1], #8
-	neg	tmp3, count, lsl #3	/* 64 - bits(bytes beyond align). */
-	ldr	data2, [src2], #8
-	mov	tmp2, #~0
-	sub	limit_wd, limit, #1	/* limit != 0, so no underflow.  */
-#ifdef __AARCH64EB__
-	/* Big-endian.  Early bytes are at MSB.  */
-	lsl	tmp2, tmp2, tmp3	/* Shift (count & 63).  */
-#else
-	/* Little-endian.  Early bytes are at LSB.  */
-	lsr	tmp2, tmp2, tmp3	/* Shift (count & 63).  */
-#endif
-	and	tmp3, limit_wd, #7
-	lsr	limit_wd, limit_wd, #3
-	/* Adjust the limit. Only low 3 bits used, so overflow irrelevant.  */
-	add	limit, limit, count
-	add	tmp3, tmp3, count
-	orr	data1, data1, tmp2
-	orr	data2, data2, tmp2
-	add	limit_wd, limit_wd, tmp3, lsr #3
-	b	.Lstart_realigned
-
-	.p2align 6
-	/* Don't bother with dwords for up to 16 bytes.  */
-.Lmisaligned8:
-	cmp	limit, #16
-	b.hs	.Ltry_misaligned_words
-
-.Lbyte_loop:
-	/* Perhaps we can do better than this.  */
-	ldrb	data1w, [src1], #1
-	ldrb	data2w, [src2], #1
-	subs	limit, limit, #1
-	ccmp	data1w, #1, #0, hi	/* NZCV = 0b0000.  */
-	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
-	b.eq	.Lbyte_loop
-.Ldone:
-	sub	result, data1, data2
-	ret
-	/* Align the SRC1 to a dword by doing a bytewise compare and then do
-	   the dword loop.  */
-.Ltry_misaligned_words:
-	lsr	limit_wd, limit, #3
-	cbz	count, .Ldo_misaligned
-
-	neg	count, count
-	and	count, count, #7
-	sub	limit, limit, count
-	lsr	limit_wd, limit, #3
-
-.Lpage_end_loop:
-	ldrb	data1w, [src1], #1
-	ldrb	data2w, [src2], #1
-	cmp	data1w, #1
-	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
-	b.ne	.Ldone
-	subs	count, count, #1
-	b.hi	.Lpage_end_loop
-
-.Ldo_misaligned:
-	/* Prepare ourselves for the next page crossing.  Unlike the aligned
-	   loop, we fetch 1 less dword because we risk crossing bounds on
-	   SRC2.  */
-	mov	count, #8
-	subs	limit_wd, limit_wd, #1
-	b.lo	.Ldone_loop
-.Lloop_misaligned:
-	and	tmp2, src2, #0xff8
-	eor	tmp2, tmp2, #0xff8
-	cbz	tmp2, .Lpage_end_loop
-
-	ldr	data1, [src1], #8
-	ldr	data2, [src2], #8
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, #REP8_7f
-	eor	diff, data1, data2	/* Non-zero if differences found.  */
-	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
-	ccmp	diff, #0, #0, eq
-	b.ne	.Lnot_limit
-	subs	limit_wd, limit_wd, #1
-	b.pl	.Lloop_misaligned
-
-.Ldone_loop:
-	/* We found a difference or a NUL before the limit was reached.  */
-	and	limit, limit, #7
-	cbz	limit, .Lnot_limit
-	/* Read the last word.  */
-	sub	src1, src1, 8
-	sub	src2, src2, 8
-	ldr	data1, [src1, limit]
-	ldr	data2, [src2, limit]
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, #REP8_7f
-	eor	diff, data1, data2	/* Non-zero if differences found.  */
-	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
-	ccmp	diff, #0, #0, eq
-	b.ne	.Lnot_limit
-
-.Lret0:
-	mov	result, #0
-	ret
-END(strncmp_default)
diff --git a/libc/arch-arm64/default/bionic/strnlen.S b/libc/arch-arm64/default/bionic/strnlen.S
deleted file mode 100644
index 1694532..0000000
--- a/libc/arch-arm64/default/bionic/strnlen.S
+++ /dev/null
@@ -1,174 +0,0 @@
-/* Copyright (c) 2014, Linaro Limited
-   All rights reserved.
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are met:
-       * Redistributions of source code must retain the above copyright
-         notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above copyright
-         notice, this list of conditions and the following disclaimer in the
-         documentation and/or other materials provided with the distribution.
-       * Neither the name of the Linaro nor the
-         names of its contributors may be used to endorse or promote products
-         derived from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- */
-
-#include <private/bionic_asm.h>
-
-/* Arguments and results.  */
-#define srcin		x0
-#define len		x0
-#define limit		x1
-
-/* Locals and temporaries.  */
-#define src		x2
-#define data1		x3
-#define data2		x4
-#define data2a		x5
-#define has_nul1	x6
-#define has_nul2	x7
-#define tmp1		x8
-#define tmp2		x9
-#define tmp3		x10
-#define tmp4		x11
-#define zeroones	x12
-#define pos		x13
-#define limit_wd	x14
-
-#define REP8_01 0x0101010101010101
-#define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
-
-	.text
-	.p2align	6
-.Lstart:
-	/* Pre-pad to ensure critical loop begins on an icache line.  */
-	.rep 7
-	nop
-	.endr
-	/* Put this code here to avoid wasting more space with pre-padding.  */
-.Lhit_limit:
-	mov	len, limit
-	ret
-
-ENTRY(strnlen_default)
-	cbz	limit, .Lhit_limit
-	mov	zeroones, #REP8_01
-	bic	src, srcin, #15
-	ands	tmp1, srcin, #15
-	b.ne	.Lmisaligned
-	/* Calculate the number of full and partial words -1.  */
-	sub	limit_wd, limit, #1	/* Limit != 0, so no underflow.  */
-	lsr	limit_wd, limit_wd, #4	/* Convert to Qwords.  */
-
-	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
-	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
-	   can be done in parallel across the entire word.  */
-	/* The inner loop deals with two Dwords at a time.  This has a
-	   slightly higher start-up cost, but we should win quite quickly,
-	   especially on cores with a high number of issue slots per
-	   cycle, as we get much better parallelism out of the operations.  */
-
-	/* Start of critical section -- keep to one 64Byte cache line.  */
-.Lloop:
-	ldp	data1, data2, [src], #16
-.Lrealigned:
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, #REP8_7f
-	sub	tmp3, data2, zeroones
-	orr	tmp4, data2, #REP8_7f
-	bic	has_nul1, tmp1, tmp2
-	bic	has_nul2, tmp3, tmp4
-	subs	limit_wd, limit_wd, #1
-	orr	tmp1, has_nul1, has_nul2
-	ccmp	tmp1, #0, #0, pl	/* NZCV = 0000  */
-	b.eq	.Lloop
-	/* End of critical section -- keep to one 64Byte cache line.  */
-
-	orr	tmp1, has_nul1, has_nul2
-	cbz	tmp1, .Lhit_limit	/* No null in final Qword.  */
-
-	/* We know there's a null in the final Qword.  The easiest thing
-	   to do now is work out the length of the string and return
-	   MIN (len, limit).  */
-
-	sub	len, src, srcin
-	cbz	has_nul1, .Lnul_in_data2
-#ifdef __AARCH64EB__
-	mov	data2, data1
-#endif
-	sub	len, len, #8
-	mov	has_nul2, has_nul1
-.Lnul_in_data2:
-#ifdef __AARCH64EB__
-	/* For big-endian, carry propagation (if the final byte in the
-	   string is 0x01) means we cannot use has_nul directly.  The
-	   easiest way to get the correct byte is to byte-swap the data
-	   and calculate the syndrome a second time.  */
-	rev	data2, data2
-	sub	tmp1, data2, zeroones
-	orr	tmp2, data2, #REP8_7f
-	bic	has_nul2, tmp1, tmp2
-#endif
-	sub	len, len, #8
-	rev	has_nul2, has_nul2
-	clz	pos, has_nul2
-	add	len, len, pos, lsr #3		/* Bits to bytes.  */
-	cmp	len, limit
-	csel	len, len, limit, ls		/* Return the lower value.  */
-	ret
-
-.Lmisaligned:
-	/* Deal with a partial first word.
-	   We're doing two things in parallel here:
-	   1) Calculate the number of words (but avoiding overflow if
-	      limit is near ULONG_MAX) - to do this we need to work out
-	      limit + tmp1 - 1 as a 65-bit value before shifting it;
-	   2) Load and mask the initial data words - we force the bytes
-	      before the ones we are interested in to 0xff - this ensures
-	      early bytes will not hit any zero detection.  */
-	sub	limit_wd, limit, #1
-	neg	tmp4, tmp1
-	cmp	tmp1, #8
-
-	and	tmp3, limit_wd, #15
-	lsr	limit_wd, limit_wd, #4
-	mov	tmp2, #~0
-
-	ldp	data1, data2, [src], #16
-	lsl	tmp4, tmp4, #3		/* Bytes beyond alignment -> bits.  */
-	add	tmp3, tmp3, tmp1
-
-#ifdef __AARCH64EB__
-	/* Big-endian.  Early bytes are at MSB.  */
-	lsl	tmp2, tmp2, tmp4	/* Shift (tmp1 & 63).  */
-#else
-	/* Little-endian.  Early bytes are at LSB.  */
-	lsr	tmp2, tmp2, tmp4	/* Shift (tmp1 & 63).  */
-#endif
-	add	limit_wd, limit_wd, tmp3, lsr #4
-
-	orr	data1, data1, tmp2
-	orr	data2a, data2, tmp2
-
-	csinv	data1, data1, xzr, le
-	csel	data2, data2, data2a, le
-	b	.Lrealigned
-END(strnlen_default)
diff --git a/libc/arch-arm64/dynamic_function_dispatch.cpp b/libc/arch-arm64/dynamic_function_dispatch.cpp
index 37abea4..5c6d4ed 100644
--- a/libc/arch-arm64/dynamic_function_dispatch.cpp
+++ b/libc/arch-arm64/dynamic_function_dispatch.cpp
@@ -47,16 +47,34 @@
     if (supports_mte(arg->_hwcap2)) {
         RETURN_FUNC(memchr_func, memchr_mte);
     } else {
-        RETURN_FUNC(memchr_func, memchr_default);
+        RETURN_FUNC(memchr_func, __memchr_aarch64);
+    }
+}
+
+typedef char* stpcpy_func(char*, const char*);
+DEFINE_IFUNC_FOR(stpcpy) {
+    if (supports_mte(arg->_hwcap2)) {
+        RETURN_FUNC(stpcpy_func, stpcpy_mte);
+    } else {
+        RETURN_FUNC(stpcpy_func, __stpcpy_aarch64);
     }
 }
 
 typedef char* strchr_func(const char*, int);
 DEFINE_IFUNC_FOR(strchr) {
     if (supports_mte(arg->_hwcap2)) {
-        RETURN_FUNC(strchr_func, strchr_mte);
+        RETURN_FUNC(strchr_func, __strchr_aarch64_mte);
     } else {
-        RETURN_FUNC(strchr_func, strchr_default);
+        RETURN_FUNC(strchr_func, __strchr_aarch64);
+    }
+}
+
+typedef char* strchrnul_func(const char*, int);
+DEFINE_IFUNC_FOR(strchrnul) {
+    if (supports_mte(arg->_hwcap2)) {
+        RETURN_FUNC(strchrnul_func, strchrnul_mte);
+    } else {
+        RETURN_FUNC(strchrnul_func, __strchrnul_aarch64);
     }
 }
 
@@ -65,16 +83,25 @@
     if (supports_mte(arg->_hwcap2)) {
         RETURN_FUNC(strcmp_func, strcmp_mte);
     } else {
-        RETURN_FUNC(strcmp_func, strcmp_default);
+        RETURN_FUNC(strcmp_func, __strcmp_aarch64);
+    }
+}
+
+typedef char* strcpy_func(char*, const char*);
+DEFINE_IFUNC_FOR(strcpy) {
+    if (supports_mte(arg->_hwcap2)) {
+        RETURN_FUNC(strcpy_func, strcpy_mte);
+    } else {
+        RETURN_FUNC(strcpy_func, __strcpy_aarch64);
     }
 }
 
 typedef size_t strlen_func(const char*);
 DEFINE_IFUNC_FOR(strlen) {
     if (supports_mte(arg->_hwcap2)) {
-        RETURN_FUNC(strlen_func, strlen_mte);
+        RETURN_FUNC(strlen_func, __strlen_aarch64_mte);
     } else {
-        RETURN_FUNC(strlen_func, strlen_default);
+        RETURN_FUNC(strlen_func, __strlen_aarch64);
     }
 }
 
@@ -83,7 +110,7 @@
     if (supports_mte(arg->_hwcap2)) {
         RETURN_FUNC(strncmp_func, strncmp_mte);
     } else {
-        RETURN_FUNC(strncmp_func, strncmp_default);
+        RETURN_FUNC(strncmp_func, __strncmp_aarch64);
     }
 }
 
@@ -92,7 +119,16 @@
     if (supports_mte(arg->_hwcap2)) {
         RETURN_FUNC(strnlen_func, strnlen_mte);
     } else {
-        RETURN_FUNC(strnlen_func, strnlen_default);
+        RETURN_FUNC(strnlen_func, __strnlen_aarch64);
+    }
+}
+
+typedef char* strrchr_func(const char*, int);
+DEFINE_IFUNC_FOR(strrchr) {
+    if (supports_mte(arg->_hwcap2)) {
+        RETURN_FUNC(strrchr_func, strrchr_mte);
+    } else {
+        RETURN_FUNC(strrchr_func, __strrchr_aarch64);
     }
 }
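
These resolvers are bionic's ifunc dispatch: each runs once, when the symbol is bound, and selects either an MTE-safe implementation (when the hardware capability bits report MTE) or the optimized __*_aarch64 routine now supplied by libarm-optimized-routines-string. With the DEFINE_IFUNC_FOR/RETURN_FUNC macros expanded by hand, the pattern is roughly the following sketch -- the resolver ABI is elided, and the HWCAP2_MTE value is an assumption taken from the kernel's <asm/hwcap.h>:

    #include <cstdint>

    extern "C" char* __strchr_aarch64(const char*, int);
    extern "C" char* __strchr_aarch64_mte(const char*, int);

    typedef char* strchr_func(const char*, int);

    // Hand-expanded shape of DEFINE_IFUNC_FOR(strchr) above. Under MTE,
    // reads must not run past a 16-byte tag granule, so the MTE-safe
    // variant is chosen whenever the capability bit is present.
    static strchr_func* strchr_ifunc_resolver(uint64_t hwcap2) {
      const uint64_t kHwcap2Mte = 1UL << 18;  // HWCAP2_MTE (assumed value)
      return (hwcap2 & kHwcap2Mte) ? __strchr_aarch64_mte : __strchr_aarch64;
    }
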
 
diff --git a/libc/arch-arm64/generic/bionic/memcmp.S b/libc/arch-arm64/generic/bionic/memcmp.S
deleted file mode 100644
index bff54ae..0000000
--- a/libc/arch-arm64/generic/bionic/memcmp.S
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- *    products derived from this software without specific prior written
- *    permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64, unaligned accesses.
- */
-
-#include <private/bionic_asm.h>
-
-#define L(l) .L ## l
-
-/* Parameters and result.  */
-#define src1		x0
-#define src2		x1
-#define limit		x2
-#define result		w0
-
-/* Internal variables.  */
-#define data1		x3
-#define data1w		w3
-#define data1h		x4
-#define data2		x5
-#define data2w		w5
-#define data2h		x6
-#define tmp1		x7
-#define tmp2		x8
-
-/* Small inputs of less than 8 bytes are handled separately.  This allows the
-   main code to be sped up using unaligned loads since there are now at least
-   8 bytes to be compared.  If the first 8 bytes are equal, align src1.
-   This ensures each iteration does at most one unaligned access even if both
-   src1 and src2 are unaligned, and mutually aligned inputs behave as if
-   aligned.  After the main loop, process the last 16 bytes using unaligned
-   accesses.  */
-
-ENTRY(memcmp)
-.p2align 6
-	subs	limit, limit, 8
-	b.lo	L(less8)
-
-	/* Limit >= 8, so check first 8 bytes using unaligned loads.  */
-	ldr	data1, [src1], 8
-	ldr	data2, [src2], 8
-	cmp	data1, data2
-	b.ne	L(return)
-
-	subs	limit, limit, 8
-	b.gt	L(more16)
-
-	ldr	data1, [src1, limit]
-	ldr	data2, [src2, limit]
-	b	L(return)
-
-L(more16):
-	ldr	data1, [src1], 8
-	ldr	data2, [src2], 8
-	cmp	data1, data2
-	bne	L(return)
-
-	/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
-	   strings.  */
-	subs	limit, limit, 16
-	b.ls	L(last_bytes)
-
-	/* We overlap loads between 0-32 bytes at either side of SRC1 when we
-	   try to align, so limit it only to strings larger than 128 bytes.  */
-	cmp	limit, 96
-	b.ls	L(loop16)
-
-	/* Align src1 and adjust src2 with bytes not yet done.  */
-	and	tmp1, src1, 15
-	add	limit, limit, tmp1
-	sub	src1, src1, tmp1
-	sub	src2, src2, tmp1
-
-	/* Loop performing 16 bytes per iteration using aligned src1.
-	   Limit is pre-decremented by 16 and must be larger than zero.
-	   Exit if <= 16 bytes left to do or if the data is not equal.  */
-	.p2align 4
-L(loop16):
-	ldp	data1, data1h, [src1], 16
-	ldp	data2, data2h, [src2], 16
-	subs	limit, limit, 16
-	ccmp	data1, data2, 0, hi
-	ccmp	data1h, data2h, 0, eq
-	b.eq	L(loop16)
-
-	cmp	data1, data2
-	bne	L(return)
-	mov	data1, data1h
-	mov	data2, data2h
-	cmp	data1, data2
-	bne	L(return)
-
-	/* Compare last 1-16 bytes using unaligned access.  */
-L(last_bytes):
-	add	src1, src1, limit
-	add	src2, src2, limit
-	ldp	data1, data1h, [src1]
-	ldp	data2, data2h, [src2]
-	cmp     data1, data2
-	bne	L(return)
-	mov	data1, data1h
-	mov	data2, data2h
-	cmp	data1, data2
-
-	/* Compare data bytes and set return value to 0, -1 or 1.  */
-L(return):
-#ifndef __AARCH64EB__
-	rev	data1, data1
-	rev	data2, data2
-#endif
-	cmp     data1, data2
-L(ret_eq):
-	cset	result, ne
-	cneg	result, result, lo
-	ret
-
-	.p2align 4
-	/* Compare up to 8 bytes.  Limit is [-8..-1].  */
-L(less8):
-	adds	limit, limit, 4
-	b.lo	L(less4)
-	ldr	data1w, [src1], 4
-	ldr	data2w, [src2], 4
-	cmp	data1w, data2w
-	b.ne	L(return)
-	sub	limit, limit, 4
-L(less4):
-	adds	limit, limit, 4
-	beq	L(ret_eq)
-L(byte_loop):
-	ldrb	data1w, [src1], 1
-	ldrb	data2w, [src2], 1
-	subs	limit, limit, 1
-	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
-	b.eq	L(byte_loop)
-	sub	result, data1w, data2w
-	ret
-
-END(memcmp)
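
A detail worth noting in the deleted memcmp is its tail handling: rather than a byte loop, the final 1-16 bytes are compared with one pair of unaligned loads anchored at the end of both buffers, deliberately re-reading up to 15 already-compared bytes (the L(last_bytes) block above). In C++ terms (sketch, with memcpy standing in for the unaligned LDP; the helper name is invented):

    #include <cstdint>
    #include <cstring>

    // Tail comparison as in L(last_bytes): re-read the last 16 bytes from
    // the end of each buffer instead of looping byte by byte.
    // Precondition, as in the assembly: n >= 16.
    static bool last_16_bytes_equal(const unsigned char* a,
                                    const unsigned char* b, size_t n) {
      uint64_t a0, a1, b0, b1;
      memcpy(&a0, a + n - 16, 8);
      memcpy(&a1, a + n - 8, 8);
      memcpy(&b0, b + n - 16, 8);
      memcpy(&b1, b + n - 8, 8);
      return a0 == b0 && a1 == b1;
    }
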
diff --git a/libc/arch-arm64/generic/bionic/stpcpy.S b/libc/arch-arm64/mte/bionic/stpcpy.S
similarity index 100%
rename from libc/arch-arm64/generic/bionic/stpcpy.S
rename to libc/arch-arm64/mte/bionic/stpcpy.S
diff --git a/libc/arch-arm64/mte/bionic/strchr.cpp b/libc/arch-arm64/mte/bionic/strchrnul.cpp
similarity index 91%
copy from libc/arch-arm64/mte/bionic/strchr.cpp
copy to libc/arch-arm64/mte/bionic/strchrnul.cpp
index 7d394df..4f41a31 100644
--- a/libc/arch-arm64/mte/bionic/strchr.cpp
+++ b/libc/arch-arm64/mte/bionic/strchrnul.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2019 The Android Open Source Project
+ * Copyright (C) 2020 The Android Open Source Project
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,5 +26,5 @@
  * SUCH DAMAGE.
  */
 
-#define strchr strchr_mte
-#include <bionic/strchr.cpp>
+#define strchrnul strchrnul_mte
+#include <bionic/strchrnul.cpp>
diff --git a/libc/arch-arm64/generic/bionic/strcpy.S b/libc/arch-arm64/mte/bionic/strcpy.S
similarity index 100%
rename from libc/arch-arm64/generic/bionic/strcpy.S
rename to libc/arch-arm64/mte/bionic/strcpy.S
diff --git a/libc/arch-arm64/generic/bionic/string_copy.S b/libc/arch-arm64/mte/bionic/string_copy.S
similarity index 99%
rename from libc/arch-arm64/generic/bionic/string_copy.S
rename to libc/arch-arm64/mte/bionic/string_copy.S
index 2bf969d..8b0b93a 100644
--- a/libc/arch-arm64/generic/bionic/string_copy.S
+++ b/libc/arch-arm64/mte/bionic/string_copy.S
@@ -97,9 +97,9 @@
 #define REP8_80 0x8080808080808080
 
 #if defined(STPCPY)
-ENTRY(stpcpy)
+ENTRY(stpcpy_mte)
 #elif defined(STRCPY)
-ENTRY(strcpy)
+ENTRY(strcpy_mte)
 #endif
     mov     zeroones, #REP8_01
 #if defined(STRCPY)
diff --git a/libc/arch-arm64/mte/bionic/strlen.c b/libc/arch-arm64/mte/bionic/strlen.c
deleted file mode 100644
index de88320..0000000
--- a/libc/arch-arm64/mte/bionic/strlen.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2019 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <upstream-openbsd/android/include/openbsd-compat.h>
-
-#define strlen strlen_mte
-#include <upstream-openbsd/lib/libc/string/strlen.c>
diff --git a/libc/arch-arm64/mte/bionic/strchr.cpp b/libc/arch-arm64/mte/bionic/strrchr.cpp
similarity index 92%
rename from libc/arch-arm64/mte/bionic/strchr.cpp
rename to libc/arch-arm64/mte/bionic/strrchr.cpp
index 7d394df..3201b61 100644
--- a/libc/arch-arm64/mte/bionic/strchr.cpp
+++ b/libc/arch-arm64/mte/bionic/strrchr.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2019 The Android Open Source Project
+ * Copyright (C) 2020 The Android Open Source Project
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,5 +26,5 @@
  * SUCH DAMAGE.
  */
 
-#define strchr strchr_mte
-#include <bionic/strchr.cpp>
+#define strrchr strrchr_mte
+#include <bionic/strrchr.cpp>
diff --git a/libc/arch-arm64/static_function_dispatch.S b/libc/arch-arm64/static_function_dispatch.S
index 8e3a4c1..c89294b 100644
--- a/libc/arch-arm64/static_function_dispatch.S
+++ b/libc/arch-arm64/static_function_dispatch.S
@@ -34,8 +34,12 @@
 END(name)
 
 FUNCTION_DELEGATE(memchr, memchr_mte)
-FUNCTION_DELEGATE(strchr, strchr_mte)
+FUNCTION_DELEGATE(stpcpy, stpcpy_mte)
+FUNCTION_DELEGATE(strchr, __strchr_aarch64_mte)
+FUNCTION_DELEGATE(strchrnul, strchrnul_mte)
 FUNCTION_DELEGATE(strcmp, strcmp_mte)
-FUNCTION_DELEGATE(strlen, strlen_mte)
+FUNCTION_DELEGATE(strcpy, strcpy_mte)
+FUNCTION_DELEGATE(strlen, __strlen_aarch64_mte)
+FUNCTION_DELEGATE(strrchr, strrchr_mte)
 FUNCTION_DELEGATE(strncmp, strncmp_mte)
 FUNCTION_DELEGATE(strnlen, strnlen_mte)
diff --git a/libc/bionic/android_unsafe_frame_pointer_chase.cpp b/libc/bionic/android_unsafe_frame_pointer_chase.cpp
index 0fb086e..e25867b 100644
--- a/libc/bionic/android_unsafe_frame_pointer_chase.cpp
+++ b/libc/bionic/android_unsafe_frame_pointer_chase.cpp
@@ -57,6 +57,12 @@
 
   auto begin = reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
   uintptr_t end = __get_thread()->stack_top;
+
+  stack_t ss;
+  if (sigaltstack(nullptr, &ss) == 0 && (ss.ss_flags & SS_ONSTACK)) {
+    end = reinterpret_cast<uintptr_t>(ss.ss_sp) + ss.ss_size;
+  }
+
   size_t num_frames = 0;
   while (1) {
     auto* frame = reinterpret_cast<frame_record*>(begin);
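
The sigaltstack query added above exists because a signal handler's frames live on the alternate stack, outside the [frame pointer, stack_top) interval the walker otherwise assumes. The bound computation in isolation (walk_end and thread_stack_top are illustrative names, not bionic API):

    #include <signal.h>
    #include <stdint.h>

    // Upper bound for a frame-pointer walk: the alternate signal stack's end
    // while executing on it (SS_ONSTACK), the thread's stack top otherwise.
    static uintptr_t walk_end(uintptr_t thread_stack_top) {
      stack_t ss;
      if (sigaltstack(nullptr, &ss) == 0 && (ss.ss_flags & SS_ONSTACK)) {
        return reinterpret_cast<uintptr_t>(ss.ss_sp) + ss.ss_size;
      }
      return thread_stack_top;
    }
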
diff --git a/libc/bionic/gwp_asan_wrappers.cpp b/libc/bionic/gwp_asan_wrappers.cpp
index d3e6a14..e60a2f9 100644
--- a/libc/bionic/gwp_asan_wrappers.cpp
+++ b/libc/bionic/gwp_asan_wrappers.cpp
@@ -177,7 +177,8 @@
 }
 
 static const MallocDispatch gwp_asan_dispatch __attribute__((unused)) = {
-  gwp_asan_calloc,
+  // TODO(b/150456936) - GWP-ASan's calloc is disabled for now.
+  Malloc(calloc),
   gwp_asan_free,
   Malloc(mallinfo),
   gwp_asan_malloc,
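
Because the allocator hooks are a plain table of function pointers, disabling GWP-ASan's calloc is a one-slot substitution: that slot points back at the default allocator while the rest of the table keeps the GWP-ASan wrappers. A reduced model of the pattern (field set abbreviated; default_calloc stands in for Malloc(calloc)):

    #include <cstddef>

    struct Dispatch {
      void* (*calloc)(size_t n_elements, size_t elem_size);
      void (*free)(void* mem);
      void* (*malloc)(size_t bytes);
    };

    extern "C" void* default_calloc(size_t, size_t);  // stand-in for Malloc(calloc)
    extern "C" void gwp_asan_free(void*);
    extern "C" void* gwp_asan_malloc(size_t);

    static const Dispatch gwp_asan_dispatch = {
      default_calloc,   // calloc bypasses GWP-ASan while b/150456936 is open
      gwp_asan_free,
      gwp_asan_malloc,
    };
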
diff --git a/libc/bionic/heap_tagging.cpp b/libc/bionic/heap_tagging.cpp
index 62b5f5c..e5e8ec3 100644
--- a/libc/bionic/heap_tagging.cpp
+++ b/libc/bionic/heap_tagging.cpp
@@ -46,7 +46,8 @@
   // or hardware doesn't support MTE, and we will fall back to just enabling tagged pointers in
   // syscall arguments.
   if (prctl(PR_SET_TAGGED_ADDR_CTRL,
-            PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_ASYNC | (1 << PR_MTE_EXCL_SHIFT), 0, 0, 0) == 0) {
+            PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_ASYNC | (0xfffe << PR_MTE_TAG_SHIFT), 0, 0,
+            0) == 0) {
     heap_tagging_level = M_HEAP_TAGGING_LEVEL_ASYNC;
     return;
   }
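
Note what the new third argument encodes: 0xfffe << PR_MTE_TAG_SHIFT sets the allowed-tag field to every tag except 0, presumably so that generated tags never collide with untagged (tag-0) pointers. The full fallback chain the comment describes, as a sketch (assumes the experimental PR_MTE_* constants from mte_kernel.h and a kernel that knows PR_SET_TAGGED_ADDR_CTRL):

    #include <sys/prctl.h>

    // Try MTE asynchronous-fault mode with tags 1..15 allowed; if the kernel
    // or CPU lacks MTE, fall back to tagged pointers in syscall arguments only.
    static bool enable_heap_tagging() {
      if (prctl(PR_SET_TAGGED_ADDR_CTRL,
                PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_ASYNC | (0xfffe << PR_MTE_TAG_SHIFT),
                0, 0, 0) == 0) {
        return true;  // M_HEAP_TAGGING_LEVEL_ASYNC
      }
      return prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0) == 0;
    }
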
diff --git a/libc/bionic/ifaddrs.cpp b/libc/bionic/ifaddrs.cpp
index 0e9b544..e89b0bf 100644
--- a/libc/bionic/ifaddrs.cpp
+++ b/libc/bionic/ifaddrs.cpp
@@ -28,7 +28,6 @@
 
 #include <ifaddrs.h>
 
-#include <async_safe/log.h>
 #include <cutils/misc.h>           // FIRST_APPLICATION_UID
 #include <errno.h>
 #include <linux/if_packet.h>
@@ -282,11 +281,9 @@
 
   // Open the netlink socket and ask for all the links and addresses.
   NetlinkConnection nc;
-  // Simulate kernel behavior on R and above: RTM_GETLINK messages can only be
-  // sent by:
+  // SELinux policy only allows RTM_GETLINK messages to be sent by:
   // - System apps
   // - Apps with a target SDK version lower than R
-  // TODO(b/141455849): Remove this check when kernel changes are merged.
   bool getlink_success = false;
   if (getuid() < FIRST_APPLICATION_UID ||
       android_get_application_target_sdk_version() < __ANDROID_API_R__) {
@@ -303,7 +300,6 @@
   }
 
   if (!getlink_success) {
-    async_safe_format_log(ANDROID_LOG_INFO, "ifaddrs", "Failed to send RTM_GETLINK request");
     // If we weren't able to depend on GETLINK messages, it's possible some
     // interfaces never got their name set. Resolve them using if_indextoname or remove them.
     resolve_or_remove_nameless_interfaces(out);
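
With the log line gone, the gate itself is the only remaining signal, so it is worth spelling out: RTM_GETLINK is attempted only for system UIDs and for apps targeting pre-R. The predicate in isolation (illustrative helper name):

    #include <unistd.h>
    #include <android/api-level.h>  // __ANDROID_API_R__
    #include <cutils/misc.h>        // FIRST_APPLICATION_UID

    // Mirrors the SELinux policy described above: system UIDs and pre-R apps
    // may send RTM_GETLINK; everyone else resolves interface names later.
    static bool may_send_rtm_getlink() {
      return getuid() < FIRST_APPLICATION_UID ||
             android_get_application_target_sdk_version() < __ANDROID_API_R__;
    }
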
diff --git a/libc/bionic/malloc_heapprofd.cpp b/libc/bionic/malloc_heapprofd.cpp
index b2a9e3e..a1b7c07 100644
--- a/libc/bionic/malloc_heapprofd.cpp
+++ b/libc/bionic/malloc_heapprofd.cpp
@@ -321,10 +321,17 @@
       error_log("%s: heapprod: failed to pthread_setname_np", getprogname());
     }
   }
-  // Get an allocation from libc malloc. If we had a previous dispatch table,
-  // this will come from it - otherwise, we'll get it from the system
-  // allocator.
-  return malloc(bytes);
+  // If we had a previous dispatch table, use that to service the allocation,
+  // otherwise fall back to the native allocator.
+  // `gPreviousDefaultDispatchTable` won't change underneath us, as it's
+  // protected by the `gHeapProfdInitInProgress` lock (which we currently hold).
+  // The lock was originally taken by our caller in `HandleHeapprofdSignal()`,
+  // and will be released by `CommonInstallHooks()` via our `InitHeapprofd()`
+  // thread that we just created.
+  if (gPreviousDefaultDispatchTable) {
+    return gPreviousDefaultDispatchTable->malloc(bytes);
+  }
+  return NativeAllocatorDispatch()->malloc(bytes);
 }
 
 bool HeapprofdInitZygoteChildProfiling() {
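
The rewritten tail of the function pins the allocation to a concrete dispatch table instead of calling malloc(), presumably because malloc()'s resolution is exactly what is being swapped during heapprofd installation. Condensed to its decision (Dispatch is a reduced stand-in; the global and accessor names come from the diff):

    #include <cstddef>

    struct Dispatch { void* (*malloc)(size_t bytes); };

    // Stable while gHeapProfdInitInProgress is held by our caller.
    static const Dispatch* gPreviousDefaultDispatchTable;
    const Dispatch* NativeAllocatorDispatch();

    static void* service_allocation(size_t bytes) {
      // Prefer the dispatch table that was installed before heapprofd began
      // hooking; otherwise go straight to the native allocator.
      if (gPreviousDefaultDispatchTable) {
        return gPreviousDefaultDispatchTable->malloc(bytes);
      }
      return NativeAllocatorDispatch()->malloc(bytes);
    }
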
diff --git a/libc/platform/bionic/mte_kernel.h b/libc/platform/bionic/mte_kernel.h
index 2c777c9..984d17d 100644
--- a/libc/platform/bionic/mte_kernel.h
+++ b/libc/platform/bionic/mte_kernel.h
@@ -37,7 +37,7 @@
 
 #ifdef ANDROID_EXPERIMENTAL_MTE
 
-#define HWCAP2_MTE (1 << 10)
+#define HWCAP2_MTE (1 << 18)
 #define PROT_MTE 0x20
 
 #define PR_MTE_TCF_SHIFT 1
@@ -45,10 +45,10 @@
 #define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT)
 #define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT)
 #define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT)
-#define PR_MTE_EXCL_SHIFT 3
-#define PR_MTE_EXCL_MASK (0xffffUL << PR_MTE_EXCL_SHIFT)
+#define PR_MTE_TAG_SHIFT 3
+#define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT)
 
-#define SEGV_MTEAERR 6
-#define SEGV_MTESERR 7
+#define SEGV_MTEAERR 8
+#define SEGV_MTESERR 9
 
 #endif
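
The renumbered constants describe a packed field layout inside the prctl argument: the enable bit at bit 0, the fault mode in bits 1..2, and the 16-bit allowed-tag mask in bits 3..18. A few static_asserts, valid in any translation unit that defines ANDROID_EXPERIMENTAL_MTE and includes this header, make the arithmetic concrete:

    #include <platform/bionic/mte_kernel.h>

    static_assert(PR_MTE_TCF_MASK == (3UL << 1), "fault-mode field: bits 1..2");
    static_assert(PR_MTE_TAG_MASK == (0xffffUL << 3), "tag-mask field: bits 3..18");
    // heap_tagging.cpp passes 0xfffe << PR_MTE_TAG_SHIFT: all tags except tag 0.
    static_assert(((0xfffeUL << PR_MTE_TAG_SHIFT) & ~PR_MTE_TAG_MASK) == 0,
                  "the heap-tagging value fits inside the tag-mask field");
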
diff --git a/tests/android_unsafe_frame_pointer_chase_test.cpp b/tests/android_unsafe_frame_pointer_chase_test.cpp
index dd04c33..7fa50e1 100644
--- a/tests/android_unsafe_frame_pointer_chase_test.cpp
+++ b/tests/android_unsafe_frame_pointer_chase_test.cpp
@@ -18,6 +18,8 @@
 
 #if defined(__BIONIC__)
 
+#include <sys/mman.h>
+
 #include "platform/bionic/android_unsafe_frame_pointer_chase.h"
 
 // Prevent tail calls inside recurse.
@@ -72,21 +74,25 @@
   EXPECT_TRUE(CheckFrames(frames, size));
 }
 
-static void *BacktraceThread(void *) {
+static const char* tester_func() {
   size_t size = recurse(kNumFrames, 0, 0);
 
   uintptr_t frames[kNumFrames + 2];
   size_t size2 = recurse(kNumFrames, frames, kNumFrames + 2);
   if (size2 != size) {
-    return (void*)"size2 != size";
+    return "size2 != size";
   }
 
   if (!CheckFrames(frames, size)) {
-    return (void*)"CheckFrames failed";
+    return "CheckFrames failed";
   }
   return nullptr;
 }
 
+static void* BacktraceThread(void*) {
+  return (void*)tester_func();
+}
+
 TEST(android_unsafe_frame_pointer_chase, pthread) {
   pthread_t t;
   ASSERT_EQ(0, pthread_create(&t, nullptr, BacktraceThread, nullptr));
@@ -95,4 +101,58 @@
   EXPECT_EQ(nullptr, reinterpret_cast<char*>(retval));
 }
 
+static bool g_handler_called;
+static const char* g_handler_tester_result;
+
+static void BacktraceHandler(int) {
+  g_handler_called = true;
+  g_handler_tester_result = tester_func();
+}
+
+static constexpr size_t kStackSize = 16384;
+
+static void* SignalBacktraceThread(void* sp) {
+  stack_t ss;
+  ss.ss_sp = sp;
+  ss.ss_flags = 0;
+  ss.ss_size = kStackSize;
+  sigaltstack(&ss, nullptr);
+
+  struct sigaction s = {};
+  s.sa_handler = BacktraceHandler;
+  s.sa_flags = SA_ONSTACK;
+  sigaction(SIGRTMIN, &s, nullptr);
+
+  raise(SIGRTMIN);
+  return nullptr;
+}
+
+TEST(android_unsafe_frame_pointer_chase, sigaltstack) {
+  // Create threads where the alternate stack appears both after and before the regular stack, and
+  // call android_unsafe_frame_pointer_chase from a signal handler. Without handling for the
+  // alternate signal stack, this would cause false negatives or potential false positives in the
+  // android_unsafe_frame_pointer_chase function.
+  void* stacks =
+      mmap(nullptr, kStackSize * 2, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
+
+  for (unsigned i = 0; i != 2; ++i) {
+    pthread_t t;
+    pthread_attr_t attr;
+    ASSERT_EQ(0, pthread_attr_init(&attr));
+    ASSERT_EQ(0, pthread_attr_setstack(&attr, reinterpret_cast<char*>(stacks) + kStackSize * i,
+                                       kStackSize));
+
+    ASSERT_EQ(0, pthread_create(&t, &attr, SignalBacktraceThread,
+                                reinterpret_cast<char*>(stacks) + kStackSize * (1 - i)));
+    void* retval;
+    ASSERT_EQ(0, pthread_join(t, &retval));
+
+    EXPECT_TRUE(g_handler_called);
+    EXPECT_EQ(nullptr, g_handler_tester_result);
+    g_handler_called = false;
+  }
+
+  munmap(stacks, kStackSize * 2);
+}
+
 #endif // __BIONIC__