bionic: add support for non-NEON memcpy() on NEON SoCs

Some SoCs that support NEON nevertheless perform better with a non-NEON than a
NEON memcpy().  This patch adds build variable ARCH_ARM_USE_NON_NEON_MEMCPY,
which can be set to true in BoardConfig.mk.  When it is set, we compile in the
non-NEON optimized memcpy() even if the SoC supports NEON.

Change-Id: Ia0e5bee6bad5880ffc5ff8f34a1382d567546cf9
diff --git a/libc/Android.mk b/libc/Android.mk
index dab6523..a403146 100644
--- a/libc/Android.mk
+++ b/libc/Android.mk
@@ -478,6 +478,9 @@
   ifeq ($(ARCH_ARM_HAVE_TLS_REGISTER),true)
     libc_common_cflags += -DHAVE_ARM_TLS_REGISTER
   endif
+  ifeq ($(ARCH_ARM_USE_NON_NEON_MEMCPY),true)
+    libc_common_cflags += -DARCH_ARM_USE_NON_NEON_MEMCPY
+  endif
 else # !arm
   ifeq ($(TARGET_ARCH),x86)
     libc_crt_target_cflags :=
diff --git a/libc/arch-arm/bionic/memcpy.S b/libc/arch-arm/bionic/memcpy.S
index 438fa00..8c0b4d7 100644
--- a/libc/arch-arm/bionic/memcpy.S
+++ b/libc/arch-arm/bionic/memcpy.S
@@ -29,7 +29,7 @@
 #include <machine/cpu-features.h>
 #include <machine/asm.h>
-#if defined(__ARM_NEON__)
+#if defined(__ARM_NEON__) && !defined(ARCH_ARM_USE_NON_NEON_MEMCPY)
         .fpu    neon