Add new optimized strlen for arm.

This optimized version is primarily targeted at cortex-a15.

Tested on all nexus devices using the system/extras/libc_test strlen test.
Tested alignments from 1 to 32 that are powers of 2.
Tested that strlen does not cross page boundaries at all alignments.

Speed improvements listed below:

cortex-a15
- Sizes >= 32 bytes, ~75% improvement.
- Sizes >= 1024 bytes, ~250% improvement.

cortex-a9
- Sizes >= 32 bytes, ~75% improvement.
- Sizes >= 1024 bytes, ~85% improvement.

krait
- Sizes >= 32 bytes, ~95% improvement.
- Sizes >= 1024 bytes, ~160% improvement.

Change-Id: I361b1a36ed89ab991f2a8f0abbf0d7416d39c8f5
diff --git a/libc/arch-arm/arm.mk b/libc/arch-arm/arm.mk
index b308b05..1a2185f 100644
--- a/libc/arch-arm/arm.mk
+++ b/libc/arch-arm/arm.mk
@@ -15,7 +15,6 @@
     arch-arm/bionic/setjmp.S \
     arch-arm/bionic/sigsetjmp.S \
     arch-arm/bionic/strcpy.S \
-    arch-arm/bionic/strlen.c.arm \
     arch-arm/bionic/syscall.S \
     arch-arm/bionic/tgkill.S \
     arch-arm/bionic/tkill.S \
diff --git a/libc/arch-arm/cortex-a15/bionic/strlen.S b/libc/arch-arm/cortex-a15/bionic/strlen.S
new file mode 100644
index 0000000..d5b8ba4
--- /dev/null
+++ b/libc/arch-arm/cortex-a15/bionic/strlen.S
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Copyright (c) 2013 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+
+    .syntax unified
+
+    .thumb
+    .thumb_func
+
+ENTRY(strlen)
+    pld [r1, #128]
+    mov r1, r0
+
+    rsb     r3, r0, #0
+    ands    r3, r3, #7
+    beq     mainloop
+
+    // Align to a double word (64 bits).
+    ands    ip, r3, #1
+    beq     align_to_32
+
+    ldrb    r2, [r1], #1
+    cmp     r2, #0
+    beq     update_count_and_return
+
+align_to_32:
+    ands    ip, r3, #2
+    beq     align_to_64
+
+    ldrb    r2, [r1], #1
+    cmp     r2, #0
+    beq     update_count_and_return
+    ldrb    r2, [r1], #1
+    cmp     r2, #0
+    beq     update_count_and_return
+
+align_to_64:
+    ands    ip, r3, #4
+    beq     mainloop
+    ldr     r3, [r1], #4
+
+    sub     ip, r3, #0x01010101
+    bic     ip, ip, r3
+    ands    ip, ip, #0x80808080
+    bne     zero_in_second_register
+
+mainloop:
+    ldrd    r2, r3, [r1], #8
+
+    pld     [r1, #64]
+
+    sub     ip, r2, #0x01010101
+    bic     ip, ip, r2
+    ands    ip, ip, #0x80808080
+    bne     zero_in_first_register
+
+    sub     ip, r3, #0x01010101
+    bic     ip, ip, r3
+    ands    ip, ip, #0x80808080
+    bne     zero_in_second_register
+    b       mainloop
+
+zero_in_first_register:
+    sub     r1, r1, #4
+
+zero_in_second_register:
+    sub     r0, r1, r0
+
+    // Check for zero in byte 0.
+    ands    r1, ip, #0x80
+    beq     check_byte1
+
+    sub     r0, r0, #4
+    bx      lr
+
+check_byte1:
+    // Check for zero in byte 1.
+    ands    r1, ip, #0x8000
+    beq     check_byte2
+
+    sub     r0, r0, #3
+    bx      lr
+
+check_byte2:
+    // Check for zero in byte 2.
+    ands    r1, ip, #0x800000
+    beq     return
+
+    sub     r0, r0, #2
+    bx      lr
+
+update_count_and_return:
+    sub     r0, r1, r0
+
+return:
+    sub     r0, r0, #1
+    bx      lr
+END(strlen)
diff --git a/libc/arch-arm/cortex-a15/cortex-a15.mk b/libc/arch-arm/cortex-a15/cortex-a15.mk
index d8193f8..0904e6b 100644
--- a/libc/arch-arm/cortex-a15/cortex-a15.mk
+++ b/libc/arch-arm/cortex-a15/cortex-a15.mk
@@ -1,5 +1,6 @@
 $(call libc-add-cpu-variant-src,MEMCPY,arch-arm/cortex-a15/bionic/memcpy.S)
 $(call libc-add-cpu-variant-src,MEMSET,arch-arm/cortex-a15/bionic/memset.S)
 $(call libc-add-cpu-variant-src,STRCMP,arch-arm/cortex-a15/bionic/strcmp.S)
+$(call libc-add-cpu-variant-src,STRLEN,arch-arm/cortex-a15/bionic/strlen.S)
 
 include bionic/libc/arch-arm/generic/generic.mk
diff --git a/libc/arch-arm/cortex-a9/cortex-a9.mk b/libc/arch-arm/cortex-a9/cortex-a9.mk
index 4862933..5c684ed 100644
--- a/libc/arch-arm/cortex-a9/cortex-a9.mk
+++ b/libc/arch-arm/cortex-a9/cortex-a9.mk
@@ -1,5 +1,7 @@
 $(call libc-add-cpu-variant-src,MEMCPY,arch-arm/cortex-a9/bionic/memcpy.S)
 $(call libc-add-cpu-variant-src,MEMSET,arch-arm/cortex-a9/bionic/memset.S)
 $(call libc-add-cpu-variant-src,STRCMP,arch-arm/cortex-a9/bionic/strcmp.S)
+# Use cortex-a15 version of strlen.
+$(call libc-add-cpu-variant-src,STRLEN,arch-arm/cortex-a15/bionic/strlen.S)
 
 include bionic/libc/arch-arm/generic/generic.mk
diff --git a/libc/arch-arm/bionic/strlen.c b/libc/arch-arm/generic/bionic/strlen.c
similarity index 100%
rename from libc/arch-arm/bionic/strlen.c
rename to libc/arch-arm/generic/bionic/strlen.c
diff --git a/libc/arch-arm/generic/generic.mk b/libc/arch-arm/generic/generic.mk
index 358b1e6..18cad9d 100644
--- a/libc/arch-arm/generic/generic.mk
+++ b/libc/arch-arm/generic/generic.mk
@@ -1,3 +1,4 @@
 $(call libc-add-cpu-variant-src,MEMCPY,arch-arm/generic/bionic/memcpy.S)
 $(call libc-add-cpu-variant-src,MEMSET,arch-arm/generic/bionic/memset.S)
 $(call libc-add-cpu-variant-src,STRCMP,arch-arm/generic/bionic/strcmp.S)
+$(call libc-add-cpu-variant-src,STRLEN,arch-arm/generic/bionic/strlen.c)
diff --git a/libc/arch-arm/krait/krait.mk b/libc/arch-arm/krait/krait.mk
index 4847f86..288afbb 100644
--- a/libc/arch-arm/krait/krait.mk
+++ b/libc/arch-arm/krait/krait.mk
@@ -1,5 +1,7 @@
 $(call libc-add-cpu-variant-src,MEMCPY,arch-arm/krait/bionic/memcpy.S)
 $(call libc-add-cpu-variant-src,MEMSET,arch-arm/krait/bionic/memset.S)
 $(call libc-add-cpu-variant-src,STRCMP,arch-arm/krait/bionic/strcmp.S)
+# Use cortex-a15 version of strlen.
+$(call libc-add-cpu-variant-src,STRLEN,arch-arm/cortex-a15/bionic/strlen.S)
 
 include bionic/libc/arch-arm/generic/generic.mk