Replace bx lr with a load of pc from the stack.

When ARM assembly code contains a return sequence of this form:

ldmxx sp!, {..., lr} or pop {..., lr}
bx lr

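It can be replaced with a single instruction that loads the saved
return address directly into pc:

ldmxx sp!, {..., pc} or pop {..., pc}

On ARMv5T and later, a load into pc interworks the same way bx does,
so the return remains correct for both ARM and Thumb callers while
saving one instruction.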

Change-Id: Ic27048c52f90ac4360ad525daf0361a830dc22a3
diff --git a/libc/arch-arm/cortex-a9/bionic/memcpy_base.S b/libc/arch-arm/cortex-a9/bionic/memcpy_base.S
index bb08b94..966b9b3 100644
--- a/libc/arch-arm/cortex-a9/bionic/memcpy_base.S
+++ b/libc/arch-arm/cortex-a9/bionic/memcpy_base.S
@@ -133,8 +133,7 @@
         strbcs      ip, [r0], #1
         strbcs      lr, [r0], #1
 
-        ldmfd       sp!, {r0, lr}
-        bx          lr
+        ldmfd       sp!, {r0, pc}
 END(MEMCPY_BASE)
 
 ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED)
diff --git a/libc/arch-arm/cortex-a9/bionic/memset.S b/libc/arch-arm/cortex-a9/bionic/memset.S
index 8ee6ac2..48ba815 100644
--- a/libc/arch-arm/cortex-a9/bionic/memset.S
+++ b/libc/arch-arm/cortex-a9/bionic/memset.S
@@ -136,8 +136,7 @@
         strbcs      r1, [r0], #1
         strbmi      r1, [r0], #1
         subs        r2, r2, r3
-        popls       {r0, r4-r7, lr}   /* return */
-        bxls        lr
+        popls       {r0, r4-r7, pc}   /* return */
 
         /* align the destination to a cache-line */
         mov         r12, r1
@@ -180,8 +179,7 @@
         strhmi      r1, [r0], #2
         movs        r2, r2, lsl #2
         strbcs      r1, [r0]
-        ldmfd       sp!, {r0, r4-r7, lr}
-        bx          lr
+        ldmfd       sp!, {r0, r4-r7, pc}
 END(__memset_large_copy)
 
         .data
diff --git a/libc/arch-arm/generic/bionic/memcmp.S b/libc/arch-arm/generic/bionic/memcmp.S
index c78dbd4..6643d55 100644
--- a/libc/arch-arm/generic/bionic/memcmp.S
+++ b/libc/arch-arm/generic/bionic/memcmp.S
@@ -221,8 +221,7 @@
         bne         8b
 
 9:      /* restore registers and return */
-        ldmfd       sp!, {r4, lr}
-        bx          lr
+        ldmfd       sp!, {r4, pc}
 
 10:     /* process less than 12 bytes */
         cmp         r2, #0
diff --git a/libc/arch-arm/generic/bionic/memcpy.S b/libc/arch-arm/generic/bionic/memcpy.S
index ea5a399..65cba4c 100644
--- a/libc/arch-arm/generic/bionic/memcpy.S
+++ b/libc/arch-arm/generic/bionic/memcpy.S
@@ -194,8 +194,7 @@
 
         /* we're done! restore everything and return */
 1:      ldmfd       sp!, {r5-r11}
-        ldmfd       sp!, {r0, r4, lr}
-        bx          lr
+        ldmfd       sp!, {r0, r4, pc}
 
         /********************************************************************/
 
@@ -385,8 +384,7 @@
 
         /* we're done! restore sp and spilled registers and return */
         add         sp,  sp, #28
-        ldmfd       sp!, {r0, r4, lr}
-        bx          lr
+        ldmfd       sp!, {r0, r4, pc}
 END(memcpy)
 
         // Only reached when the __memcpy_chk check fails.
diff --git a/libc/arch-arm/generic/bionic/memset.S b/libc/arch-arm/generic/bionic/memset.S
index d17a9c4..b8eabbf 100644
--- a/libc/arch-arm/generic/bionic/memset.S
+++ b/libc/arch-arm/generic/bionic/memset.S
@@ -82,8 +82,7 @@
         strbcs      r1, [r0], #1
         strbmi      r1, [r0], #1
         subs        r2, r2, r3
-        popls       {r0, r4-r7, lr}    /* return */
-        bxls        lr
+        popls       {r0, r4-r7, pc}    /* return */
 
         /* align the destination to a cache-line */
         mov         r12, r1
@@ -126,8 +125,7 @@
         strhmi      r1, [r0], #2
         movs        r2, r2, lsl #2
         strbcs      r1, [r0]
-        ldmfd       sp!, {r0, r4-r7, lr}
-        bx          lr
+        ldmfd       sp!, {r0, r4-r7, pc}
 END(memset)
 
         .data
diff --git a/libc/arch-arm/krait/bionic/memcpy_base.S b/libc/arch-arm/krait/bionic/memcpy_base.S
index 035dcf1..6c098ac 100644
--- a/libc/arch-arm/krait/bionic/memcpy_base.S
+++ b/libc/arch-arm/krait/bionic/memcpy_base.S
@@ -118,6 +118,5 @@
         strbcs      ip, [r0], #1
         strbcs      lr, [r0], #1
 
-        ldmfd       sp!, {r0, lr}
-        bx          lr
+        ldmfd       sp!, {r0, pc}
 END(MEMCPY_BASE)