Add libc optimizations to bionic for MIPS

Test: Ran the bionic tests available under the bionic/tests folder.
      Tested mips32r1/mips32r2/mips64r6 on emulators.

Change-Id: I589415ddc496df3f6067ae34cb33ca58b3a1f276
Signed-off-by: Prashant Patil <prashant.patil@imgtec.com>
diff --git a/libc/Android.bp b/libc/Android.bp
index 1ecef1f..5d0e8c7 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -632,7 +632,22 @@
                 "upstream-openbsd/lib/libc/string/strncmp.c",
             ],
         },
-
+        mips: {
+            exclude_srcs: [
+                "upstream-openbsd/lib/libc/string/memchr.c",
+                "upstream-openbsd/lib/libc/string/memmove.c",
+                "upstream-openbsd/lib/libc/string/strcpy.c",
+                "upstream-openbsd/lib/libc/string/strncmp.c",
+            ],
+        },
+        mips64: {
+            exclude_srcs: [
+                "upstream-openbsd/lib/libc/string/memchr.c",
+                "upstream-openbsd/lib/libc/string/memmove.c",
+                "upstream-openbsd/lib/libc/string/strcpy.c",
+                "upstream-openbsd/lib/libc/string/strncmp.c",
+            ],
+        },
         x86: {
             exclude_srcs: [
                 "upstream-openbsd/lib/libc/string/memchr.c",
@@ -1041,9 +1056,16 @@
         mips: {
             srcs: [
                 "arch-mips/string/memcmp.c",
-                "arch-mips/string/memcpy.S",
+                "arch-mips/string/memcpy.c",
                 "arch-mips/string/memset.S",
                 "arch-mips/string/strcmp.S",
+                "arch-mips/string/strncmp.S",
+                "arch-mips/string/strlen.c",
+                "arch-mips/string/strnlen.c",
+                "arch-mips/string/strchr.c",
+                "arch-mips/string/strcpy.c",
+                "arch-mips/string/memchr.c",
+                "arch-mips/string/memmove.c",
 
                 "arch-mips/bionic/__bionic_clone.S",
                 "arch-mips/bionic/cacheflush.cpp",
@@ -1052,25 +1074,25 @@
                 "arch-mips/bionic/setjmp.S",
                 "arch-mips/bionic/syscall.S",
                 "arch-mips/bionic/vfork.S",
-
-                "arch-mips/string/mips_strlen.c",
             ],
-            rev6: {
-                srcs: [
-                    "arch-mips/string/strlen.c",
-                ],
-                exclude_srcs: [
-                    "arch-mips/string/mips_strlen.c",
-                ],
-            },
+            exclude_srcs: [
+                "bionic/strchr.cpp",
+                "bionic/strnlen.c",
+            ],
         },
         mips64: {
             srcs: [
                 "arch-mips/string/memcmp.c",
-                "arch-mips/string/memcpy.S",
+                "arch-mips/string/memcpy.c",
                 "arch-mips/string/memset.S",
                 "arch-mips/string/strcmp.S",
+                "arch-mips/string/strncmp.S",
                 "arch-mips/string/strlen.c",
+                "arch-mips/string/strnlen.c",
+                "arch-mips/string/strchr.c",
+                "arch-mips/string/strcpy.c",
+                "arch-mips/string/memchr.c",
+                "arch-mips/string/memmove.c",
 
                 "arch-mips64/bionic/__bionic_clone.S",
                 "arch-mips64/bionic/_exit_with_stack_teardown.S",
@@ -1079,6 +1101,10 @@
                 "arch-mips64/bionic/vfork.S",
                 "arch-mips64/bionic/stat.cpp",
             ],
+            exclude_srcs: [
+                "bionic/strchr.cpp",
+                "bionic/strnlen.c",
+            ],
         },
 
         x86: {
diff --git a/libc/NOTICE b/libc/NOTICE
index 9f0d2c5..2ce293f 100644
--- a/libc/NOTICE
+++ b/libc/NOTICE
@@ -4816,38 +4816,6 @@
 
 -------------------------------------------------------------------
 
-Copyright (c) 2010 MIPS Technologies, Inc.
-
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-
-     * Redistributions of source code must retain the above copyright
-       notice, this list of conditions and the following disclaimer.
-     * Redistributions in binary form must reproduce the above copyright
-       notice, this list of conditions and the following disclaimer
-       in the documentation and/or other materials provided with
-       the distribution.
-     * Neither the name of MIPS Technologies Inc. nor the names of its
-       contributors may be used to endorse or promote products derived
-       from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
--------------------------------------------------------------------
-
 Copyright (c) 2010 The NetBSD Foundation, Inc.
 All rights reserved.
 
@@ -5344,35 +5312,6 @@
 
 -------------------------------------------------------------------
 
-Copyright (c) 2012-2015
-     MIPS Technologies, Inc., California.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-1. Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-2. Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the distribution.
-3. Neither the name of the MIPS Technologies, Inc., nor the names of its
-   contributors may be used to endorse or promote products derived from
-   this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGE.
-
--------------------------------------------------------------------
-
 Copyright (c) 2013
      MIPS Technologies, Inc., California.
 
@@ -5586,35 +5525,6 @@
 
 -------------------------------------------------------------------
 
-Copyright (c) 2014
-     Imagination Technologies Limited.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-1. Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-2. Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the distribution.
-3. Neither the name of the MIPS Technologies, Inc., nor the names of its
-   contributors may be used to endorse or promote products derived from
-   this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY IMAGINATION TECHNOLOGIES LIMITED ``AS IS'' AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL IMAGINATION TECHNOLOGIES LIMITED BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGE.
-
--------------------------------------------------------------------
-
 Copyright (c) 2014 Theo de Raadt <deraadt@openbsd.org>
 Copyright (c) 2014 Bob Beck <beck@obtuse.com>
 
@@ -5750,6 +5660,38 @@
 
 -------------------------------------------------------------------
 
+Copyright (c) 2017 Imagination Technologies.
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+     * Redistributions of source code must retain the above copyright
+       notice, this list of conditions and the following disclaimer.
+     * Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer
+       in the documentation and/or other materials provided with
+       the distribution.
+     * Neither the name of Imagination Technologies nor the names of its
+       contributors may be used to endorse or promote products derived
+       from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-------------------------------------------------------------------
+
 Copyright (c)1999 Citrus Project,
 All rights reserved.
 
diff --git a/libc/arch-mips/string/memchr.c b/libc/arch-mips/string/memchr.c
new file mode 100644
index 0000000..6b4c8cc
--- /dev/null
+++ b/libc/arch-mips/string/memchr.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2017 Imagination Technologies.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *      * Redistributions of source code must retain the above copyright
+ *        notice, this list of conditions and the following disclaimer.
+ *      * Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimer
+ *        in the documentation and/or other materials provided with
+ *        the distribution.
+ *      * Neither the name of Imagination Technologies nor the names of its
+ *        contributors may be used to endorse or promote products derived
+ *        from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+
+#define ENABLE_PREFETCH     1
+#define op_t                unsigned long int
+#define op_size             sizeof (op_t)
+
+#if ENABLE_PREFETCH
+#define PREFETCH(addr)  __builtin_prefetch (addr, 0, 1);
+#else
+#define PREFETCH(addr)
+#endif
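+/*
+ * __builtin_prefetch (addr, 0, 1) requests a read prefetch (second
+ * argument 0) with low expected temporal locality (third argument 1).
+ * GCC lowers it to the MIPS 'pref' instruction where the target
+ * supports it and discards it otherwise, so the hint is always safe.
+ */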
+
+#if __mips64 || __mips_isa_rev >= 2
+static inline void * __attribute__ ((always_inline))
+do_bytes (const op_t* w, op_t inval)
+{
+  const unsigned char *p = (const unsigned char *) w;
+  op_t outval = 0;
+#if __mips64
+  __asm__ volatile (
+    "dsbh %1, %0 \n\t"
+    "dshd %0, %1 \n\t"
+    "dclz %1, %0 \n\t"
+    : "+r" (inval), "+r" (outval)
+  );
+#else
+  __asm__ volatile (
+    "wsbh %1, %0 \n\t"
+    "rotr %0, %1, 16 \n\t"
+    "clz %1, %0 \n\t"
+    : "+r" (inval), "+r" (outval)
+  );
+#endif
+  p += (outval >> 3);
+  return (void *) p;
+}
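+/*
+ * The inline asm above byte-reverses 'inval' (dsbh+dshd on MIPS64,
+ * wsbh+rotr 16 on MIPS32r2+) and counts its leading zeros.  'inval'
+ * arrives with bit 7 set in every matching byte, so on little-endian
+ * MIPS (the Android configuration) the first match in memory order
+ * becomes the most significant set bit after the reversal; clz/dclz
+ * then returns 8 * byte_index and the '>> 3' recovers the offset.
+ */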
+
+#define DO_WORD(in, val) {                        \
+  op_t tmp = ((val - mask_1) & ~val) & mask_128;  \
+  if (tmp != 0)                                   \
+    return do_bytes(in, tmp);                     \
+}
+#else
+static inline void * __attribute__ ((always_inline))
+do_bytes (const op_t* w, unsigned char ch)
+{
+  const unsigned char *p = (const unsigned char *) w;
+  for (; *p != ch; ++p);
+  return (void *) p;
+}
+
+#define DO_WORD(in, val) {                        \
+  op_t tmp = ((val - mask_1) & ~val) & mask_128;  \
+  if (tmp != 0)                                   \
+    return do_bytes(in, ch);                      \
+}
+#endif
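+/*
+ * Both DO_WORD variants rely on the classic zero-byte test: with
+ * val = w ^ mask_c, the expression ((val - mask_1) & ~val) & mask_128
+ * has bit 7 set in every byte of 'w' that equals ch and is zero
+ * otherwise.  A worked 32-bit example with ch == 'a' (0x61):
+ *
+ *   w   = 0x62617364                ("dsab" in little-endian memory)
+ *   val = w ^ 0x61616161          = 0x03001205
+ *   tmp = ((val - 0x01010101) & ~val) & 0x80808080
+ *       = 0x00800000                (only byte 2, the 'a', is flagged)
+ */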
+
+#define DO_WORDS(w) {          \
+  op_t* w1 = (op_t*) w;        \
+  op_t val0 = w1[0] ^ mask_c;  \
+  op_t val1 = w1[1] ^ mask_c;  \
+  op_t val2 = w1[2] ^ mask_c;  \
+  op_t val3 = w1[3] ^ mask_c;  \
+  DO_WORD(w1, val0)            \
+  DO_WORD(w1 + 1, val1)        \
+  DO_WORD(w1 + 2, val2)        \
+  DO_WORD(w1 + 3, val3)        \
+}
+
+void *
+memchr (void const *s, int c_in, size_t n) __overloadable
+{
+  if (n != 0) {
+    const unsigned char *p = (const unsigned char *) s;
+    const op_t *w;
+    op_t mask_1, mask_128, mask_c;
+    unsigned char ch = (unsigned char) c_in;
+
+    /*
+     * Check bytewise until the pointer is op_t-aligned
+     */
+    for (; n > 0 && ((size_t) p % op_size) != 0; --n, ++p) {
+      if (*p == ch)
+        return (void *) p;
+    }
+
+    w = (const op_t *) p;
+
+    mask_c = ch | (ch << 8);
+    mask_c |= mask_c << 16;
+    __asm__ volatile (
+      "li %0, 0x01010101 \n\t"
+      : "=r" (mask_1)
+    );
+#if __mips64
+    mask_1 |= mask_1 << 32;
+    mask_c |= mask_c << 32;
+#endif
+    mask_128 = mask_1 << 7;
+
+    /*
+     * Check op_size bytes at a time after the initial alignment
+     */
+#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
+    PREFETCH (w);
+    PREFETCH (w + 8);
+    while (n >= 24 * op_size) {
+      PREFETCH(w + 16);
+      DO_WORDS(w);
+      DO_WORDS(w + 4);
+      w += 8;
+      n -= 8 * op_size;
+    }
+    while (n >= 8 * op_size) {
+      DO_WORDS(w);
+      DO_WORDS(w + 4);
+      w += 8;
+      n -= 8 * op_size;
+    }
+#else
+    PREFETCH (w);
+    PREFETCH (w + 4);
+    while (n >= 12 * op_size) {
+      PREFETCH(w + 8);
+      DO_WORDS(w);
+      w += 4;
+      n -= 4 * op_size;
+    }
+    while (n >= 4 * op_size) {
+      DO_WORDS(w);
+      w += 4;
+      n -= 4 * op_size;
+    }
+#endif
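+    /*
+     * In the unrolled loops above the prefetch runs a fixed distance
+     * ahead of the loads, and each loop bound leaves enough slack
+     * (24 vs. 8 words consumed, or 12 vs. 4) that every prefetch
+     * still targets bytes inside the buffer; the prefetch-free loop
+     * then drains whatever full blocks remain.
+     */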
+
+    while (n >= op_size) {
+      op_t val = *w ^ mask_c;
+      DO_WORD(w, val);
+      w++;
+      n -= op_size;
+    }
+
+    /*
+     * Check the remaining bytes bytewise
+     */
+    p = (const unsigned char *) w;
+    for (; n > 0; --n, ++p) {
+      if (*p == ch)
+        return (void *) p;
+    }
+  }
+  return NULL;
+}
diff --git a/libc/arch-mips/string/memcmp.c b/libc/arch-mips/string/memcmp.c
index 8640954..eb4ad07 100644
--- a/libc/arch-mips/string/memcmp.c
+++ b/libc/arch-mips/string/memcmp.c
@@ -1,51 +1,352 @@
 /*
- * Copyright (C) 2008 The Android Open Source Project
+ * Copyright (c) 2017 Imagination Technologies.
+ *
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
+ *
+ *      * Redistributions of source code must retain the above copyright
+ *        notice, this list of conditions and the following disclaimer.
+ *      * Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimer
+ *        in the documentation and/or other materials provided with
+ *        the distribution.
+ *      * Neither the name of Imagination Technologies nor the names of its
+ *        contributors may be used to endorse or promote products derived
+ *        from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
+
 #include <string.h>
+#include <stdint.h>
 
-int memcmp(const void *s1, const void *s2, size_t n)
+#define ENABLE_PREFETCH 1
+
+#define STRNG(X) #X
+#define PREFETCH(src_ptr, offset)  \
+  asm("pref 0, " STRNG(offset) "(%[src]) \n\t" : : [src] "r" (src_ptr));
+
+#if !defined(UNALIGNED_INSTR_SUPPORT)
+/* does target have unaligned lw/ld/ualw/uald instructions? */
+#define UNALIGNED_INSTR_SUPPORT 0
+#if __mips_isa_rev < 6 && !__mips1
+#undef UNALIGNED_INSTR_SUPPORT
+#define UNALIGNED_INSTR_SUPPORT 1
+#endif
+#endif
+
+#if !defined(HW_UNALIGNED_SUPPORT)
+/* Does target have hardware support for unaligned accesses?  */
+#define HW_UNALIGNED_SUPPORT 0
+#if __mips_isa_rev >= 6
+#undef HW_UNALIGNED_SUPPORT
+#define HW_UNALIGNED_SUPPORT 1
+#endif
+#endif
+
+#define SIZEOF_reg_t 4
+#if _MIPS_SIM == _ABIO32
+typedef unsigned long reg_t;
+typedef struct bits
 {
-    const unsigned char*  p1   = s1;
-    const unsigned char*  end1 = p1 + n;
-    const unsigned char*  p2   = s2;
-    int                   d = 0;
+  reg_t B0:8, B1:8, B2:8, B3:8;
+} bits_t;
+#else
+#undef SIZEOF_reg_t
+#define SIZEOF_reg_t 8
+typedef unsigned long long reg_t;
+typedef struct bits
+{
+    reg_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
+} bits_t;
+#endif
 
-    for (;;) {
-        if (d || p1 >= end1) break;
-        d = (int)*p1++ - (int)*p2++;
+/* This union assumes that small structures can be kept in registers.
+   If not, memory accesses will be used instead; not optimal, but
+   still correct.  */
+typedef union
+{
+  reg_t v;
+  bits_t b;
+} bitfields_t;
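+/*
+ * B0..B7 are expected to name bytes in address order on either
+ * endianness: GCC allocates bit-fields from the least significant bit
+ * on little-endian MIPS and from the most significant bit on
+ * big-endian, so B0 is the byte at the lowest address in both cases
+ * and do_by_bitfields below compares bytes in memory order without
+ * endian-specific code.
+ */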
 
-        if (d || p1 >= end1) break;
-        d = (int)*p1++ - (int)*p2++;
+#define do_bitfield(__i) \
+  if (x.b.B##__i != y.b.B##__i) return x.b.B##__i - y.b.B##__i;
 
-        if (d || p1 >= end1) break;
-        d = (int)*p1++ - (int)*p2++;
+/* pull apart the words to find the first differing unsigned byte.  */
+static int __attribute__ ((noinline)) do_by_bitfields (reg_t a, reg_t b)
+{
+  bitfields_t x, y;
+  x.v = a;
+  y.v = b;
+  do_bitfield (0);
+  do_bitfield (1);
+  do_bitfield (2);
+#if SIZEOF_reg_t == 4
+  return x.b.B3 - y.b.B3;
+#else
+  do_bitfield (3);
+  do_bitfield (4);
+  do_bitfield (5);
+  do_bitfield (6);
+  return x.b.B7 - y.b.B7;
+#endif
+}
 
-        if (d || p1 >= end1) break;
-        d = (int)*p1++ - (int)*p2++;
-    }
-    return d;
+/* This code is used when aligning a pointer, when there are remaining
+   bytes after doing word compares, or when the architecture lacks
+   unaligned load support.  */
+static inline int __attribute__ ((always_inline))
+do_bytes (const void *a, const void *b, unsigned long len)
+{
+  unsigned char *x = (unsigned char *) a;
+  unsigned char *y = (unsigned char *) b;
+  unsigned long i;
+
+  /* 'len' might be zero here, so preloading the first two values
+     before the loop may access unallocated memory.  */
+  for (i = 0; i < len; i++) {
+    if (*x != *y)
+      return *x - *y;
+    x++;
+    y++;
+  }
+  return 0;
+}
+
+#if !HW_UNALIGNED_SUPPORT
+#if UNALIGNED_INSTR_SUPPORT
+/* for MIPS GCC, there are no unaligned builtins - so this struct forces
+   the compiler to treat the pointer access as unaligned.  */
+struct ulw
+{
+  reg_t uli;
+} __attribute__ ((packed));
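+/*
+ * Reading a[i].uli through the packed struct makes GCC emit the
+ * unaligned load itself (ulw/uld, i.e. lwl/lwr or ldl/ldr pairs on
+ * pre-R6 cores), keeping the C source portable while still getting
+ * single-pass unaligned word loads.
+ */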
+
+/* first pointer is not aligned while second pointer is.  */
+static int unaligned_words (const struct ulw *a, const reg_t *b,
+                            unsigned long words, unsigned long bytes)
+{
+#if ENABLE_PREFETCH
+  /* prefetch pointer aligned to 32 byte boundary */
+  const reg_t *pref_ptr = (const reg_t *) (((uintptr_t) b + 31) & ~31);
+  const reg_t *pref_ptr_a = (const reg_t *) (((uintptr_t) a + 31) & ~31);
+#endif
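+  /*
+   * '((uintptr_t) x + 31) & ~31' rounds each prefetch pointer up to
+   * the next 32-byte boundary, so every PREFETCH below targets the
+   * start of a fresh (presumed 32-byte) cache line.
+   */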
+  for (; words >= 16; words -= 8) {
+#if ENABLE_PREFETCH
+    pref_ptr += 8;
+    PREFETCH(pref_ptr, 0);
+    PREFETCH(pref_ptr, 32);
+
+    pref_ptr_a += 8;
+    PREFETCH(pref_ptr_a, 0);
+    PREFETCH(pref_ptr_a, 32);
+#endif
+    reg_t x0 = a[0].uli, x1 = a[1].uli;
+    reg_t x2 = a[2].uli, x3 = a[3].uli;
+    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
+    if (x0 != y0)
+      return do_by_bitfields (x0, y0);
+    if (x1 != y1)
+      return do_by_bitfields (x1, y1);
+    if (x2 != y2)
+      return do_by_bitfields (x2, y2);
+    if (x3 != y3)
+      return do_by_bitfields (x3, y3);
+
+    x0 = a[4].uli; x1 = a[5].uli;
+    x2 = a[6].uli; x3 = a[7].uli;
+    y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7];
+    if (x0 != y0)
+      return do_by_bitfields (x0, y0);
+    if (x1 != y1)
+      return do_by_bitfields (x1, y1);
+    if (x2 != y2)
+      return do_by_bitfields (x2, y2);
+    if (x3 != y3)
+      return do_by_bitfields (x3, y3);
+
+    a += 8;
+    b += 8;
+  }
+
+  for (; words >= 4; words -= 4) {
+    reg_t x0 = a[0].uli, x1 = a[1].uli;
+    reg_t x2 = a[2].uli, x3 = a[3].uli;
+    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
+    if (x0 != y0)
+      return do_by_bitfields (x0, y0);
+    if (x1 != y1)
+      return do_by_bitfields (x1, y1);
+    if (x2 != y2)
+      return do_by_bitfields (x2, y2);
+    if (x3 != y3)
+      return do_by_bitfields (x3, y3);
+    a += 4;
+    b += 4;
+  }
+
+  /* do remaining words.  */
+  while (words--) {
+    reg_t x0 = a->uli;
+    reg_t y0 = *b;
+    a += 1;
+    b += 1;
+    if (x0 != y0)
+      return do_by_bitfields (x0, y0);
+  }
+
+  /* mop up any remaining bytes.  */
+  return do_bytes (a, b, bytes);
+}
+#else
+/* Neither hardware unaligned support nor unaligned lw/ld/ualw/uald
+   instructions are available; fall back to byte compares.  */
+static int unaligned_words (const reg_t *a, const reg_t *b,
+                            unsigned long words, unsigned long bytes)
+{
+  return do_bytes (a, b, (sizeof (reg_t) * words) + bytes);
+}
+#endif /* UNALIGNED_INSTR_SUPPORT */
+#endif /* HW_UNALIGNED_SUPPORT */
+
+/* Both pointers are aligned, or the first one is unaligned but the
+   hardware supports unaligned accesses.  */
+static int aligned_words (const reg_t *a, const reg_t *b,
+                          unsigned long words, unsigned long bytes)
+{
+#if ENABLE_PREFETCH
+  /* prefetch pointer aligned to 32 byte boundary */
+  const reg_t *pref_ptr = (const reg_t *) (((uintptr_t) b + 31) & ~31);
+  const reg_t *pref_ptr_a = (const reg_t *) (((uintptr_t) a + 31) & ~31);
+#endif
+
+  for (; words >= 24; words -= 12) {
+#if ENABLE_PREFETCH
+    pref_ptr += 12;
+    PREFETCH(pref_ptr, 0);
+    PREFETCH(pref_ptr, 32);
+    PREFETCH(pref_ptr, 64);
+
+    pref_ptr_a += 12;
+    PREFETCH(pref_ptr_a, 0);
+    PREFETCH(pref_ptr_a, 32);
+    PREFETCH(pref_ptr_a, 64);
+#endif
+    reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3];
+    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
+    if (x0 != y0)
+      return do_by_bitfields (x0, y0);
+    if (x1 != y1)
+      return do_by_bitfields (x1, y1);
+    if (x2 != y2)
+      return do_by_bitfields (x2, y2);
+    if (x3 != y3)
+      return do_by_bitfields (x3, y3);
+
+    x0 = a[4]; x1 = a[5]; x2 = a[6]; x3 = a[7];
+    y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7];
+    if (x0 != y0)
+      return do_by_bitfields (x0, y0);
+    if (x1 != y1)
+      return do_by_bitfields (x1, y1);
+    if (x2 != y2)
+      return do_by_bitfields (x2, y2);
+    if (x3 != y3)
+      return do_by_bitfields (x3, y3);
+
+    x0 = a[8]; x1 = a[9]; x2 = a[10]; x3 = a[11];
+    y0 = b[8]; y1 = b[9]; y2 = b[10]; y3 = b[11];
+    if (x0 != y0)
+      return do_by_bitfields (x0, y0);
+    if (x1 != y1)
+      return do_by_bitfields (x1, y1);
+    if (x2 != y2)
+      return do_by_bitfields (x2, y2);
+    if (x3 != y3)
+      return do_by_bitfields (x3, y3);
+
+    a += 12;
+    b += 12;
+  }
+
+  for (; words >= 4; words -= 4) {
+    reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3];
+    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
+    if (x0 != y0)
+      return do_by_bitfields (x0, y0);
+    if (x1 != y1)
+      return do_by_bitfields (x1, y1);
+    if (x2 != y2)
+      return do_by_bitfields (x2, y2);
+    if (x3 != y3)
+      return do_by_bitfields (x3, y3);
+    a += 4;
+    b += 4;
+  }
+
+  /* do remaining words.  */
+  while (words--) {
+    reg_t x0 = *a;
+    reg_t y0 = *b;
+    a += 1;
+    b += 1;
+    if (x0 != y0)
+      return do_by_bitfields (x0, y0);
+  }
+
+  /* mop up any remaining bytes.  */
+  return do_bytes (a, b, bytes);
+}
+
+int memcmp (const void *a, const void *b, size_t len)
+{
+  unsigned long bytes, words;
+
+  /* Lengths this short should be rare; just compare byte by byte.  */
+  if (len < sizeof (reg_t) * 4) {
+    return do_bytes (a, b, len);
+  }
+
+  /* Align the second pointer to word/dword alignment.
+     Note that the pointer is only 32 bits wide for the o32/n32 ABIs;
+     for n32, loads are done as 64-bit while the address remains
+     32-bit.  */
+  bytes = ((unsigned long) b) % sizeof (reg_t);
+  if (bytes) {
+    int res;
+    bytes = sizeof (reg_t) - bytes;
+    if (bytes > len)
+      bytes = len;
+    res = do_bytes (a, b, bytes);
+    if (res || len == bytes)
+      return res;
+    len -= bytes;
+    a = (const void *) (((unsigned char *) a) + bytes);
+    b = (const void *) (((unsigned char *) b) + bytes);
+  }
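+  /*
+   * For example, with a 4-byte reg_t, a = 0x1003, b = 0x1006 and
+   * len = 100: 'bytes' starts as 2, so two bytes are compared
+   * individually, leaving b = 0x1008 word-aligned and a = 0x1005
+   * unaligned; the remaining 98 bytes split into 24 words plus 2 tail
+   * bytes and, absent hardware unaligned support, take the
+   * unaligned_words path below.
+   */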
+
+  /* Second pointer now aligned.  */
+  words = len / sizeof (reg_t);
+  bytes = len % sizeof (reg_t);
+
+#if HW_UNALIGNED_SUPPORT
+  /* Treat a possibly unaligned first pointer as aligned.  */
+  return aligned_words (a, b, words, bytes);
+#else
+  if (((unsigned long) a) % sizeof (reg_t) == 0) {
+    return aligned_words (a, b, words, bytes);
+  }
+  /* need to use unaligned instructions on first pointer.  */
+  return unaligned_words (a, b, words, bytes);
+#endif
 }
diff --git a/libc/arch-mips/string/memcpy.S b/libc/arch-mips/string/memcpy.S
deleted file mode 100644
index 0b711bd..0000000
--- a/libc/arch-mips/string/memcpy.S
+++ /dev/null
@@ -1,852 +0,0 @@
-/*
- * Copyright (c) 2012-2015
- *      MIPS Technologies, Inc., California.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifdef __ANDROID__
-# include <private/bionic_asm.h>
-# define USE_MEMMOVE_FOR_OVERLAP
-# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD_STREAMED
-# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
-#elif _LIBC
-# include <sysdep.h>
-# include <regdef.h>
-# include <sys/asm.h>
-# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD_STREAMED
-# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
-#elif _COMPILING_NEWLIB
-# include "machine/asm.h"
-# include "machine/regdef.h"
-# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD_STREAMED
-# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
-#else
-# include <regdef.h>
-# include <sys/asm.h>
-#endif
-
-/* Check to see if the MIPS architecture we are compiling for supports
- * prefetching.
- */
-
-#if (__mips == 4) || (__mips == 5) || (__mips == 32) || (__mips == 64)
-# ifndef DISABLE_PREFETCH
-#  define USE_PREFETCH
-# endif
-#endif
-
-#if defined(_MIPS_SIM) && ((_MIPS_SIM == _ABI64) || (_MIPS_SIM == _ABIN32))
-# ifndef DISABLE_DOUBLE
-#  define USE_DOUBLE
-# endif
-#endif
-
-
-#if __mips_isa_rev > 5
-# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
-#  undef PREFETCH_STORE_HINT
-#  define PREFETCH_STORE_HINT PREFETCH_HINT_STORE_STREAMED
-# endif
-# define R6_CODE
-#endif
-
-/* Some asm.h files do not have the L macro definition.  */
-#ifndef L
-# if _MIPS_SIM == _ABIO32
-#  define L(label) $L ## label
-# else
-#  define L(label) .L ## label
-# endif
-#endif
-
-/* Some asm.h files do not have the PTR_ADDIU macro definition.  */
-#ifndef PTR_ADDIU
-# if _MIPS_SIM == _ABIO32
-#  define PTR_ADDIU	addiu
-# else
-#  define PTR_ADDIU	daddiu
-# endif
-#endif
-
-/* Some asm.h files do not have the PTR_SRA macro definition.  */
-#ifndef PTR_SRA
-# if  _MIPS_SIM == _ABIO32
-#  define PTR_SRA	sra
-# else
-#  define PTR_SRA	dsra
-# endif
-#endif
-
-/* New R6 instructions that may not be in asm.h.  */
-#ifndef PTR_LSA
-# if _MIPS_SIM == _ABIO32
-#  define PTR_LSA	lsa
-# else
-#  define PTR_LSA	dlsa
-# endif
-#endif
-
-/*
- * Using PREFETCH_HINT_LOAD_STREAMED instead of PREFETCH_LOAD on load
- * prefetches appears to offer a slight preformance advantage.
- *
- * Using PREFETCH_HINT_PREPAREFORSTORE instead of PREFETCH_STORE
- * or PREFETCH_STORE_STREAMED offers a large performance advantage
- * but PREPAREFORSTORE has some special restrictions to consider.
- *
- * Prefetch with the 'prepare for store' hint does not copy a memory
- * location into the cache, it just allocates a cache line and zeros
- * it out.  This means that if you do not write to the entire cache
- * line before writing it out to memory some data will get zero'ed out
- * when the cache line is written back to memory and data will be lost.
- *
- * Also if you are using this memcpy to copy overlapping buffers it may
- * not behave correctly when using the 'prepare for store' hint.  If you
- * use the 'prepare for store' prefetch on a memory area that is in the
- * memcpy source (as well as the memcpy destination), then you will get
- * some data zero'ed out before you have a chance to read it and data will
- * be lost.
- *
- * If you are going to use this memcpy routine with the 'prepare for store'
- * prefetch you may want to set USE_MEMMOVE_FOR_OVERLAP in order to avoid
- * the problem of running memcpy on overlapping buffers.
- *
- * There are ifdef'ed sections of this memcpy to make sure that it does not
- * do prefetches on cache lines that are not going to be completely written.
- * This code is only needed and only used when PREFETCH_STORE_HINT is set to
- * PREFETCH_HINT_PREPAREFORSTORE.  This code assumes that cache lines are
- * 32 bytes and if the cache line is larger it will not work correctly.
- */
-
-#ifdef USE_PREFETCH
-# define PREFETCH_HINT_LOAD		0
-# define PREFETCH_HINT_STORE		1
-# define PREFETCH_HINT_LOAD_STREAMED	4
-# define PREFETCH_HINT_STORE_STREAMED	5
-# define PREFETCH_HINT_LOAD_RETAINED	6
-# define PREFETCH_HINT_STORE_RETAINED	7
-# define PREFETCH_HINT_WRITEBACK_INVAL	25
-# define PREFETCH_HINT_PREPAREFORSTORE	30
-
-/*
- * If we have not picked out what hints to use at this point use the
- * standard load and store prefetch hints.
- */
-# ifndef PREFETCH_STORE_HINT
-#  define PREFETCH_STORE_HINT PREFETCH_HINT_STORE
-# endif
-# ifndef PREFETCH_LOAD_HINT
-#  define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD
-# endif
-
-/*
- * We double everything when USE_DOUBLE is true so we do 2 prefetches to
- * get 64 bytes in that case.  The assumption is that each individual
- * prefetch brings in 32 bytes.
- */
-
-# ifdef USE_DOUBLE
-#  define PREFETCH_CHUNK 64
-#  define PREFETCH_FOR_LOAD(chunk, reg) \
- pref PREFETCH_LOAD_HINT, (chunk)*64(reg); \
- pref PREFETCH_LOAD_HINT, ((chunk)*64)+32(reg)
-#  define PREFETCH_FOR_STORE(chunk, reg) \
- pref PREFETCH_STORE_HINT, (chunk)*64(reg); \
- pref PREFETCH_STORE_HINT, ((chunk)*64)+32(reg)
-# else
-#  define PREFETCH_CHUNK 32
-#  define PREFETCH_FOR_LOAD(chunk, reg) \
- pref PREFETCH_LOAD_HINT, (chunk)*32(reg)
-#  define PREFETCH_FOR_STORE(chunk, reg) \
- pref PREFETCH_STORE_HINT, (chunk)*32(reg)
-# endif
-/* MAX_PREFETCH_SIZE is the maximum size of a prefetch, it must not be less
- * than PREFETCH_CHUNK, the assumed size of each prefetch.  If the real size
- * of a prefetch is greater than MAX_PREFETCH_SIZE and the PREPAREFORSTORE
- * hint is used, the code will not work correctly.  If PREPAREFORSTORE is not
- * used then MAX_PREFETCH_SIZE does not matter.  */
-# define MAX_PREFETCH_SIZE 128
-/* PREFETCH_LIMIT is set based on the fact that we never use an offset greater
- * than 5 on a STORE prefetch and that a single prefetch can never be larger
- * than MAX_PREFETCH_SIZE.  We add the extra 32 when USE_DOUBLE is set because
- * we actually do two prefetches in that case, one 32 bytes after the other.  */
-# ifdef USE_DOUBLE
-#  define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + 32 + MAX_PREFETCH_SIZE
-# else
-#  define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + MAX_PREFETCH_SIZE
-# endif
-# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) \
-    && ((PREFETCH_CHUNK * 4) < MAX_PREFETCH_SIZE)
-/* We cannot handle this because the initial prefetches may fetch bytes that
- * are before the buffer being copied.  We start copies with an offset
- * of 4 so avoid this situation when using PREPAREFORSTORE.  */
-#error "PREFETCH_CHUNK is too large and/or MAX_PREFETCH_SIZE is too small."
-# endif
-#else /* USE_PREFETCH not defined */
-# define PREFETCH_FOR_LOAD(offset, reg)
-# define PREFETCH_FOR_STORE(offset, reg)
-#endif
-
-/* Allow the routine to be named something else if desired.  */
-#ifndef MEMCPY_NAME
-# define MEMCPY_NAME memcpy
-#endif
-
-/* We use these 32/64 bit registers as temporaries to do the copying.  */
-#define REG0 t0
-#define REG1 t1
-#define REG2 t2
-#define REG3 t3
-#if defined(_MIPS_SIM) && (_MIPS_SIM == _ABIO32 || _MIPS_SIM == _ABIO64)
-# define REG4 t4
-# define REG5 t5
-# define REG6 t6
-# define REG7 t7
-#else
-# define REG4 ta0
-# define REG5 ta1
-# define REG6 ta2
-# define REG7 ta3
-#endif
-
-/* We load/store 64 bits at a time when USE_DOUBLE is true.
- * The C_ prefix stands for CHUNK and is used to avoid macro name
- * conflicts with system header files.  */
-
-#ifdef USE_DOUBLE
-# define C_ST	sd
-# define C_LD	ld
-# if __MIPSEB
-#  define C_LDHI	ldl	/* high part is left in big-endian	*/
-#  define C_STHI	sdl	/* high part is left in big-endian	*/
-#  define C_LDLO	ldr	/* low part is right in big-endian	*/
-#  define C_STLO	sdr	/* low part is right in big-endian	*/
-# else
-#  define C_LDHI	ldr	/* high part is right in little-endian	*/
-#  define C_STHI	sdr	/* high part is right in little-endian	*/
-#  define C_LDLO	ldl	/* low part is left in little-endian	*/
-#  define C_STLO	sdl	/* low part is left in little-endian	*/
-# endif
-# define C_ALIGN	dalign	/* r6 align instruction			*/
-#else
-# define C_ST	sw
-# define C_LD	lw
-# if __MIPSEB
-#  define C_LDHI	lwl	/* high part is left in big-endian	*/
-#  define C_STHI	swl	/* high part is left in big-endian	*/
-#  define C_LDLO	lwr	/* low part is right in big-endian	*/
-#  define C_STLO	swr	/* low part is right in big-endian	*/
-# else
-#  define C_LDHI	lwr	/* high part is right in little-endian	*/
-#  define C_STHI	swr	/* high part is right in little-endian	*/
-#  define C_LDLO	lwl	/* low part is left in little-endian	*/
-#  define C_STLO	swl	/* low part is left in little-endian	*/
-# endif
-# define C_ALIGN	align	/* r6 align instruction			*/
-#endif
-
-/* Bookkeeping values for 32 vs. 64 bit mode.  */
-#ifdef USE_DOUBLE
-# define NSIZE 8
-# define NSIZEMASK 0x3f
-# define NSIZEDMASK 0x7f
-#else
-# define NSIZE 4
-# define NSIZEMASK 0x1f
-# define NSIZEDMASK 0x3f
-#endif
-#define UNIT(unit) ((unit)*NSIZE)
-#define UNITM1(unit) (((unit)*NSIZE)-1)
-
-#ifdef __ANDROID__
-LEAF(MEMCPY_NAME, 0)
-#else
-LEAF(MEMCPY_NAME)
-#endif
-	.set	nomips16
-	.set	noreorder
-/*
- * Below we handle the case where memcpy is called with overlapping src and dst.
- * Although memcpy is not required to handle this case, some parts of Android
- * like Skia rely on such usage. We call memmove to handle such cases.
- */
-#ifdef USE_MEMMOVE_FOR_OVERLAP
-	PTR_SUBU t0,a0,a1
-	PTR_SRA	t2,t0,31
-	xor	t1,t0,t2
-	PTR_SUBU t0,t1,t2
-	sltu	t2,t0,a2
-	beq	t2,zero,L(memcpy)
-	nop
-#if defined(__LP64__)
-	daddiu	sp,sp,-8
-	SETUP_GP64(0,MEMCPY_NAME)
-	LA	t9,memmove
-	RESTORE_GP64
-	jr	t9
-	daddiu	sp,sp,8
-#else
-	LA	t9,memmove
-	jr	t9
-	nop
-#endif
-L(memcpy):
-#endif
-/*
- * If the size is less than 2*NSIZE (8 or 16), go to L(lastb).  Regardless of
- * size, copy dst pointer to v0 for the return value.
- */
-	slti	t2,a2,(2 * NSIZE)
-	bne	t2,zero,L(lastb)
-#if defined(RETURN_FIRST_PREFETCH) || defined(RETURN_LAST_PREFETCH)
-	move	v0,zero
-#else
-	move	v0,a0
-#endif
-
-#ifndef R6_CODE
-
-/*
- * If src and dst have different alignments, go to L(unaligned), if they
- * have the same alignment (but are not actually aligned) do a partial
- * load/store to make them aligned.  If they are both already aligned
- * we can start copying at L(aligned).
- */
-	xor	t8,a1,a0
-	andi	t8,t8,(NSIZE-1)		/* t8 is a0/a1 word-displacement */
-	bne	t8,zero,L(unaligned)
-	PTR_SUBU a3, zero, a0
-
-	andi	a3,a3,(NSIZE-1)		/* copy a3 bytes to align a0/a1	  */
-	beq	a3,zero,L(aligned)	/* if a3=0, it is already aligned */
-	PTR_SUBU a2,a2,a3		/* a2 is the remining bytes count */
-
-	C_LDHI	t8,0(a1)
-	PTR_ADDU a1,a1,a3
-	C_STHI	t8,0(a0)
-	PTR_ADDU a0,a0,a3
-
-#else /* R6_CODE */
-
-/*
- * Align the destination and hope that the source gets aligned too.  If it
- * doesn't we jump to L(r6_unaligned*) to do unaligned copies using the r6
- * align instruction.
- */
-	andi	t8,a0,7
-	lapc	t9,L(atable)
-	PTR_LSA	t9,t8,t9,2
-	jrc	t9
-L(atable):
-	bc	L(lb0)
-	bc	L(lb7)
-	bc	L(lb6)
-	bc	L(lb5)
-	bc	L(lb4)
-	bc	L(lb3)
-	bc	L(lb2)
-	bc	L(lb1)
-L(lb7):
-	lb	a3, 6(a1)
-	sb	a3, 6(a0)
-L(lb6):
-	lb	a3, 5(a1)
-	sb	a3, 5(a0)
-L(lb5):
-	lb	a3, 4(a1)
-	sb	a3, 4(a0)
-L(lb4):
-	lb	a3, 3(a1)
-	sb	a3, 3(a0)
-L(lb3):
-	lb	a3, 2(a1)
-	sb	a3, 2(a0)
-L(lb2):
-	lb	a3, 1(a1)
-	sb	a3, 1(a0)
-L(lb1):
-	lb	a3, 0(a1)
-	sb	a3, 0(a0)
-
-	li	t9,8
-	subu	t8,t9,t8
-	PTR_SUBU a2,a2,t8
-	PTR_ADDU a0,a0,t8
-	PTR_ADDU a1,a1,t8
-L(lb0):
-
-	andi	t8,a1,(NSIZE-1)
-	lapc	t9,L(jtable)
-	PTR_LSA	t9,t8,t9,2
-	jrc	t9
-L(jtable):
-	bc	L(aligned)
-	bc	L(r6_unaligned1)
-	bc	L(r6_unaligned2)
-	bc	L(r6_unaligned3)
-# ifdef USE_DOUBLE
-	bc	L(r6_unaligned4)
-	bc	L(r6_unaligned5)
-	bc	L(r6_unaligned6)
-	bc	L(r6_unaligned7)
-# endif
-#endif /* R6_CODE */
-
-L(aligned):
-
-/*
- * Now dst/src are both aligned to (word or double word) aligned addresses
- * Set a2 to count how many bytes we have to copy after all the 64/128 byte
- * chunks are copied and a3 to the dst pointer after all the 64/128 byte
- * chunks have been copied.  We will loop, incrementing a0 and a1 until a0
- * equals a3.
- */
-
-	andi	t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
-	beq	a2,t8,L(chkw)	 /* if a2==t8, no 64-byte/128-byte chunks */
-	PTR_SUBU a3,a2,t8	 /* subtract from a2 the reminder */
-	PTR_ADDU a3,a0,a3	 /* Now a3 is the final dst after loop */
-
-/* When in the loop we may prefetch with the 'prepare to store' hint,
- * in this case the a0+x should not be past the "t0-32" address.  This
- * means: for x=128 the last "safe" a0 address is "t0-160".  Alternatively,
- * for x=64 the last "safe" a0 address is "t0-96" In the current version we
- * will use "prefetch hint,128(a0)", so "t0-160" is the limit.
- */
-#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
-	PTR_ADDU t0,a0,a2		/* t0 is the "past the end" address */
-	PTR_SUBU t9,t0,PREFETCH_LIMIT	/* t9 is the "last safe pref" address */
-#endif
-	PREFETCH_FOR_LOAD  (0, a1)
-	PREFETCH_FOR_LOAD  (1, a1)
-	PREFETCH_FOR_LOAD  (2, a1)
-	PREFETCH_FOR_LOAD  (3, a1)
-#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
-	PREFETCH_FOR_STORE (1, a0)
-	PREFETCH_FOR_STORE (2, a0)
-	PREFETCH_FOR_STORE (3, a0)
-#endif
-#if defined(RETURN_FIRST_PREFETCH) && defined(USE_PREFETCH)
-# if PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE
-	sltu    v1,t9,a0
-	bgtz    v1,L(skip_set)
-	nop
-	PTR_ADDIU v0,a0,(PREFETCH_CHUNK*4)
-L(skip_set):
-# else
-	PTR_ADDIU v0,a0,(PREFETCH_CHUNK*1)
-# endif
-#endif
-#if defined(RETURN_LAST_PREFETCH) && defined(USE_PREFETCH) \
-    && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
-	PTR_ADDIU v0,a0,(PREFETCH_CHUNK*3)
-# ifdef USE_DOUBLE
-	PTR_ADDIU v0,v0,32
-# endif
-#endif
-L(loop16w):
-	C_LD	t0,UNIT(0)(a1)
-#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
-	sltu	v1,t9,a0		/* If a0 > t9 don't use next prefetch */
-	bgtz	v1,L(skip_pref)
-#endif
-	C_LD	t1,UNIT(1)(a1)
-#ifndef R6_CODE
-	PREFETCH_FOR_STORE (4, a0)
-	PREFETCH_FOR_STORE (5, a0)
-#else
-	PREFETCH_FOR_STORE (2, a0)
-#endif
-#if defined(RETURN_LAST_PREFETCH) && defined(USE_PREFETCH)
-	PTR_ADDIU v0,a0,(PREFETCH_CHUNK*5)
-# ifdef USE_DOUBLE
-	PTR_ADDIU v0,v0,32
-# endif
-#endif
-L(skip_pref):
-	C_LD	REG2,UNIT(2)(a1)
-	C_LD	REG3,UNIT(3)(a1)
-	C_LD	REG4,UNIT(4)(a1)
-	C_LD	REG5,UNIT(5)(a1)
-	C_LD	REG6,UNIT(6)(a1)
-	C_LD	REG7,UNIT(7)(a1)
-#ifndef R6_CODE
-	PREFETCH_FOR_LOAD (4, a1)
-#else
-	PREFETCH_FOR_LOAD (3, a1)
-#endif
-	C_ST	t0,UNIT(0)(a0)
-	C_ST	t1,UNIT(1)(a0)
-	C_ST	REG2,UNIT(2)(a0)
-	C_ST	REG3,UNIT(3)(a0)
-	C_ST	REG4,UNIT(4)(a0)
-	C_ST	REG5,UNIT(5)(a0)
-	C_ST	REG6,UNIT(6)(a0)
-	C_ST	REG7,UNIT(7)(a0)
-
-	C_LD	t0,UNIT(8)(a1)
-	C_LD	t1,UNIT(9)(a1)
-	C_LD	REG2,UNIT(10)(a1)
-	C_LD	REG3,UNIT(11)(a1)
-	C_LD	REG4,UNIT(12)(a1)
-	C_LD	REG5,UNIT(13)(a1)
-	C_LD	REG6,UNIT(14)(a1)
-	C_LD	REG7,UNIT(15)(a1)
-#ifndef R6_CODE
-	PREFETCH_FOR_LOAD (5, a1)
-#endif
-	C_ST	t0,UNIT(8)(a0)
-	C_ST	t1,UNIT(9)(a0)
-	C_ST	REG2,UNIT(10)(a0)
-	C_ST	REG3,UNIT(11)(a0)
-	C_ST	REG4,UNIT(12)(a0)
-	C_ST	REG5,UNIT(13)(a0)
-	C_ST	REG6,UNIT(14)(a0)
-	C_ST	REG7,UNIT(15)(a0)
-	PTR_ADDIU a0,a0,UNIT(16)	/* adding 64/128 to dest */
-	bne	a0,a3,L(loop16w)
-	PTR_ADDIU a1,a1,UNIT(16)	/* adding 64/128 to src */
-	move	a2,t8
-
-/* Here we have src and dest word-aligned but less than 64-bytes or
- * 128 bytes to go.  Check for a 32(64) byte chunk and copy if if there
- * is one.  Otherwise jump down to L(chk1w) to handle the tail end of
- * the copy.
- */
-
-L(chkw):
-	PREFETCH_FOR_LOAD (0, a1)
-	andi	t8,a2,NSIZEMASK	/* Is there a 32-byte/64-byte chunk.  */
-				/* The t8 is the reminder count past 32-bytes */
-	beq	a2,t8,L(chk1w)	/* When a2=t8, no 32-byte chunk  */
-	nop
-	C_LD	t0,UNIT(0)(a1)
-	C_LD	t1,UNIT(1)(a1)
-	C_LD	REG2,UNIT(2)(a1)
-	C_LD	REG3,UNIT(3)(a1)
-	C_LD	REG4,UNIT(4)(a1)
-	C_LD	REG5,UNIT(5)(a1)
-	C_LD	REG6,UNIT(6)(a1)
-	C_LD	REG7,UNIT(7)(a1)
-	PTR_ADDIU a1,a1,UNIT(8)
-	C_ST	t0,UNIT(0)(a0)
-	C_ST	t1,UNIT(1)(a0)
-	C_ST	REG2,UNIT(2)(a0)
-	C_ST	REG3,UNIT(3)(a0)
-	C_ST	REG4,UNIT(4)(a0)
-	C_ST	REG5,UNIT(5)(a0)
-	C_ST	REG6,UNIT(6)(a0)
-	C_ST	REG7,UNIT(7)(a0)
-	PTR_ADDIU a0,a0,UNIT(8)
-
-/*
- * Here we have less than 32(64) bytes to copy.  Set up for a loop to
- * copy one word (or double word) at a time.  Set a2 to count how many
- * bytes we have to copy after all the word (or double word) chunks are
- * copied and a3 to the dst pointer after all the (d)word chunks have
- * been copied.  We will loop, incrementing a0 and a1 until a0 equals a3.
- */
-L(chk1w):
-	andi	a2,t8,(NSIZE-1)	/* a2 is the reminder past one (d)word chunks */
-	beq	a2,t8,L(lastb)
-	PTR_SUBU a3,t8,a2	/* a3 is count of bytes in one (d)word chunks */
-	PTR_ADDU a3,a0,a3	/* a3 is the dst address after loop */
-
-/* copying in words (4-byte or 8-byte chunks) */
-L(wordCopy_loop):
-	C_LD	REG3,UNIT(0)(a1)
-	PTR_ADDIU a0,a0,UNIT(1)
-	PTR_ADDIU a1,a1,UNIT(1)
-	bne	a0,a3,L(wordCopy_loop)
-	C_ST	REG3,UNIT(-1)(a0)
-
-/* Copy the last 8 (or 16) bytes */
-L(lastb):
-	blez	a2,L(leave)
-	PTR_ADDU a3,a0,a2	/* a3 is the last dst address */
-L(lastbloop):
-	lb	v1,0(a1)
-	PTR_ADDIU a0,a0,1
-	PTR_ADDIU a1,a1,1
-	bne	a0,a3,L(lastbloop)
-	sb	v1,-1(a0)
-L(leave):
-	j	ra
-	nop
-
-#ifndef R6_CODE
-/*
- * UNALIGNED case, got here with a3 = "negu a0"
- * This code is nearly identical to the aligned code above
- * but only the destination (not the source) gets aligned
- * so we need to do partial loads of the source followed
- * by normal stores to the destination (once we have aligned
- * the destination).
- */
-
-L(unaligned):
-	andi	a3,a3,(NSIZE-1)	/* copy a3 bytes to align a0/a1 */
-	beqz	a3,L(ua_chk16w) /* if a3=0, it is already aligned */
-	PTR_SUBU a2,a2,a3	/* a2 is the remining bytes count */
-
-	C_LDHI	v1,UNIT(0)(a1)
-	C_LDLO	v1,UNITM1(1)(a1)
-	PTR_ADDU a1,a1,a3
-	C_STHI	v1,UNIT(0)(a0)
-	PTR_ADDU a0,a0,a3
-
-/*
- *  Now the destination (but not the source) is aligned
- * Set a2 to count how many bytes we have to copy after all the 64/128 byte
- * chunks are copied and a3 to the dst pointer after all the 64/128 byte
- * chunks have been copied.  We will loop, incrementing a0 and a1 until a0
- * equals a3.
- */
-
-L(ua_chk16w):
-	andi	t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
-	beq	a2,t8,L(ua_chkw) /* if a2==t8, no 64-byte/128-byte chunks */
-	PTR_SUBU a3,a2,t8	 /* subtract from a2 the reminder */
-	PTR_ADDU a3,a0,a3	 /* Now a3 is the final dst after loop */
-
-# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
-	PTR_ADDU t0,a0,a2	  /* t0 is the "past the end" address */
-	PTR_SUBU t9,t0,PREFETCH_LIMIT /* t9 is the "last safe pref" address */
-# endif
-	PREFETCH_FOR_LOAD  (0, a1)
-	PREFETCH_FOR_LOAD  (1, a1)
-	PREFETCH_FOR_LOAD  (2, a1)
-# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
-	PREFETCH_FOR_STORE (1, a0)
-	PREFETCH_FOR_STORE (2, a0)
-	PREFETCH_FOR_STORE (3, a0)
-# endif
-# if defined(RETURN_FIRST_PREFETCH) && defined(USE_PREFETCH)
-#  if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
-	sltu    v1,t9,a0
-	bgtz    v1,L(ua_skip_set)
-	nop
-	PTR_ADDIU v0,a0,(PREFETCH_CHUNK*4)
-L(ua_skip_set):
-#  else
-	PTR_ADDIU v0,a0,(PREFETCH_CHUNK*1)
-#  endif
-# endif
-L(ua_loop16w):
-	PREFETCH_FOR_LOAD  (3, a1)
-	C_LDHI	t0,UNIT(0)(a1)
-	C_LDHI	t1,UNIT(1)(a1)
-	C_LDHI	REG2,UNIT(2)(a1)
-# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
-	sltu	v1,t9,a0
-	bgtz	v1,L(ua_skip_pref)
-# endif
-	C_LDHI	REG3,UNIT(3)(a1)
-	PREFETCH_FOR_STORE (4, a0)
-	PREFETCH_FOR_STORE (5, a0)
-L(ua_skip_pref):
-	C_LDHI	REG4,UNIT(4)(a1)
-	C_LDHI	REG5,UNIT(5)(a1)
-	C_LDHI	REG6,UNIT(6)(a1)
-	C_LDHI	REG7,UNIT(7)(a1)
-	C_LDLO	t0,UNITM1(1)(a1)
-	C_LDLO	t1,UNITM1(2)(a1)
-	C_LDLO	REG2,UNITM1(3)(a1)
-	C_LDLO	REG3,UNITM1(4)(a1)
-	C_LDLO	REG4,UNITM1(5)(a1)
-	C_LDLO	REG5,UNITM1(6)(a1)
-	C_LDLO	REG6,UNITM1(7)(a1)
-	C_LDLO	REG7,UNITM1(8)(a1)
-        PREFETCH_FOR_LOAD (4, a1)
-	C_ST	t0,UNIT(0)(a0)
-	C_ST	t1,UNIT(1)(a0)
-	C_ST	REG2,UNIT(2)(a0)
-	C_ST	REG3,UNIT(3)(a0)
-	C_ST	REG4,UNIT(4)(a0)
-	C_ST	REG5,UNIT(5)(a0)
-	C_ST	REG6,UNIT(6)(a0)
-	C_ST	REG7,UNIT(7)(a0)
-	C_LDHI	t0,UNIT(8)(a1)
-	C_LDHI	t1,UNIT(9)(a1)
-	C_LDHI	REG2,UNIT(10)(a1)
-	C_LDHI	REG3,UNIT(11)(a1)
-	C_LDHI	REG4,UNIT(12)(a1)
-	C_LDHI	REG5,UNIT(13)(a1)
-	C_LDHI	REG6,UNIT(14)(a1)
-	C_LDHI	REG7,UNIT(15)(a1)
-	C_LDLO	t0,UNITM1(9)(a1)
-	C_LDLO	t1,UNITM1(10)(a1)
-	C_LDLO	REG2,UNITM1(11)(a1)
-	C_LDLO	REG3,UNITM1(12)(a1)
-	C_LDLO	REG4,UNITM1(13)(a1)
-	C_LDLO	REG5,UNITM1(14)(a1)
-	C_LDLO	REG6,UNITM1(15)(a1)
-	C_LDLO	REG7,UNITM1(16)(a1)
-        PREFETCH_FOR_LOAD (5, a1)
-	C_ST	t0,UNIT(8)(a0)
-	C_ST	t1,UNIT(9)(a0)
-	C_ST	REG2,UNIT(10)(a0)
-	C_ST	REG3,UNIT(11)(a0)
-	C_ST	REG4,UNIT(12)(a0)
-	C_ST	REG5,UNIT(13)(a0)
-	C_ST	REG6,UNIT(14)(a0)
-	C_ST	REG7,UNIT(15)(a0)
-	PTR_ADDIU a0,a0,UNIT(16)	/* adding 64/128 to dest */
-	bne	a0,a3,L(ua_loop16w)
-	PTR_ADDIU a1,a1,UNIT(16)	/* adding 64/128 to src */
-	move	a2,t8
-
-/* Here we have src and dest word-aligned but less than 64-bytes or
- * 128 bytes to go.  Check for a 32(64) byte chunk and copy if if there
- * is one.  Otherwise jump down to L(ua_chk1w) to handle the tail end of
- * the copy.  */
-
-L(ua_chkw):
-	PREFETCH_FOR_LOAD (0, a1)
-	andi	t8,a2,NSIZEMASK	  /* Is there a 32-byte/64-byte chunk.  */
-				  /* t8 is the reminder count past 32-bytes */
-	beq	a2,t8,L(ua_chk1w) /* When a2=t8, no 32-byte chunk */
-	nop
-	C_LDHI	t0,UNIT(0)(a1)
-	C_LDHI	t1,UNIT(1)(a1)
-	C_LDHI	REG2,UNIT(2)(a1)
-	C_LDHI	REG3,UNIT(3)(a1)
-	C_LDHI	REG4,UNIT(4)(a1)
-	C_LDHI	REG5,UNIT(5)(a1)
-	C_LDHI	REG6,UNIT(6)(a1)
-	C_LDHI	REG7,UNIT(7)(a1)
-	C_LDLO	t0,UNITM1(1)(a1)
-	C_LDLO	t1,UNITM1(2)(a1)
-	C_LDLO	REG2,UNITM1(3)(a1)
-	C_LDLO	REG3,UNITM1(4)(a1)
-	C_LDLO	REG4,UNITM1(5)(a1)
-	C_LDLO	REG5,UNITM1(6)(a1)
-	C_LDLO	REG6,UNITM1(7)(a1)
-	C_LDLO	REG7,UNITM1(8)(a1)
-	PTR_ADDIU a1,a1,UNIT(8)
-	C_ST	t0,UNIT(0)(a0)
-	C_ST	t1,UNIT(1)(a0)
-	C_ST	REG2,UNIT(2)(a0)
-	C_ST	REG3,UNIT(3)(a0)
-	C_ST	REG4,UNIT(4)(a0)
-	C_ST	REG5,UNIT(5)(a0)
-	C_ST	REG6,UNIT(6)(a0)
-	C_ST	REG7,UNIT(7)(a0)
-	PTR_ADDIU a0,a0,UNIT(8)
-/*
- * Here we have less than 32(64) bytes to copy.  Set up for a loop to
- * copy one word (or double word) at a time.
- */
-L(ua_chk1w):
-	andi	a2,t8,(NSIZE-1)	/* a2 is the reminder past one (d)word chunks */
-	beq	a2,t8,L(ua_smallCopy)
-	PTR_SUBU a3,t8,a2	/* a3 is count of bytes in one (d)word chunks */
-	PTR_ADDU a3,a0,a3	/* a3 is the dst address after loop */
-
-/* copying in words (4-byte or 8-byte chunks) */
-L(ua_wordCopy_loop):
-	C_LDHI	v1,UNIT(0)(a1)
-	C_LDLO	v1,UNITM1(1)(a1)
-	PTR_ADDIU a0,a0,UNIT(1)
-	PTR_ADDIU a1,a1,UNIT(1)
-	bne	a0,a3,L(ua_wordCopy_loop)
-	C_ST	v1,UNIT(-1)(a0)
-
-/* Copy the last 8 (or 16) bytes */
-L(ua_smallCopy):
-	beqz	a2,L(leave)
-	PTR_ADDU a3,a0,a2	/* a3 is the last dst address */
-L(ua_smallCopy_loop):
-	lb	v1,0(a1)
-	PTR_ADDIU a0,a0,1
-	PTR_ADDIU a1,a1,1
-	bne	a0,a3,L(ua_smallCopy_loop)
-	sb	v1,-1(a0)
-
-	j	ra
-	nop
-
-#else /* R6_CODE */
-
-# if __MIPSEB
-#  define SWAP_REGS(X,Y) X, Y
-#  define ALIGN_OFFSET(N) (N)
-# else
-#  define SWAP_REGS(X,Y) Y, X
-#  define ALIGN_OFFSET(N) (NSIZE-N)
-# endif
-# define R6_UNALIGNED_WORD_COPY(BYTEOFFSET) \
-	andi	REG7, a2, (NSIZE-1);/* REG7 is # of bytes to by bytes.     */ \
-	beq	REG7, a2, L(lastb); /* Check for bytes to copy by word	   */ \
-	PTR_SUBU a3, a2, REG7;	/* a3 is number of bytes to be copied in   */ \
-				/* (d)word chunks.			   */ \
-	move	a2, REG7;	/* a2 is # of bytes to copy byte by byte   */ \
-				/* after word loop is finished.		   */ \
-	PTR_ADDU REG6, a0, a3;	/* REG6 is the dst address after loop.	   */ \
-	PTR_SUBU REG2, a1, t8;	/* REG2 is the aligned src address.	   */ \
-	PTR_ADDU a1, a1, a3;	/* a1 is addr of source after word loop.   */ \
-	C_LD	t0, UNIT(0)(REG2);  /* Load first part of source.	   */ \
-L(r6_ua_wordcopy##BYTEOFFSET):						      \
-	C_LD	t1, UNIT(1)(REG2);  /* Load second part of source.	   */ \
-	C_ALIGN	REG3, SWAP_REGS(t1,t0), ALIGN_OFFSET(BYTEOFFSET);	      \
-	PTR_ADDIU a0, a0, UNIT(1);  /* Increment destination pointer.	   */ \
-	PTR_ADDIU REG2, REG2, UNIT(1); /* Increment aligned source pointer.*/ \
-	move	t0, t1;		/* Move second part of source to first.	   */ \
-	bne	a0, REG6,L(r6_ua_wordcopy##BYTEOFFSET);			      \
-	C_ST	REG3, UNIT(-1)(a0);					      \
-	j	L(lastb);						      \
-	nop
-
-	/* We are generating R6 code, the destination is 4 byte aligned and
-	   the source is not 4 byte aligned. t8 is 1, 2, or 3 depending on the
-           alignment of the source.  */
-
-L(r6_unaligned1):
-	R6_UNALIGNED_WORD_COPY(1)
-L(r6_unaligned2):
-	R6_UNALIGNED_WORD_COPY(2)
-L(r6_unaligned3):
-	R6_UNALIGNED_WORD_COPY(3)
-# ifdef USE_DOUBLE
-L(r6_unaligned4):
-	R6_UNALIGNED_WORD_COPY(4)
-L(r6_unaligned5):
-	R6_UNALIGNED_WORD_COPY(5)
-L(r6_unaligned6):
-	R6_UNALIGNED_WORD_COPY(6)
-L(r6_unaligned7):
-	R6_UNALIGNED_WORD_COPY(7)
-# endif
-#endif /* R6_CODE */
-
-	.set	at
-	.set	reorder
-END(MEMCPY_NAME)
-#ifndef __ANDROID__
-# ifdef _LIBC
-libc_hidden_builtin_def (MEMCPY_NAME)
-# endif
-#endif
diff --git a/libc/arch-mips/string/memcpy.c b/libc/arch-mips/string/memcpy.c
new file mode 100644
index 0000000..68827b6
--- /dev/null
+++ b/libc/arch-mips/string/memcpy.c
@@ -0,0 +1,328 @@
+/*
+ * Copyright (c) 2017 Imagination Technologies.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *      * Redistributions of source code must retain the above copyright
+ *        notice, this list of conditions and the following disclaimer.
+ *      * Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimer
+ *        in the documentation and/or other materials provided with
+ *        the distribution.
+ *      * Neither the name of Imagination Technologies nor the names of its
+ *        contributors may be used to endorse or promote products derived
+ *        from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+#if !defined(UNALIGNED_INSTR_SUPPORT)
+/* Does the target have unaligned lw/ld/ualw/uald instructions?  */
+#define UNALIGNED_INSTR_SUPPORT 0
+#if __mips_isa_rev < 6 && !__mips1
+#undef UNALIGNED_INSTR_SUPPORT
+#define UNALIGNED_INSTR_SUPPORT 1
+#endif
+#endif
+
+#if !defined(HW_UNALIGNED_SUPPORT)
+/* Does the target have hardware support for unaligned accesses?  */
+#define HW_UNALIGNED_SUPPORT 0
+#if __mips_isa_rev >= 6
+#undef HW_UNALIGNED_SUPPORT
+#define HW_UNALIGNED_SUPPORT 1
+#endif
+#endif
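+/* Pre-R6 ISAs (other than MIPS I) provide the lwl/lwr family for
+   unaligned access; R6 removed those instructions but requires
+   ordinary loads and stores to handle misalignment in hardware,
+   hence the two complementary defaults above.  */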
+
+#define ENABLE_PREFETCH     1
+
+#if ENABLE_PREFETCH
+#define PREFETCH(addr)  __builtin_prefetch (addr, 0, 1);
+#else
+#define PREFETCH(addr)
+#endif
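+/* __builtin_prefetch (addr, 0, 1) is a read prefetch with low
+   temporal locality, so streamed source data is less likely to
+   displace hotter cache lines.  */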
+
+#if _MIPS_SIM == _ABIO32
+typedef unsigned long reg_t;
+typedef struct
+{
+  reg_t B0:8, B1:8, B2:8, B3:8;
+} bits_t;
+#else
+typedef unsigned long long reg_t;
+typedef struct
+{
+  reg_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
+} bits_t;
+#endif
+
+typedef union
+{
+  reg_t v;
+  bits_t b;
+} bitfields_t;
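+/* The union lets a loaded word be unpacked into bytes without pointer
+   casts; this relies on the bitfield layout matching the in-memory
+   byte order, which holds for the MIPS ABIs.  */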
+
+#define DO_BYTE(a, i)   \
+  a[i] = bw.b.B##i;     \
+  len--;                \
+  if(!len) return ret;  \
+
+/* This code is called when aligning a pointer, when there are remaining
+   bytes after copying words, or when the architecture has no form of
+   unaligned support.  */
+static inline void * __attribute__ ((always_inline))
+do_bytes (void *a, const void *b, unsigned long len, void *ret)
+{
+  unsigned char *x = (unsigned char *) a;
+  unsigned char *y = (unsigned char *) b;
+  unsigned long i;
+
+  /* 'len' might be zero here, so preloading the first two values
+     before the loop may access unallocated memory.  */
+  for (i = 0; i < len; i++) {
+    *x = *y;
+    x++;
+    y++;
+  }
+  return ret;
+}
+
+/* This code is called to copy only the remaining bytes within a word or doubleword.  */
+static inline void * __attribute__ ((always_inline))
+do_bytes_remaining (void *a, const void *b, unsigned long len, void *ret)
+{
+  unsigned char *x = (unsigned char *) a;
+
+  if(len > 0) {
+    bitfields_t bw;
+    bw.v = *((reg_t*) b);
+
+#if __mips64
+    DO_BYTE(x, 0);
+    DO_BYTE(x, 1);
+    DO_BYTE(x, 2);
+    DO_BYTE(x, 3);
+    DO_BYTE(x, 4);
+    DO_BYTE(x, 5);
+    DO_BYTE(x, 6);
+    DO_BYTE(x, 7);
+#else
+    DO_BYTE(x, 0);
+    DO_BYTE(x, 1);
+    DO_BYTE(x, 2);
+    DO_BYTE(x, 3);
+#endif
+  }
+
+  return ret;
+}
+
+#if !HW_UNALIGNED_SUPPORT
+#if UNALIGNED_INSTR_SUPPORT
+/* for MIPS GCC, there are no unaligned builtins - so this struct forces
+   the compiler to treat the pointer access as unaligned.  */
+struct ulw
+{
+  reg_t uli;
+} __attribute__ ((packed));
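+/* On pre-R6 targets GCC typically lowers an access through this packed
+   struct to an lwl/lwr (or ldl/ldr) pair rather than a byte loop,
+   which is the point of the wrapper.  */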
+
+/* The first pointer is not aligned while the second pointer is.  */
+static void *
+unaligned_words (struct ulw *a, const reg_t * b,
+                 unsigned long words, unsigned long bytes, void *ret)
+{
+#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
+  unsigned long i, words_by_8, words_by_1;
+  words_by_1 = words % 8;
+  words_by_8 = words >> 3;
+  for (; words_by_8 > 0; words_by_8--) {
+    if(words_by_8 != 1)
+      PREFETCH (b + 8);
+    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
+    reg_t y4 = b[4], y5 = b[5], y6 = b[6], y7 = b[7];
+    a[0].uli = y0;
+    a[1].uli = y1;
+    a[2].uli = y2;
+    a[3].uli = y3;
+    a[4].uli = y4;
+    a[5].uli = y5;
+    a[6].uli = y6;
+    a[7].uli = y7;
+    a += 8;
+    b += 8;
+  }
+#else
+  unsigned long i, words_by_4, words_by_1;
+  words_by_1 = words % 4;
+  words_by_4 = words >> 2;
+  for (; words_by_4 > 0; words_by_4--) {
+    if(words_by_4 != 1)
+      PREFETCH (b + 4);
+    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
+    a[0].uli = y0;
+    a[1].uli = y1;
+    a[2].uli = y2;
+    a[3].uli = y3;
+    a += 4;
+    b += 4;
+  }
+#endif
+
+  /* do remaining words.  */
+  for (i = 0; i < words_by_1; i++) {
+    a->uli = *b;
+    a += 1;
+    b += 1;
+  }
+
+  /* mop up any remaining bytes.  */
+  return do_bytes_remaining (a, b, bytes, ret);
+}
+#else
+/* Neither HW unaligned support nor unaligned lw/ld/ualw/uald instructions.  */
+static void *
+unaligned_words (reg_t * a, const reg_t * b,
+                 unsigned long words, unsigned long bytes, void *ret)
+{
+  unsigned long i;
+  unsigned char *x = (unsigned char *) a;
+
+  for (i = 0; i < words; i++) {
+    bitfields_t bw;
+    bw.v = *((reg_t*) b);
+    x = (unsigned char *) a;
+#if __mips64
+    x[0] = bw.b.B0;
+    x[1] = bw.b.B1;
+    x[2] = bw.b.B2;
+    x[3] = bw.b.B3;
+    x[4] = bw.b.B4;
+    x[5] = bw.b.B5;
+    x[6] = bw.b.B6;
+    x[7] = bw.b.B7;
+#else
+    x[0] = bw.b.B0;
+    x[1] = bw.b.B1;
+    x[2] = bw.b.B2;
+    x[3] = bw.b.B3;
+#endif
+    a += 1;
+    b += 1;
+  }
+
+  /* mop up any remaining bytes */
+  return do_bytes_remaining (a, b, bytes, ret);
+}
+#endif /* UNALIGNED_INSTR_SUPPORT */
+#endif /* HW_UNALIGNED_SUPPORT */
+
+/* Both pointers are aligned, or the first is not and the hardware supports unaligned accesses.  */
+static void *
+aligned_words (reg_t * a, const reg_t * b,
+               unsigned long words, unsigned long bytes, void *ret)
+{
+#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
+  unsigned long i, words_by_8, words_by_1;
+  words_by_1 = words % 8;
+  words_by_8 = words >> 3;
+  for (; words_by_8 > 0; words_by_8--) {
+    if(words_by_8 != 1)
+      PREFETCH (b + 8);
+    reg_t x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3];
+    reg_t x4 = b[4], x5 = b[5], x6 = b[6], x7 = b[7];
+    a[0] = x0;
+    a[1] = x1;
+    a[2] = x2;
+    a[3] = x3;
+    a[4] = x4;
+    a[5] = x5;
+    a[6] = x6;
+    a[7] = x7;
+    a += 8;
+    b += 8;
+  }
+#else
+  unsigned long i, words_by_4, words_by_1;
+  words_by_1 = words % 4;
+  words_by_4 = words >> 2;
+  for (; words_by_4 > 0; words_by_4--) {
+    if(words_by_4 != 1)
+      PREFETCH (b + 4);
+    reg_t x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3];
+    a[0] = x0;
+    a[1] = x1;
+    a[2] = x2;
+    a[3] = x3;
+    a += 4;
+    b += 4;
+  }
+#endif
+
+  /* do remaining words.  */
+  for (i = 0; i < words_by_1; i++) {
+    *a = *b;
+    a += 1;
+    b += 1;
+  }
+
+  /* mop up any remaining bytes.  */
+  return do_bytes_remaining (a, b, bytes, ret);
+}
+
+void *
+memcpy (void *a, const void *b, size_t len) __overloadable
+{
+  unsigned long bytes, words;
+  void *ret = a;
+
+  /* This shouldn't hit that often.  */
+  if (len < sizeof (reg_t) * 4) {
+    return do_bytes (a, b, len, a);
+  }
+
+  /* Align the second pointer to word/dword alignment.
+     Note that the pointer is only 32-bits for o32/n32 ABIs. For
+     n32, loads are done as 64-bit while address remains 32-bit.   */
+  bytes = ((unsigned long) b) % sizeof (reg_t);
+  if (bytes) {
+    bytes = sizeof (reg_t) - bytes;
+    if (bytes > len)
+      bytes = len;
+    do_bytes (a, b, bytes, ret);
+    if (len == bytes)
+      return ret;
+    len -= bytes;
+    a = (void *) (((unsigned char *) a) + bytes);
+    b = (const void *) (((unsigned char *) b) + bytes);
+  }
+
+  /* Second pointer now aligned.  */
+  words = len / sizeof (reg_t);
+  bytes = len % sizeof (reg_t);
+#if HW_UNALIGNED_SUPPORT
+  /* treat possible unaligned first pointer as aligned.  */
+  return aligned_words (a, b, words, bytes, ret);
+#else
+  if (((unsigned long) a) % sizeof (reg_t) == 0) {
+    return aligned_words (a, b, words, bytes, ret);
+  }
+  /* need to use unaligned instructions on first pointer.  */
+  return unaligned_words (a, b, words, bytes, ret);
+#endif
+}
diff --git a/libc/arch-mips/string/memmove.c b/libc/arch-mips/string/memmove.c
new file mode 100644
index 0000000..fbff297
--- /dev/null
+++ b/libc/arch-mips/string/memmove.c
@@ -0,0 +1,468 @@
+/*
+ * Copyright (c) 2017 Imagination Technologies.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *      * Redistributions of source code must retain the above copyright
+ *        notice, this list of conditions and the following disclaimer.
+ *      * Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimer
+ *        in the documentation and/or other materials provided with
+ *        the distribution.
+ *      * Neither the name of Imagination Technologies nor the names of its
+ *        contributors may be used to endorse or promote products derived
+ *        from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+#if !defined(UNALIGNED_INSTR_SUPPORT)
+/* Does the target have unaligned lw/ld/ualw/uald instructions?  */
+#define UNALIGNED_INSTR_SUPPORT 0
+#if __mips_isa_rev < 6 && !__mips1
+#undef UNALIGNED_INSTR_SUPPORT
+#define UNALIGNED_INSTR_SUPPORT 1
+#endif
+#endif
+
+#if !defined(HW_UNALIGNED_SUPPORT)
+/* Does the target have hardware support for unaligned accesses?  */
+#define HW_UNALIGNED_SUPPORT 0
+#if __mips_isa_rev >= 6
+#undef HW_UNALIGNED_SUPPORT
+#define HW_UNALIGNED_SUPPORT 1
+#endif
+#endif
+
+#define ENABLE_PREFETCH     1
+
+#if ENABLE_PREFETCH
+#define PREFETCH(addr)  __builtin_prefetch (addr, 0, 1);
+#else
+#define PREFETCH(addr)
+#endif
+
+#if _MIPS_SIM == _ABIO32
+typedef unsigned long reg_t;
+typedef struct
+{
+  reg_t B0:8, B1:8, B2:8, B3:8;
+} bits_t;
+#else
+typedef unsigned long long reg_t;
+typedef struct
+{
+  reg_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
+} bits_t;
+#endif
+
+typedef union
+{
+  reg_t v;
+  bits_t b;
+} bitfields_t;
+
+#define DO_BYTE(a, i)   \
+  a[i] = bw.b.B##i;     \
+  len--;                \
+  if(!len) return ret;  \
+
+/* This code is called when aligning a pointer, when there are remaining
+   bytes after copying words, or when the architecture has no form of
+   unaligned support.  */
+static inline void * __attribute__ ((always_inline))
+do_bytes (void *a, const void *b, unsigned long len, void *ret)
+{
+  unsigned char *x = (unsigned char *) a;
+  unsigned char *y = (unsigned char *) b;
+  unsigned long i;
+
+  /* 'len' might be zero here, so preloading the first two values
+     before the loop may access unallocated memory.  */
+  for (i = 0; i < len; i++)
+  {
+    *x = *y;
+    x++;
+    y++;
+  }
+  return ret;
+}
+
+static inline void * __attribute__ ((always_inline))
+do_bytes_backward (void *a, const void *b, unsigned long len, void *ret)
+{
+  unsigned char *x = (unsigned char *) a;
+  unsigned char *y = (unsigned char *) b;
+  unsigned long i;
+
+  /* 'len' might be zero here, so preloading the first two values
+     before the loop may access unallocated memory.  */
+  for (i = 0; i < len; i++) {
+    *--x = *--y;
+  }
+  return ret;
+}
+
+static inline void * __attribute__ ((always_inline))
+do_bytes_aligned (void *a, const void *b, unsigned long len, void *ret)
+{
+  unsigned char *x = (unsigned char *) a;
+
+  if(len > 0) {
+    bitfields_t bw;
+    bw.v = *((reg_t*) b);
+
+#if __mips64
+    DO_BYTE(x, 0);
+    DO_BYTE(x, 1);
+    DO_BYTE(x, 2);
+    DO_BYTE(x, 3);
+    DO_BYTE(x, 4);
+    DO_BYTE(x, 5);
+    DO_BYTE(x, 6);
+    DO_BYTE(x, 7);
+#else
+    DO_BYTE(x, 0);
+    DO_BYTE(x, 1);
+    DO_BYTE(x, 2);
+    DO_BYTE(x, 3);
+#endif
+  }
+
+  return ret;
+}
+
+#if !HW_UNALIGNED_SUPPORT
+#if UNALIGNED_INSTR_SUPPORT
+/* for MIPS GCC, there are no unaligned builtins - so this struct forces
+   the compiler to treat the pointer access as unaligned.  */
+struct ulw
+{
+  reg_t uli;
+} __attribute__ ((packed));
+
+#define STORE_UNALIGNED_8(a, b)                      \
+{                                                    \
+  reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];  \
+  reg_t y4 = b[4], y5 = b[5], y6 = b[6], y7 = b[7];  \
+  a[0].uli = y0;                                     \
+  a[1].uli = y1;                                     \
+  a[2].uli = y2;                                     \
+  a[3].uli = y3;                                     \
+  a[4].uli = y4;                                     \
+  a[5].uli = y5;                                     \
+  a[6].uli = y6;                                     \
+  a[7].uli = y7;                                     \
+}
+
+#define STORE_UNALIGNED_4(a, b)                      \
+{                                                    \
+  reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];  \
+  a[0].uli = y0;                                     \
+  a[1].uli = y1;                                     \
+  a[2].uli = y2;                                     \
+  a[3].uli = y3;                                     \
+}
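+/* Both store macros load every source word into a temporary before the
+   first store: this gives the compiler room to schedule the loads and
+   keeps the stores of an overlapping memmove block from clobbering
+   source words still to be read within the same block.  */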
+
+/* The first pointer is not aligned while the second pointer is.  */
+static void *
+unaligned_words_forward (struct ulw *a, const reg_t * b,
+                         unsigned long words, unsigned long bytes, void *ret)
+{
+#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
+  unsigned long i, words_by_8, words_by_1;
+  words_by_1 = words % 8;
+  words_by_8 = words >> 3;
+  for (; words_by_8 > 0; words_by_8--) {
+    if(words_by_8 != 1)
+      PREFETCH (b + 8);
+    STORE_UNALIGNED_8(a, b);
+    a += 8;
+    b += 8;
+  }
+#else
+  unsigned long i, words_by_4, words_by_1;
+  words_by_1 = words % 4;
+  words_by_4 = words >> 2;
+  for (; words_by_4 > 0; words_by_4--) {
+    if(words_by_4 != 1)
+      PREFETCH (b + 4);
+    STORE_UNALIGNED_4(a, b);
+    a += 4;
+    b += 4;
+  }
+#endif
+
+  /* do remaining words.  */
+  for (i = 0; i < words_by_1; i++) {
+    a->uli = *b;
+    a += 1;
+    b += 1;
+  }
+
+  /* mop up any remaining bytes.  */
+  return do_bytes_aligned (a, b, bytes, ret);
+}
+
+static void *
+unaligned_words_backward (struct ulw *a, const reg_t * b,
+                          unsigned long words, unsigned long bytes, void *ret)
+{
+#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
+  unsigned long i, words_by_8, words_by_1;
+  words_by_1 = words % 8;
+  words_by_8 = words >> 3;
+  for (; words_by_8 > 0; words_by_8--) {
+    if(words_by_8 != 1)
+      PREFETCH (b - 16);
+    a -= 8;
+    b -= 8;
+    STORE_UNALIGNED_8(a, b);
+  }
+#else
+  unsigned long i, words_by_4, words_by_1;
+  words_by_1 = words % 4;
+  words_by_4 = words >> 2;
+  for (; words_by_4 > 0; words_by_4--) {
+    if(words_by_4 != 1)
+      PREFETCH (b - 8);
+    a -= 4;
+    b -= 4;
+    STORE_UNALIGNED_4(a, b);
+  }
+#endif
+
+  /* do remaining words.  */
+  for (i = 0; i < words_by_1; i++) {
+    a -= 1;
+    b -= 1;
+    a->uli = *b;
+  }
+
+  /* mop up any remaining bytes.  */
+  return do_bytes_backward (a, b, bytes, ret);
+}
+
+#else
+/* Neither HW unaligned support nor unaligned lw/ld/ualw/uald instructions.  */
+static void *
+unaligned_words_forward (reg_t * a, const reg_t * b,
+                         unsigned long words, unsigned long bytes, void *ret)
+{
+  return do_bytes_aligned (a, b, (sizeof (reg_t) * words) + bytes, ret);
+}
+
+static void *
+unaligned_words_backward (reg_t * a, const reg_t * b,
+                          unsigned long words, unsigned long bytes, void *ret)
+{
+  return do_bytes_backward (a, b, (sizeof (reg_t) * words) + bytes, ret);
+}
+
+#endif /* UNALIGNED_INSTR_SUPPORT */
+#endif /* HW_UNALIGNED_SUPPORT */
+
+/* Both pointers are aligned, or the first is not and the hardware supports unaligned accesses.  */
+
+#define STORE_ALIGNED_8(a, b)                        \
+{                                                    \
+  reg_t x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3];  \
+  reg_t x4 = b[4], x5 = b[5], x6 = b[6], x7 = b[7];  \
+  a[0] = x0;                                         \
+  a[1] = x1;                                         \
+  a[2] = x2;                                         \
+  a[3] = x3;                                         \
+  a[4] = x4;                                         \
+  a[5] = x5;                                         \
+  a[6] = x6;                                         \
+  a[7] = x7;                                         \
+}
+
+#define STORE_ALIGNED_4(a, b)                        \
+{                                                    \
+  reg_t x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3];  \
+  a[0] = x0;                                         \
+  a[1] = x1;                                         \
+  a[2] = x2;                                         \
+  a[3] = x3;                                         \
+}
+
+static void *
+aligned_words_forward (reg_t * a, const reg_t * b,
+                       unsigned long words, unsigned long bytes, void *ret)
+{
+#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
+  unsigned long i, words_by_8, words_by_1;
+  words_by_1 = words % 8;
+  words_by_8 = words >> 3;
+  for (; words_by_8 > 0; words_by_8--) {
+    if(words_by_8 != 1)
+      PREFETCH (b + 8);
+    STORE_ALIGNED_8(a, b);
+    a += 8;
+    b += 8;
+  }
+#else
+  unsigned long i, words_by_4, words_by_1;
+  words_by_1 = words % 4;
+  words_by_4 = words >> 2;
+  for (; words_by_4 > 0; words_by_4--) {
+    if(words_by_4 != 1)
+      PREFETCH (b + 4);
+    STORE_ALIGNED_4(a, b);
+    a += 4;
+    b += 4;
+  }
+#endif
+
+  /* do remaining words.  */
+  for (i = 0; i < words_by_1; i++) {
+    *a = *b;
+    a += 1;
+    b += 1;
+  }
+
+  /* mop up any remaining bytes.  */
+  return do_bytes_aligned (a, b, bytes, ret);
+}
+
+
+static void *
+aligned_words_backward (reg_t * a, const reg_t * b,
+                        unsigned long words, unsigned long bytes, void *ret)
+{
+#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
+  unsigned long i, words_by_8, words_by_1;
+  words_by_1 = words % 8;
+  words_by_8 = words >> 3;
+  for (; words_by_8 > 0; words_by_8--) {
+    if(words_by_8 != 1)
+      PREFETCH (b - 16);
+    a -= 8;
+    b -= 8;
+    STORE_ALIGNED_8(a, b);
+  }
+#else
+  unsigned long i, words_by_4, words_by_1;
+  words_by_1 = words % 4;
+  words_by_4 = words >> 2;
+  for (; words_by_4 > 0; words_by_4--) {
+    if(words_by_4 != 1)
+      PREFETCH (b - 8);
+    a -= 4;
+    b -= 4;
+    STORE_ALIGNED_4(a, b);
+  }
+#endif
+
+  /* do remaining words.  */
+  for (i = 0; i < words_by_1; i++) {
+    a -= 1;
+    b -= 1;
+    *a = *b;
+  }
+
+  /* mop up any remaining bytes.  */
+  return do_bytes_backward (a, b, bytes, ret);
+}
+
+void *
+memmove (void *dst0, const void *src0, size_t length) __overloadable
+{
+  unsigned long bytes, words;
+  void *ret = dst0;
+
+  if (length == 0 || dst0 == src0)      /* nothing to do */
+    return dst0;
+
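+  /* When the destination starts below the source, a forward copy never
+     overwrites source bytes that are still to be read; in the other,
+     possibly overlapping, case copy backwards from the end.  */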
+  if ((unsigned long)dst0 < (unsigned long)src0) {
+    /* Copy forwards. */
+    /* This shouldn't hit that often. */
+    if (length < sizeof (reg_t) * 4) {
+      return do_bytes (dst0, src0, length, ret);
+    }
+
+    /* Align the second pointer to word/dword alignment.
+       Note that the pointer is only 32-bits for o32/n32 ABIs. For
+       n32, loads are done as 64-bit while address remains 32-bit.   */
+    bytes = ((unsigned long) src0) % sizeof (reg_t);
+    if (bytes) {
+      bytes = sizeof (reg_t) - bytes;
+      if (bytes > length)
+        bytes = length;
+      do_bytes (dst0, src0, bytes, ret);
+      if (length == bytes)
+        return ret;
+      length -= bytes;
+      dst0 = (void *) (((unsigned char *) dst0) + bytes);
+      src0 = (const void *) (((unsigned char *) src0) + bytes);
+    }
+
+    /* Second pointer now aligned.  */
+    words = length / sizeof (reg_t);
+    bytes = length % sizeof (reg_t);
+#if HW_UNALIGNED_SUPPORT
+    /* treat possible unaligned first pointer as aligned.  */
+    return aligned_words_forward (dst0, src0, words, bytes, ret);
+#else
+    if (((unsigned long) dst0) % sizeof (reg_t) == 0) {
+      return aligned_words_forward (dst0, src0, words, bytes, ret);
+    }
+    /* need to use unaligned instructions on first pointer.  */
+    return unaligned_words_forward (dst0, src0, words, bytes, ret);
+#endif
+  } else {
+    /* Copy backwards. */
+    dst0 = (void *) (((unsigned char *) dst0) + length);
+    src0 = (const void *) (((unsigned char *) src0) + length);
+
+    /* This shouldn't hit that often. */
+    if (length < sizeof (reg_t) * 4) {
+      return do_bytes_backward (dst0, src0, length, ret);
+    }
+
+    /* Align the second pointer to word/dword alignment.
+       Note that the pointer is only 32-bits for o32/n32 ABIs. For
+       n32, loads are done as 64-bit while address remains 32-bit.   */
+    bytes = ((unsigned long) src0) % sizeof (reg_t);
+    if (bytes) {
+      if (bytes > length)
+        bytes = length;
+      do_bytes_backward (dst0, src0, bytes, ret);
+      if (length == bytes)
+        return ret;
+      length -= bytes;
+      dst0 = (void *) (((unsigned char *) dst0) - bytes);
+      src0 = (const void *) (((unsigned char *) src0) - bytes);
+    }
+
+    words = length / sizeof (reg_t);
+    bytes = length % sizeof (reg_t);
+#if HW_UNALIGNED_SUPPORT
+    /* treat possible unaligned first pointer as aligned.  */
+    return aligned_words_backward (dst0, src0, words, bytes, ret);
+#else
+    if (((unsigned long) dst0) % sizeof (reg_t) == 0) {
+      return aligned_words_backward (dst0, src0, words, bytes, ret);
+    }
+    /* need to use unaligned instructions on first pointer.  */
+    return unaligned_words_backward (dst0, src0, words, bytes, ret);
+#endif
+  }
+}
diff --git a/libc/arch-mips/string/mips-string-ops.h b/libc/arch-mips/string/mips-string-ops.h
deleted file mode 100644
index 50f7e3a..0000000
--- a/libc/arch-mips/string/mips-string-ops.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (c) 2010 MIPS Technologies, Inc.
- *
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *      * Redistributions of source code must retain the above copyright
- *        notice, this list of conditions and the following disclaimer.
- *      * Redistributions in binary form must reproduce the above copyright
- *        notice, this list of conditions and the following disclaimer
- *        in the documentation and/or other materials provided with
- *        the distribution.
- *      * Neither the name of MIPS Technologies Inc. nor the names of its
- *        contributors may be used to endorse or promote products derived
- *        from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __MIPS_STRING_OPS_H
-#define __MIPS_STRING_OPS_H
-    /* This definition of the byte bitfields uses the
-       assumption that the layout of the bitfields is
-       equivalent to the layout in memory.  Generally,
-       for the MIPS ABIs, this is true. If you compile
-       the strcmp.c file with -DSMOKE_TEST_NEW_STRCMP,
-       this assumption will be tested.
-
-       Also, regardless of char signedness, ANSI C dictates that
-       strcmp() treats each character as unsigned char.  For
-       strlen and the like, signedness doesn't matter.
-
-       Also, this code assumes that there are 8-bits per 'char'.  */
-
-#if __mips64
-typedef struct bits
-{
-  unsigned B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
-} bits_t;
-#else
-typedef struct bits
-{
-  unsigned B0:8, B1:8, B2:8, B3:8;
-} bits_t;
-#endif
-
-#ifndef _ULW
-    /* for MIPS GCC, there is no unaligned builtins - so this code forces
-       the compiler to treat the pointer access as unaligned.  */
-struct ulw
-{
-  unsigned b;
-} __attribute__ ((packed));
-
-#define _ULW(__x) ((struct ulw *) ((char *)(&__x)))->b;
-#endif
-
-/* This union assumes that small structures can be in registers.  If
-   not, then memory accesses will be done - not optimal, but ok.  */
-typedef union
-{
-  unsigned v;
-  bits_t b;
-} bitfields_t;
-
-#ifndef detect_zero
-/* __mips_dsp, __mips_dspr2, and __mips64 are predefined by
-   the compiler, based on command line options.  */
-#if (__mips_dsp || __mips_dspr2) && !__mips64
-#define __mips_using_dsp 1
-
-/* DSP 4-lane (8 unsigned bits per line) subtract and saturate
- * Intrinsic operation. How this works:
- *     Given a 4-byte string of "ABC\0", subtract this as
- *     an unsigned integer from 0x01010101:
- *	   0x01010101
- *       - 0x41424300
- *        -----------
- (         0xbfbebe01 <-- answer without saturation
- *	   0x00000001 <-- answer with saturation
- * When this 4-lane vector is treated as an unsigned int value,
- * a non-zero answer indicates the presence of a zero in the
- * original 4-byte argument.  */
-
-typedef signed char v4i8 __attribute__ ((vector_size (4)));
-
-#define detect_zero(__x,__y,__01s,__80s)\
-       ((unsigned) __builtin_mips_subu_s_qb((v4i8) __01s,(v4i8) __x))
-
-    /* sets all 4 lanes to requested byte.  */
-#define set_byte_lanes(__x) ((unsigned) __builtin_mips_repl_qb(__x))
-
-    /* sets all 4 lanes to 0x01.  */
-#define def_and_set_01(__x) unsigned __x = (unsigned) __builtin_mips_repl_qb(0x01)
-
-    /* sets all 4 lanes to 0x80. Not needed when subu_s.qb used. */
-#define def_and_set_80(__x) /* do nothing */
-
-#else
-    /* this version, originally published in the 80's, uses
-       a reverse-carry-set like determination of the zero byte.
-       The steps are, for __x = 0x31ff0001:
-       __x - _01s = 0x30fdff00
-       ~__x = 0xce00fffe
-       ((__x - _01s) & ~__x) = 0x0000ff00
-       x & _80s = 0x00008000 <- byte 3 was zero
-       Some implementaions naively assume that characters are
-       always 7-bit unsigned ASCII. With that assumption, the
-       "& ~x" is usually discarded. Since character strings
-       are 8-bit, the and is needed to catch the case of
-       a false positive when the byte is 0x80. */
-
-#define detect_zero(__x,__y,_01s,_80s)\
-	((unsigned) (((__x) - _01s) & ~(__x)) & _80s)
-
-#if __mips64
-#define def_and_set_80(__x) unsigned __x =  0x8080808080808080ul
-#define def_and_set_01(__x)  unsigned __x = 0x0101010101010101ul
-#else
-#define def_and_set_80(__x) unsigned __x = 0x80808080ul
-#define def_and_set_01(__x) unsigned __x = 0x01010101ul
-#endif
-
-#endif
-#endif
-
-/* dealing with 'void *' conversions without using extra variables. */
-#define get_byte(__x,__idx) (((unsigned char *) (__x))[__idx])
-#define set_byte(__x,__idx,__fill) ((unsigned char *) (__x))[__idx] = (__fill)
-#define get_word(__x,__idx) (((unsigned *) (__x))[__idx])
-#define set_word(__x,__idx,__fill) ((unsigned *) (__x))[__idx] = (__fill)
-#define inc_ptr_as(__type,__x,__inc) __x = (void *) (((__type) __x) + (__inc))
-#define cvt_ptr_to(__type,__x) ((__type) (__x))
-
-#endif
diff --git a/libc/arch-mips/string/mips_strlen.c b/libc/arch-mips/string/mips_strlen.c
deleted file mode 100644
index f1465f2..0000000
--- a/libc/arch-mips/string/mips_strlen.c
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Copyright (c) 2010 MIPS Technologies, Inc.
- *
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *      * Redistributions of source code must retain the above copyright
- *        notice, this list of conditions and the following disclaimer.
- *      * Redistributions in binary form must reproduce the above copyright
- *        notice, this list of conditions and the following disclaimer
- *        in the documentation and/or other materials provided with
- *        the distribution.
- *      * Neither the name of MIPS Technologies Inc. nor the names of its
- *        contributors may be used to endorse or promote products derived
- *        from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <string.h>
-#include "mips-string-ops.h"
-
-#define do_strlen_word(__av) {\
-    if (detect_zero(x,x,_01s,_80s)) break;\
-    x = __av;\
-    cnt += sizeof (unsigned);\
-    }
-
-#define do_strlen_byte(__x) {\
-  if ((bx.b.B##__x) == 0) break;\
-  ++cnt;\
-  }
-
-#if SMOKE_TEST_MIPS_STRLEN
-#define strlen my_strlen
-#endif
-
-size_t
-strlen (const char *_a) __overloadable
-{
-  int cnt = 0;
-  unsigned x;
-
-  /* align the string to word boundary so we can do word at a time.  */
-  if ((cvt_ptr_to (unsigned, _a) & (sizeof (unsigned) - 1)) != 0)
-    {
-      if ((cvt_ptr_to (unsigned, _a) & 1) != 0)
-	{
-	  if (get_byte (_a, 0) == 0)
-	    return cnt;
-	  /* set bit 1 so 2-bytes are checked and incremented. */
-	  inc_ptr_as (char *, _a, 1);
-	  ++cnt;
-	}
-      if ((cvt_ptr_to (unsigned, _a) & 2) != 0)
-	{
-	  if (get_byte (_a, 0) == 0)
-	    return cnt + 0;
-	  if (get_byte (_a, 1) == 0)
-	    return cnt + 1;
-	  inc_ptr_as (char *, _a, 2);
-	  cnt += 2;
-	}
-    }
-
-#if __mips64
-#error strlen: mips64 check for 4-byte alignment not implemented.
-#endif
-
-  if (1)
-    {
-      def_and_set_01 (_01s);
-      def_and_set_80 (_80s);
-
-      /* as advantagous as it is to performance, this code cannot pre-load
-         the following word, nor can it prefetch the next line at the start
-         of the loop since the string can be at the end of a page with the
-         following page unmapped. There are tests in the suite to catch
-         any attempt to go beyond the current word. */
-      x = get_word (_a, 0);
-      while (1)
-	{
-	  /* doing 8 words should cover most strings.  */
-	  do_strlen_word (get_word (_a, 1));
-	  do_strlen_word (get_word (_a, 2));
-	  do_strlen_word (get_word (_a, 3));
-	  do_strlen_word (get_word (_a, 4));
-	  do_strlen_word (get_word (_a, 5));
-	  do_strlen_word (get_word (_a, 6));
-	  do_strlen_word (get_word (_a, 7));
-	  do_strlen_word (get_word (_a, 8));
-	  inc_ptr_as (unsigned *, _a, 8);
-	}
-    }
-  while (1)
-    {
-      /* pull apart the last word processed and find the zero.  */
-      bitfields_t bx;
-      bx.v = x;
-#if __mips64
-      do_strlen_byte (0);
-      do_strlen_byte (1);
-      do_strlen_byte (2);
-      do_strlen_byte (3);
-      do_strlen_byte (4);
-      do_strlen_byte (5);
-      do_strlen_byte (6);
-#else
-      do_strlen_byte (0);
-      do_strlen_byte (1);
-      do_strlen_byte (2);
-#endif
-      /* last byte is zero */
-      break;
-    }
-  return cnt;
-}
-
-#undef do_strlen_byte
-#undef do_strlen_word
-
-#if SMOKE_TEST_MIPS_STRLEN
-#include <stdio.h>
-char str1[] = "DHRYSTONE PROGRAM, 1'ST STRING";
-char str2[] = "DHRYSTONE PROGRAM, 2'ST STRING";
-
-char str3[] = "another string";
-char str4[] = "another";
-
-char str5[] = "somes tring";
-char str6[] = "somes_tring";
-
-char str7[16], str8[16];
-
-static char *
-chk (unsigned mine, unsigned libs, int *errors)
-{
-  static char answer[1024];
-  char *result = mine == libs ? "PASS" : "FAIL";
-  sprintf (answer, "new_strlen=%d: lib_strlen=%d: %s!", mine, libs, result);
-  if (mine != libs)
-    (*errors)++;
-  return answer;
-}
-
-int
-main (int argc, char **argv)
-{
-  int errors = 0;
-  /* set -1 in one position */
-  str6[5] = 0xff;
-  /* set zero in same position with junk in following 3 */
-  str7[0] = str8[0] = 0;
-  str7[1] = 0xff;
-  str7[2] = 'a';
-  str7[3] = 2;
-  str8[1] = 's';
-  str8[2] = -2;
-  str8[3] = 0;
-
-  fprintf (stderr, "========== mips_strlen%s test...\n",
-	   argv[0] ? argv[0] : "unknown strlen");
-#define P(__x,__y) {\
-    int a = my_strlen(__x + __y);\
-    int b = (strlen)(__x + __y) /* library version */;\
-    fprintf(stderr,"%s+%d: %s\n",#__x,__y,chk(a,b,&errors));\
-    }
-
-  P (str1, 0);
-  P (str1, 1);
-  P (str1, 2);
-  P (str1, 3);
-
-  P (str2, 0);
-  P (str2, 1);
-  P (str2, 2);
-  P (str2, 3);
-
-  P (str3, 0);
-  P (str3, 1);
-  P (str3, 2);
-  P (str3, 3);
-
-  P (str4, 0);
-  P (str4, 1);
-  P (str4, 2);
-  P (str4, 3);
-
-  P (str5, 0);
-  P (str5, 1);
-  P (str5, 2);
-  P (str5, 3);
-
-  P (str6, 0);
-  P (str6, 1);
-  P (str6, 2);
-  P (str6, 3);
-
-  P (str7, 0);
-  P (str7, 1);
-  P (str7, 2);
-  P (str7, 3);
-
-  P (str8, 0);
-  P (str8, 1);
-  P (str8, 2);
-  P (str8, 3);
-
-  return errors;
-}
-#endif
diff --git a/libc/arch-mips/string/strchr.c b/libc/arch-mips/string/strchr.c
new file mode 100644
index 0000000..c9397e7
--- /dev/null
+++ b/libc/arch-mips/string/strchr.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2017 Imagination Technologies.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *      * Redistributions of source code must retain the above copyright
+ *        notice, this list of conditions and the following disclaimer.
+ *      * Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimer
+ *        in the documentation and/or other materials provided with
+ *        the distribution.
+ *      * Neither the name of Imagination Technologies nor the names of its
+ *        contributors may be used to endorse or promote products derived
+ *        from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+#define op_t        unsigned long int
+#define op_size     sizeof (op_t)
+
+#if __mips64
+typedef struct
+{
+  op_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
+} bits_t;
+#else
+typedef struct
+{
+  op_t B0:8, B1:8, B2:8, B3:8;
+} bits_t;
+#endif
+
+typedef union
+{
+  op_t v;
+  bits_t b;
+} bitfields_t;
+
+#define DO_BYTE(i)                  \
+  if (a.b.B##i != ch) {             \
+    if(a.b.B##i == '\0') return 0;  \
+    p++;                            \
+  } else                            \
+    return (char *)p;
+
+#define DO_WORD(w, cnt) {                            \
+  op_t val = w[cnt] ^ mask_c;                        \
+  if ((((w[cnt] - mask_1) & ~w[cnt]) & mask_128) ||  \
+    (((val - mask_1) & ~val) & mask_128)) {          \
+    return do_bytes(w + cnt, ch);                    \
+  }                                                  \
+}
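+/* Worked example of the zero-byte test in the 32-bit case: for
+   w = 0x41420043, w - mask_1 = 0x4040ff42 and ~w = 0xbebdffbc, so
+   ((w - mask_1) & ~w) & mask_128 = 0x00008000 -- the set 0x80 bit
+   flags the zero byte.  XORing w with mask_c first turns bytes equal
+   to ch into zeros, so the same test also finds the character.  */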
+
+static inline char * __attribute__ ((always_inline))
+do_bytes (const op_t* w, unsigned char ch)
+{
+  bitfields_t a;
+  unsigned char* p = (unsigned char *) w;
+  a.v = *w;
+#if __mips64
+  DO_BYTE(0)
+  DO_BYTE(1)
+  DO_BYTE(2)
+  DO_BYTE(3)
+  DO_BYTE(4)
+  DO_BYTE(5)
+  DO_BYTE(6)
+  DO_BYTE(7)
+#else
+  DO_BYTE(0)
+  DO_BYTE(1)
+  DO_BYTE(2)
+  DO_BYTE(3)
+#endif
+  return (char *)p;
+}
+
+char* strchr(const char* s, int c) __overloadable
+{
+  const op_t *w;
+  op_t mask_1, mask_128, mask_c;
+  const unsigned char ch = c;
+  unsigned char* p = (unsigned char *) s;
+
+  /*
+   * Check byte by byte until the source pointer is aligned.
+   */
+  for ( ; *p != ch && ((size_t) p % op_size) != 0; p++)
+    if (*p == '\0')
+      return 0;
+
+  if (*p != ch) {
+    w = (const op_t *) p;
+
+    mask_c = ch | (ch << 8);
+    mask_c |= mask_c << 16;
+    __asm__ volatile (
+      "li %0, 0x01010101 \n\t"
+      : "=r" (mask_1)
+    );
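+    /* A plain C constant should be equivalent here; the inline asm
+       presumably just pins how the 32-bit seed pattern is built before
+       it is widened below for mips64.  */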
+#if __mips64
+    mask_1 |= mask_1 << 32;
+    mask_c |= mask_c << 32;
+#endif
+    mask_128 = mask_1 << 7;
+
+    /*
+     * After alignment, check word/dword-wise until a character match
+     * or the end of the string is found.
+     */
+    while (1) {
+      DO_WORD(w, 0)
+      DO_WORD(w, 1)
+      DO_WORD(w, 2)
+      DO_WORD(w, 3)
+      w += 4;
+    }
+  }
+
+  return (char *)p;
+}
diff --git a/libc/arch-mips/string/strcmp.S b/libc/arch-mips/string/strcmp.S
index 2b67f5a..e1faf2d 100644
--- a/libc/arch-mips/string/strcmp.S
+++ b/libc/arch-mips/string/strcmp.S
@@ -1,30 +1,33 @@
 /*
- * Copyright (c) 2014
- *      Imagination Technologies Limited.
+ * Copyright (c) 2017 Imagination Technologies.
+ *
+ * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
  *
- * THIS SOFTWARE IS PROVIDED BY IMAGINATION TECHNOLOGIES LIMITED ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL IMAGINATION TECHNOLOGIES LIMITED BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ *      * Redistributions of source code must retain the above copyright
+ *        notice, this list of conditions and the following disclaimer.
+ *      * Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimer
+ *        in the documentation and/or other materials provided with
+ *        the distribution.
+ *      * Neither the name of Imagination Technologies nor the names of its
+ *        contributors may be used to endorse or promote products derived
+ *        from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #ifdef __ANDROID__
@@ -41,6 +44,22 @@
 # include <sys/asm.h>
 #endif
 
+#if __mips64
+# define NSIZE 8
+# define LW ld
+# define EXT dext
+# define SRL dsrl
+# define SLL dsll
+# define SUBU dsubu
+#else
+# define NSIZE 4
+# define LW lw
+# define EXT ext
+# define SRL srl
+# define SLL sll
+# define SUBU subu
+#endif
+
 /* Technically strcmp should not read past the end of the strings being
    compared.  We will read a full word that may contain excess bits beyond
    the NULL string terminator but unless ENABLE_READAHEAD is set, we will not
@@ -77,6 +96,23 @@
 # endif
 #endif
 
+/* It might seem better to do the 'beq' instruction between the two 'lbu'
+   instructions so that the nop is not needed but testing showed that this
+   code is actually faster (based on glibc strcmp test).  */
+#define BYTECMP01(OFFSET) \
+    lbu v0, OFFSET(a0); \
+    lbu v1, OFFSET(a1); \
+    beq v0, zero, L(bexit01); \
+    nop; \
+    bne v0, v1, L(bexit01)
+
+#define BYTECMP89(OFFSET) \
+    lbu t8, OFFSET(a0); \
+    lbu t9, OFFSET(a1); \
+    beq t8, zero, L(bexit89); \
+    nop;    \
+    bne t8, t9, L(bexit89)
+
 /* Allow the routine to be named something else if desired.  */
 #ifndef STRCMP_NAME
 # define STRCMP_NAME strcmp
@@ -87,170 +123,236 @@
 #else
 LEAF(STRCMP_NAME)
 #endif
-	.set	nomips16
-	.set	noreorder
+    .set    nomips16
+    .set    noreorder
 
-	or	t0, a0, a1
-	andi	t0,0x3
-	bne	t0, zero, L(byteloop)
+    andi t1, a1, (NSIZE - 1)
+    beqz t1, L(exitalign)
+    or   t0, zero, NSIZE
+    SUBU t1, t0, t1 # at most (NSIZE - 1) bytes to process
 
-/* Both strings are 4 byte aligned at this point.  */
+L(alignloop): # compare byte by byte until a1 is aligned
+    BYTECMP01(0)
+    SUBU t1, t1, 0x1
+    PTR_ADDIU a0, a0, 0x1
+    bnez  t1, L(alignloop)
+    PTR_ADDIU a1, a1, 0x1
 
-	lui	t8, 0x0101
-	ori	t8, t8, 0x0101
-	lui	t9, 0x7f7f
-	ori	t9, 0x7f7f
+L(exitalign):
 
-#define STRCMP32(OFFSET) \
-	lw	v0, OFFSET(a0); \
-	lw	v1, OFFSET(a1); \
-	subu	t0, v0, t8; \
-	bne	v0, v1, L(worddiff); \
-	nor	t1, v0, t9; \
-	and	t0, t0, t1; \
-	bne	t0, zero, L(returnzero)
+/* String a1 is NSIZE-byte aligned at this point.  */
+
+    lui t8, 0x0101
+    ori t8, 0x0101
+    lui t9, 0x7f7f
+    ori t9, 0x7f7f
+#if __mips64
+    dsll t1, t8, 32
+    or  t8, t1
+    dsll t1, t9, 32
+    or  t9, t1
+#endif
+
+    andi t2, a0, (NSIZE - 1) # check whether a0 is aligned
+    SUBU t3, t0, t2 # t3 will be used as the shift amount
+    bnez t2, L(uloopenter)
+    SUBU a2, a0, t2 # round a0 down to the aligned address in a2
+
+#define STRCMPW(OFFSET) \
+    LW   v0, OFFSET(a0); \
+    LW   v1, OFFSET(a1); \
+    SUBU t0, v0, t8; \
+    bne  v0, v1, L(worddiff); \
+    nor  t1, v0, t9; \
+    and  t0, t0, t1; \
+    bne  t0, zero, L(returnzero)
 
 L(wordloop):
-	STRCMP32(0)
-	DELAY_READ
-	STRCMP32(4)
-	DELAY_READ
-	STRCMP32(8)
-	DELAY_READ
-	STRCMP32(12)
-	DELAY_READ
-	STRCMP32(16)
-	DELAY_READ
-	STRCMP32(20)
-	DELAY_READ
-	STRCMP32(24)
-	DELAY_READ
-	STRCMP32(28)
-	PTR_ADDIU a0, a0, 32
-	b	L(wordloop)
-	PTR_ADDIU a1, a1, 32
+    STRCMPW(0 * NSIZE)
+    DELAY_READ
+    STRCMPW(1 * NSIZE)
+    DELAY_READ
+    STRCMPW(2 * NSIZE)
+    DELAY_READ
+    STRCMPW(3 * NSIZE)
+    DELAY_READ
+    STRCMPW(4 * NSIZE)
+    DELAY_READ
+    STRCMPW(5 * NSIZE)
+    DELAY_READ
+    STRCMPW(6 * NSIZE)
+    DELAY_READ
+    STRCMPW(7 * NSIZE)
+    PTR_ADDIU a0, a0, (8 * NSIZE)
+    b   L(wordloop)
+    PTR_ADDIU a1, a1, (8 * NSIZE)
+
+#define USTRCMPW(OFFSET) \
+    LW  v1, OFFSET(a1); \
+    SUBU    t0, v0, t8; \
+    nor t1, v0, t9; \
+    and t0, t0, t1; \
+    bne t0, zero, L(worddiff); \
+    SRL v0, t2; \
+    LW  a3, (OFFSET + NSIZE)(a2); \
+    SUBU    t0, v1, t8; \
+    SLL t1, a3, t3; \
+    or v0, v0, t1; \
+    bne v0, v1, L(worddiff); \
+    nor t1, v1, t9; \
+    and t0, t0, t1; \
+    bne t0, zero, L(returnzero); \
+    move v0, a3
+
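+/* a0 is misaligned: each candidate word is assembled from two aligned
+   loads through a2 -- the current word shifted right by the byte
+   offset (t2, in bits) OR'd with the next word shifted left by the
+   complement (t3) -- before being compared against the aligned a1.  */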
+L(uloopenter):
+    LW  v0, 0(a2)
+    SLL t2, 3  # convert byte offset to bits (multiply by 8)
+    SLL t3, 3  # convert byte count to bits (multiply by 8)
+    li  a3, -1 # all 1s
+    SRL a3, t3
+    or  v0, a3 # replace with all 1s if zeros in unintended read
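+    /* The first aligned load starts below a0, so on little-endian the
+       low-order bytes of v0 were never part of the string; forcing
+       them to ones keeps the zero-byte test from firing on them.  */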
+
+L(uwordloop):
+    USTRCMPW(0 * NSIZE)
+    USTRCMPW(1 * NSIZE)
+    USTRCMPW(2 * NSIZE)
+    USTRCMPW(3 * NSIZE)
+    USTRCMPW(4 * NSIZE)
+    USTRCMPW(5 * NSIZE)
+    USTRCMPW(6 * NSIZE)
+    USTRCMPW(7 * NSIZE)
+    PTR_ADDIU a2, a2, (8 * NSIZE)
+    b   L(uwordloop)
+    PTR_ADDIU a1, a1, (8 * NSIZE)
 
 L(returnzero):
-	j	ra
-	move	v0, zero
+    j   ra
+    move    v0, zero
+
+#if __mips_isa_rev > 1
+#define EXT_COMPARE01(POS) \
+    EXT t0, v0, POS, 8; \
+    beq t0, zero, L(wexit01); \
+    EXT t1, v1, POS, 8; \
+    bne t0, t1, L(wexit01)
+#define EXT_COMPARE89(POS) \
+    EXT t8, v0, POS, 8; \
+    beq t8, zero, L(wexit89); \
+    EXT t9, v1, POS, 8; \
+    bne t8, t9, L(wexit89)
+#else
+#define EXT_COMPARE01(POS) \
+    SRL  t0, v0, POS; \
+    SRL  t1, v1, POS; \
+    andi t0, t0, 0xff; \
+    beq  t0, zero, L(wexit01); \
+    andi t1, t1, 0xff; \
+    bne  t0, t1, L(wexit01)
+#define EXT_COMPARE89(POS) \
+    SRL  t8, v0, POS; \
+    SRL  t9, v1, POS; \
+    andi t8, t8, 0xff; \
+    beq  t8, zero, L(wexit89); \
+    andi t9, t9, 0xff; \
+    bne  t8, t9, L(wexit89)
+#endif
 
 L(worddiff):
 #ifdef USE_CLZ
-	subu	t0, v0, t8
-	nor	t1, v0, t9
-	and	t1, t0, t1
-	xor	t0, v0, v1
-	or	t0, t0, t1
+    SUBU    t0, v0, t8
+    nor t1, v0, t9
+    and t1, t0, t1
+    xor t0, v0, v1
+    or  t0, t0, t1
 # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-	wsbh	t0, t0
-	rotr	t0, t0, 16
+    wsbh    t0, t0
+    rotr    t0, t0, 16
 # endif
-	clz	t1, t0
-	and	t1, 0xf8
+    clz t1, t0
+    and t1, 0xf8
 # if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-	neg	t1
-	addu	t1, 24
+    neg t1
+    addu    t1, 24
 # endif
-	rotrv	v0, v0, t1
-	rotrv	v1, v1, t1
-	and	v0, v0, 0xff
-	and	v1, v1, 0xff
-	j	ra
-	subu	v0, v0, v1
+    rotrv   v0, v0, t1
+    rotrv   v1, v1, t1
+    and v0, v0, 0xff
+    and v1, v1, 0xff
+    j   ra
+    SUBU    v0, v0, v1
 #else /* USE_CLZ */
 # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-	andi	t0, v0, 0xff
-	beq	t0, zero, L(wexit01)
-	andi	t1, v1, 0xff
-	bne	t0, t1, L(wexit01)
+    andi    t0, v0, 0xff
+    beq t0, zero, L(wexit01)
+    andi    t1, v1, 0xff
+    bne t0, t1, L(wexit01)
+    EXT_COMPARE89(8)
+    EXT_COMPARE01(16)
+#ifndef __mips64
+    SRL t8, v0, 24
+    SRL t9, v1, 24
+#else
+    EXT_COMPARE89(24)
+    EXT_COMPARE01(32)
+    EXT_COMPARE89(40)
+    EXT_COMPARE01(48)
+    SRL t8, v0, 56
+    SRL t9, v1, 56
+#endif
 
-	srl	t8, v0, 8
-	srl	t9, v1, 8
-	andi	t8, t8, 0xff
-	beq	t8, zero, L(wexit89)
-	andi	t9, t9, 0xff
-	bne	t8, t9, L(wexit89)
-
-	srl	t0, v0, 16
-	srl	t1, v1, 16
-	andi	t0, t0, 0xff
-	beq	t0, zero, L(wexit01)
-	andi	t1, t1, 0xff
-	bne	t0, t1, L(wexit01)
-
-	srl	t8, v0, 24
-	srl	t9, v1, 24
 # else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
-	srl	t0, v0, 24
-	beq	t0, zero, L(wexit01)
-	srl	t1, v1, 24
-	bne	t0, t1, L(wexit01)
+#ifdef __mips64
+    SRL t0, v0, 56
+    beq t0, zero, L(wexit01)
+    SRL t1, v1, 56
+    bne t0, t1, L(wexit01)
+    EXT_COMPARE89(48)
+    EXT_COMPARE01(40)
+    EXT_COMPARE89(32)
+    EXT_COMPARE01(24)
+#else
+    SRL t0, v0, 24
+    beq t0, zero, L(wexit01)
+    SRL t1, v1, 24
+    bne t0, t1, L(wexit01)
+#endif
+    EXT_COMPARE89(16)
+    EXT_COMPARE01(8)
 
-	srl	t8, v0, 16
-	srl	t9, v1, 16
-	andi	t8, t8, 0xff
-	beq	t8, zero, L(wexit89)
-	andi	t9, t9, 0xff
-	bne	t8, t9, L(wexit89)
-
-	srl	t0, v0, 8
-	srl	t1, v1, 8
-	andi	t0, t0, 0xff
-	beq	t0, zero, L(wexit01)
-	andi	t1, t1, 0xff
-	bne	t0, t1, L(wexit01)
-
-	andi	t8, v0, 0xff
-	andi	t9, v1, 0xff
+    andi    t8, v0, 0xff
+    andi    t9, v1, 0xff
 # endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
 
 L(wexit89):
-	j	ra
-	subu	v0, t8, t9
+    j   ra
+    SUBU    v0, t8, t9
 L(wexit01):
-	j	ra
-	subu	v0, t0, t1
+    j   ra
+    SUBU    v0, t0, t1
 #endif /* USE_CLZ */
 
-/* It might seem better to do the 'beq' instruction between the two 'lbu'
-   instructions so that the nop is not needed but testing showed that this
-   code is actually faster (based on glibc strcmp test).  */
-#define BYTECMP01(OFFSET) \
-	lbu	v0, OFFSET(a0); \
-	lbu	v1, OFFSET(a1); \
-	beq	v0, zero, L(bexit01); \
-	nop; \
-	bne	v0, v1, L(bexit01)
-
-#define BYTECMP89(OFFSET) \
-	lbu	t8, OFFSET(a0); \
-	lbu	t9, OFFSET(a1); \
-	beq	t8, zero, L(bexit89); \
-	nop;	\
-	bne	t8, t9, L(bexit89)
-
 L(byteloop):
-	BYTECMP01(0)
-	BYTECMP89(1)
-	BYTECMP01(2)
-	BYTECMP89(3)
-	BYTECMP01(4)
-	BYTECMP89(5)
-	BYTECMP01(6)
-	BYTECMP89(7)
-	PTR_ADDIU a0, a0, 8
-	b	L(byteloop)
-	PTR_ADDIU a1, a1, 8
+    BYTECMP01(0)
+    BYTECMP89(1)
+    BYTECMP01(2)
+    BYTECMP89(3)
+    BYTECMP01(4)
+    BYTECMP89(5)
+    BYTECMP01(6)
+    BYTECMP89(7)
+    PTR_ADDIU a0, a0, 8
+    b   L(byteloop)
+    PTR_ADDIU a1, a1, 8
 
 L(bexit01):
-	j	ra
-	subu	v0, v0, v1
+    j   ra
+    SUBU    v0, v0, v1
 L(bexit89):
-	j	ra
-	subu	v0, t8, t9
+    j   ra
+    SUBU    v0, t8, t9
 
-	.set	at
-	.set	reorder
+    .set    at
+    .set    reorder
 
 END(STRCMP_NAME)
 #ifndef __ANDROID__
diff --git a/libc/arch-mips/string/strcpy.c b/libc/arch-mips/string/strcpy.c
new file mode 100644
index 0000000..7b5dee3
--- /dev/null
+++ b/libc/arch-mips/string/strcpy.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2017 Imagination Technologies.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *      * Redistributions of source code must retain the above copyright
+ *        notice, this list of conditions and the following disclaimer.
+ *      * Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimer
+ *        in the documentation and/or other materials provided with
+ *        the distribution.
+ *      * Neither the name of Imagination Technologies nor the names of its
+ *        contributors may be used to endorse or promote products derived
+ *        from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+#define op_t        unsigned long int
+
+#if !defined(UNALIGNED_INSTR_SUPPORT)
+/* Does the target have unaligned lw/ld/ualw/uald instructions?  */
+#define UNALIGNED_INSTR_SUPPORT 0
+#if __mips_isa_rev < 6 && !__mips1
+#undef UNALIGNED_INSTR_SUPPORT
+#define UNALIGNED_INSTR_SUPPORT 1
+#endif
+#endif
+
+#if !defined(HW_UNALIGNED_SUPPORT)
+/* Does the target have hardware support for unaligned accesses?  */
+#define HW_UNALIGNED_SUPPORT 0
+#if __mips_isa_rev >= 6
+#undef HW_UNALIGNED_SUPPORT
+#define HW_UNALIGNED_SUPPORT 1
+#endif
+#endif
+
+#if __mips64
+typedef struct
+{
+  op_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
+} bits_t;
+#else
+typedef struct
+{
+  op_t B0:8, B1:8, B2:8, B3:8;
+} bits_t;
+#endif
+
+typedef union
+{
+  op_t v;
+  bits_t b;
+} bitfields_t;
+
+#if !HW_UNALIGNED_SUPPORT && UNALIGNED_INSTR_SUPPORT
+/* for MIPS GCC, there are no unaligned builtins - so this struct forces
+   the compiler to treat the pointer access as unaligned.  */
+struct ulw
+{
+  op_t uli;
+} __attribute__ ((packed));
+#endif /* !HW_UNALIGNED_SUPPORT && UNALIGNED_INSTR_SUPPORT */
+
+#define DO_BYTE(i, ptdst) {  \
+  *(ptdst+i) = a.b.B##i;     \
+  if(a.b.B##i == '\0')       \
+    return ret;              \
+}
+
+#if __mips64
+#define DO_BYTES(val, dst) {   \
+  bitfields_t a;               \
+  char *tdst = (char *)(dst);  \
+  a.v = val;                   \
+  DO_BYTE(0, tdst)             \
+  DO_BYTE(1, tdst)             \
+  DO_BYTE(2, tdst)             \
+  DO_BYTE(3, tdst)             \
+  DO_BYTE(4, tdst)             \
+  DO_BYTE(5, tdst)             \
+  DO_BYTE(6, tdst)             \
+  DO_BYTE(7, tdst)             \
+}
+#else
+#define DO_BYTES(val, dst) {   \
+  bitfields_t a;               \
+  char *tdst = (char *)(dst);  \
+  a.v = val;                   \
+  DO_BYTE(0, tdst)             \
+  DO_BYTE(1, tdst)             \
+  DO_BYTE(2, tdst)             \
+  DO_BYTE(3, tdst)             \
+}
+#endif
+
+#define DO_WORD_ALIGNED(dst, src) {                 \
+  op_t val = *(src);                                \
+  if ((((val - mask_1) & ~val) & mask_128) != 0) {  \
+    DO_BYTES(val, dst);                             \
+  } else *(dst) = val;                              \
+}
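+/* Words are copied whole until the (val - mask_1) & ~val & mask_128
+   test flags a zero byte; only that final word is unpacked byte by
+   byte so the copy stops exactly at the terminator.  */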
+
+#if !HW_UNALIGNED_SUPPORT
+#if UNALIGNED_INSTR_SUPPORT
+#define DO_WORD_UNALIGNED(dst, src) {               \
+  op_t val = *(src);                                \
+  if ((((val - mask_1) & ~val) & mask_128) != 0) {  \
+    DO_BYTES(val, dst);                             \
+  } else {                                          \
+    struct ulw *a = (struct ulw *)(dst);            \
+    a->uli = val;                                   \
+  }                                                 \
+}
+#else
+#define DO_WORD_UNALIGNED(dst, src) {                 \
+  op_t val = *(src);                                  \
+  if ((((val - mask_1) & ~val) & mask_128) != 0) {    \
+    DO_BYTES(val, dst);                               \
+  } else {                                            \
+    char *pdst = (char *) dst;                        \
+    const char *psrc = (const char *) src;            \
+    for (; (*pdst = *psrc) != '\0'; ++psrc, ++pdst);  \
+    return ret;                                       \
+  }                                                   \
+}
+#endif /* UNALIGNED_INSTR_SUPPORT */
+
+#define PROCESS_UNALIGNED_WORDS(a, b) { \
+  while (1) {                           \
+    DO_WORD_UNALIGNED(a, b);            \
+    DO_WORD_UNALIGNED(a + 1, b + 1);    \
+    DO_WORD_UNALIGNED(a + 2, b + 2);    \
+    DO_WORD_UNALIGNED(a + 3, b + 3);    \
+    a += 4;                             \
+    b += 4;                             \
+  }                                     \
+}
+#endif /* HW_UNALIGNED_SUPPORT */
+
+#define PROCESS_ALIGNED_WORDS(a, b) {  \
+  while (1) {                          \
+    DO_WORD_ALIGNED(a, b);             \
+    DO_WORD_ALIGNED(a + 1, b + 1);     \
+    DO_WORD_ALIGNED(a + 2, b + 2);     \
+    DO_WORD_ALIGNED(a + 3, b + 3);     \
+    a += 4;                            \
+    b += 4;                            \
+  }                                    \
+}
+
+char *
+strcpy (char *to, const char *from) __overloadable
+{
+  char *ret = to;
+  op_t mask_1, mask_128;
+  const op_t *src;
+  op_t *dst;
+
+  for (; (*to = *from) != '\0' && ((size_t) from % sizeof (op_t)) != 0; ++from, ++to);
+
+  if(*to != '\0') {
+    __asm__ volatile (
+      "li %0, 0x01010101 \n\t"
+      : "=r" (mask_1)
+    );
+#if __mips64
+    mask_1 |= mask_1 << 32;
+#endif
+    mask_128 = mask_1 << 7;
+
+    src = (const op_t *) from;
+    dst = (op_t *) to;
+
+#if HW_UNALIGNED_SUPPORT
+    PROCESS_ALIGNED_WORDS(dst, src);
+#else
+    if (((unsigned long) dst) % sizeof (op_t) == 0) {
+      PROCESS_ALIGNED_WORDS(dst, src);
+    } else {
+      PROCESS_UNALIGNED_WORDS(dst, src);
+    }
+#endif
+  }
+
+  return ret;
+}
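
For reference, the NUL test in DO_WORD_ALIGNED/DO_WORD_UNALIGNED above (and
reused by the strlen and strnlen changes below) is the classic word-at-a-time
zero-byte check. A minimal standalone C sketch, building the masks
arithmetically instead of with the li inline asm; has_zero_byte and the
MASK_* names are illustrative only:

    #include <stdio.h>

    typedef unsigned long op_t;

    #define MASK_1   ((op_t) -1 / 0xff)  /* 0x01 in every byte lane */
    #define MASK_128 (MASK_1 << 7)       /* 0x80 in every byte lane */

    /* Nonzero iff some byte of v is 0x00: subtracting 1 per lane borrows
       through a zero byte, ~v discards lanes whose high bit was already
       set, and MASK_128 keeps only the per-lane carry-out bits.  */
    static op_t has_zero_byte(op_t v) {
      return ((v - MASK_1) & ~v) & MASK_128;
    }

    int main(void) {
      op_t all = MASK_1 * 'a';             /* 'a' in every byte lane */
      op_t one = all ^ ((op_t) 'a' << 8);  /* clear byte lane 1 */
      printf("%d %d\n", has_zero_byte(all) != 0, has_zero_byte(one) != 0);
      return 0;                            /* prints "0 1" */
    }

The packed single-member struct ulw serves a related purpose on the store
side: assigning through it tells the compiler the destination may be
unaligned, so it emits an unaligned store sequence (SWL/SWR on pre-R6 MIPS)
instead of a plain SW.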
diff --git a/libc/arch-mips/string/strlen.c b/libc/arch-mips/string/strlen.c
index 488e3c8..491efae 100644
--- a/libc/arch-mips/string/strlen.c
+++ b/libc/arch-mips/string/strlen.c
@@ -1,43 +1,115 @@
-/*	$OpenBSD: strlen.c,v 1.8 2014/06/10 04:17:37 deraadt Exp $	*/
-
-/*-
- * Copyright (c) 1990, 1993
- *	The Regents of the University of California.  All rights reserved.
+/*
+ * Copyright (c) 2017 Imagination Technologies.
+ *
+ * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
  *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ *      * Redistributions of source code must retain the above copyright
+ *        notice, this list of conditions and the following disclaimer.
+ *      * Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimer
+ *        in the documentation and/or other materials provided with
+ *        the distribution.
+ *      * Neither the name of Imagination Technologies nor the names of its
+ *        contributors may be used to endorse or promote products derived
+ *        from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <string.h>
 
-size_t
-strlen(const char *str) __overloadable
-{
-	const char *s;
+#define op_t        unsigned long int
+#define op_size     sizeof (op_t)
 
-	for (s = str; *s; ++s)
-		;
-	return (s - str);
+#if __mips64 || __mips_isa_rev >= 2
+static inline size_t __attribute__ ((always_inline))
+do_bytes (const char *base, const char *p, op_t inval)
+{
+  op_t outval = 0;
+#if __mips64
+  __asm__ volatile (
+    "dsbh %1, %0 \n\t"
+    "dshd %0, %1 \n\t"
+    "dclz %1, %0 \n\t"
+    : "+r" (inval), "+r" (outval)
+  );
+#else
+  __asm__ volatile (
+    "wsbh %1, %0 \n\t"
+    "rotr %0, %1, 16 \n\t"
+    "clz %1, %0 \n\t"
+    : "+r" (inval), "+r" (outval)
+  );
+#endif
+  p += (outval >> 3);
+  return (size_t) (p - base);
 }
 
+#define DO_WORD(w, cnt) {                                \
+  op_t val = ((w[cnt] - mask_1) & ~w[cnt]) & mask_128;   \
+  if (val)                                               \
+    return do_bytes(str, (const char *)(w + cnt), val);  \
+}
+#else
+static inline size_t __attribute__ ((always_inline))
+do_bytes (const char *base, const char *p)
+{
+  for (; *p; ++p);
+  return (size_t) (p - base);
+}
+
+#define DO_WORD(w, cnt) {                           \
+  if (((w[cnt] - mask_1) & ~w[cnt]) & mask_128)     \
+    return do_bytes(str, (const char *)(w + cnt));  \
+}
+#endif
+
+size_t
+strlen (const char *str) __overloadable
+{
+  if (*str) {
+    const char *p = (const char *) str;
+    const op_t *w;
+    op_t mask_1, mask_128;
+
+    while ((size_t) p % op_size) {
+      if (!(*p))
+        return (p - str);
+      p++;
+    }
+
+    __asm__ volatile (
+      "li %0, 0x01010101 \n\t"
+      : "=r" (mask_1)
+    );
+#if __mips64
+    mask_1 |= mask_1 << 32;
+#endif
+    mask_128 = mask_1 << 7;
+
+    w = (const op_t *) p;
+
+    while (1) {
+      DO_WORD(w, 0);
+      DO_WORD(w, 1);
+      DO_WORD(w, 2);
+      DO_WORD(w, 3);
+      w += 4;
+    }
+  }
+  return 0;
+}
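
do_bytes() above turns the mask word into the byte index of the first NUL:
the byte swap moves the lowest-addressed lane to the front, CLZ counts the
bits before its 0x80 marker, and the >> 3 converts bits to bytes. On a
little-endian target the same index falls out of a trailing-zero count; a
hypothetical portable equivalent using the GCC/Clang builtin:

    #include <stddef.h>
    #include <stdio.h>

    typedef unsigned long op_t;

    #define MASK_1   ((op_t) -1 / 0xff)
    #define MASK_128 (MASK_1 << 7)

    /* Little-endian equivalent of the DSBH/DSHD + DCLZ (or WSBH/ROTR +
       CLZ) sequence: the first zero byte at offset i sets bit 8*i + 7
       of the mask, so ctz(mask) >> 3 == i.  Requires mask != 0.  */
    static size_t first_zero_byte(op_t w) {
      op_t mask = ((w - MASK_1) & ~w) & MASK_128;
      return (size_t) (__builtin_ctzl(mask) >> 3);
    }

    int main(void) {
      op_t all = MASK_1 * 'a';
      printf("%zu\n", first_zero_byte(all ^ ((op_t) 'a' << 16)));  /* 2 */
      return 0;
    }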
diff --git a/libc/arch-mips/string/strncmp.S b/libc/arch-mips/string/strncmp.S
new file mode 100644
index 0000000..4867c44
--- /dev/null
+++ b/libc/arch-mips/string/strncmp.S
@@ -0,0 +1,401 @@
+/*
+ * Copyright (c) 2017 Imagination Technologies.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *      * Redistributions of source code must retain the above copyright
+ *        notice, this list of conditions and the following disclaimer.
+ *      * Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimer
+ *        in the documentation and/or other materials provided with
+ *        the distribution.
+ *      * Neither the name of Imagination Technologies nor the names of its
+ *        contributors may be used to endorse or promote products derived
+ *        from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef __ANDROID__
+# include <private/bionic_asm.h>
+#elif _LIBC
+# include <sysdep.h>
+# include <regdef.h>
+# include <sys/asm.h>
+#elif _COMPILING_NEWLIB
+# include "machine/asm.h"
+# include "machine/regdef.h"
+#else
+# include <regdef.h>
+# include <sys/asm.h>
+#endif
+
+#if __mips64
+# define NSIZE 8
+# define LW ld
+# define LWR ldr
+# define LWL ldl
+# define EXT dext
+# define SRL dsrl
+# define SUBU dsubu
+#else
+# define NSIZE 4
+# define LW lw
+# define LWR lwr
+# define LWL lwl
+# define EXT ext
+# define SRL srl
+# define SUBU subu
+#endif
+
+/* Technically strncmp should not read past the end of the strings being
+   compared.  We will read a full word that may contain excess bits beyond
+   the NUL string terminator, but unless ENABLE_READAHEAD is set, we will
+   not read the next word past the end of a string.  Setting
+   ENABLE_READAHEAD improves performance but is technically illegal per
+   the definition of strncmp.  */
+#ifdef ENABLE_READAHEAD
+# define DELAY_READ
+#else
+# define DELAY_READ nop
+#endif
+
+/* Testing on a little endian machine showed using CLZ was a
+   performance loss, so we are not turning it on by default.  */
+#if defined(ENABLE_CLZ) && (__mips_isa_rev > 1) && (!__mips64)
+# define USE_CLZ
+#endif
+
+/* Some asm.h files do not have the L macro definition.  */
+#ifndef L
+# if _MIPS_SIM == _ABIO32
+#  define L(label) $L ## label
+# else
+#  define L(label) .L ## label
+# endif
+#endif
+
+/* Some asm.h files do not have the PTR_ADDIU macro definition.  */
+#ifndef PTR_ADDIU
+# if _MIPS_SIM == _ABIO32
+#  define PTR_ADDIU       addiu
+# else
+#  define PTR_ADDIU       daddiu
+# endif
+#endif
+
+/* It might seem better to put the 'beq' instruction between the two 'lbu'
+   instructions so that the nop is not needed, but testing showed that this
+   code is actually faster (based on the glibc strcmp test).  */
+#define BYTECMP01(OFFSET) \
+    lbu v0, OFFSET(a0); \
+    lbu v1, OFFSET(a1); \
+    beq v0, zero, L(bexit01); \
+    nop; \
+    bne v0, v1, L(bexit01)
+
+#define BYTECMP89(OFFSET) \
+    lbu t8, OFFSET(a0); \
+    lbu t9, OFFSET(a1); \
+    beq t8, zero, L(bexit89); \
+    nop;    \
+    bne t8, t9, L(bexit89)
+
+/* Allow the routine to be named something else if desired.  */
+#ifndef STRNCMP_NAME
+# define STRNCMP_NAME strncmp
+#endif
+
+#ifdef __ANDROID__
+LEAF(STRNCMP_NAME, 0)
+#else
+LEAF(STRNCMP_NAME)
+#endif
+    .set    nomips16
+    .set    noreorder
+
+    srl t0, a2, (2 + NSIZE / 4)
+    beqz  t0, L(byteloop) # process by bytes if count < (2 * NSIZE)
+    andi t1, a1, (NSIZE - 1)
+    beqz  t1, L(exitalign)
+    or   t0, zero, NSIZE
+    SUBU t1, t0, t1 # process at most (NSIZE - 1) bytes
+    SUBU a2, a2, t1 # decrement count by t1
+
+L(alignloop): # compare byte by byte until a1 is aligned
+    BYTECMP01(0)
+    SUBU t1, t1, 0x1
+    PTR_ADDIU a0, a0, 0x1
+    bne  t1, zero, L(alignloop)
+    PTR_ADDIU a1, a1, 0x1
+
+L(exitalign):
+
+/* string a1 is NSIZE-byte aligned at this point. */
+#ifndef __mips1
+    lui t8, 0x0101
+    ori t8, 0x0101
+    lui t9, 0x7f7f
+    ori t9, 0x7f7f
+#if __mips64
+    dsll t0, t8, 32
+    or  t8, t0
+    dsll t1, t9, 32
+    or  t9, t1
+#endif
+#endif
+
+/* mips1 has neither hardware nor software unaligned support, so it falls
+   back to the byte loop.  rev6 archs have h/w unaligned support; the
+   remaining archs handle misalignment with unaligned loads (LWL/LWR).  */
+
+#if __mips1
+    andi t0, a0, (NSIZE - 1)
+    bne  t0, zero, L(byteloop)
+#elif __mips_isa_rev < 6
+    andi t0, a0, (NSIZE - 1)
+    bne  t0, zero, L(uwordloop)
+#endif
+
+#define STRCMPW(OFFSET) \
+    LW   v0, (OFFSET)(a0); \
+    LW   v1, (OFFSET)(a1); \
+    SUBU t0, v0, t8; \
+    bne  v0, v1, L(worddiff); \
+    nor  t1, v0, t9; \
+    and  t0, t0, t1; \
+    bne  t0, zero, L(returnzero)
+
+L(wordloop):
+    SUBU t1, a2, (8 * NSIZE)
+    bltz t1, L(onewords)
+    STRCMPW(0 * NSIZE)
+    DELAY_READ
+    STRCMPW(1 * NSIZE)
+    DELAY_READ
+    STRCMPW(2 * NSIZE)
+    DELAY_READ
+    STRCMPW(3 * NSIZE)
+    DELAY_READ
+    STRCMPW(4 * NSIZE)
+    DELAY_READ
+    STRCMPW(5 * NSIZE)
+    DELAY_READ
+    STRCMPW(6 * NSIZE)
+    DELAY_READ
+    STRCMPW(7 * NSIZE)
+    SUBU a2, a2, (8 * NSIZE)
+    PTR_ADDIU a0, a0, (8 * NSIZE)
+    b   L(wordloop)
+    PTR_ADDIU a1, a1, (8 * NSIZE)
+
+L(onewords):
+    SUBU t1, a2, NSIZE
+    bltz t1, L(byteloop)
+    STRCMPW(0)
+    SUBU a2, a2, NSIZE
+    PTR_ADDIU a0, a0, NSIZE
+    b   L(onewords)
+    PTR_ADDIU a1, a1, NSIZE
+
+#if __mips_isa_rev < 6 && !__mips1
+#define USTRCMPW(OFFSET) \
+    LWR v0, (OFFSET)(a0); \
+    LWL v0, (OFFSET + NSIZE - 1)(a0); \
+    LW  v1, (OFFSET)(a1); \
+    SUBU    t0, v0, t8; \
+    bne v0, v1, L(worddiff); \
+    nor t1, v0, t9; \
+    and t0, t0, t1; \
+    bne t0, zero, L(returnzero)
+
+L(uwordloop):
+    SUBU t1, a2, (8 * NSIZE)
+    bltz t1, L(uonewords)
+    USTRCMPW(0 * NSIZE)
+    DELAY_READ
+    USTRCMPW(1 * NSIZE)
+    DELAY_READ
+    USTRCMPW(2 * NSIZE)
+    DELAY_READ
+    USTRCMPW(3 * NSIZE)
+    DELAY_READ
+    USTRCMPW(4 * NSIZE)
+    DELAY_READ
+    USTRCMPW(5 * NSIZE)
+    DELAY_READ
+    USTRCMPW(6 * NSIZE)
+    DELAY_READ
+    USTRCMPW(7 * NSIZE)
+    SUBU a2, a2, (8 * NSIZE)
+    PTR_ADDIU a0, a0, (8 * NSIZE)
+    b   L(uwordloop)
+    PTR_ADDIU a1, a1, (8 * NSIZE)
+
+L(uonewords):
+    SUBU t1, a2, NSIZE
+    bltz t1, L(byteloop)
+    USTRCMPW(0)
+    SUBU a2, a2, NSIZE
+    PTR_ADDIU a0, a0, NSIZE
+    b   L(uonewords)
+    PTR_ADDIU a1, a1, NSIZE
+
+#endif
+
+L(returnzero):
+    j   ra
+    move    v0, zero
+
+#if __mips_isa_rev > 1
+#define EXT_COMPARE01(POS) \
+    EXT t0, v0, POS, 8; \
+    beq t0, zero, L(wexit01); \
+    EXT t1, v1, POS, 8; \
+    bne t0, t1, L(wexit01)
+#define EXT_COMPARE89(POS) \
+    EXT t8, v0, POS, 8; \
+    beq t8, zero, L(wexit89); \
+    EXT t9, v1, POS, 8; \
+    bne t8, t9, L(wexit89)
+#else
+#define EXT_COMPARE01(POS) \
+    SRL  t0, v0, POS; \
+    SRL  t1, v1, POS; \
+    andi t0, t0, 0xff; \
+    beq  t0, zero, L(wexit01); \
+    andi t1, t1, 0xff; \
+    bne  t0, t1, L(wexit01)
+#define EXT_COMPARE89(POS) \
+    SRL  t8, v0, POS; \
+    SRL  t9, v1, POS; \
+    andi t8, t8, 0xff; \
+    beq  t8, zero, L(wexit89); \
+    andi t9, t9, 0xff; \
+    bne  t8, t9, L(wexit89)
+#endif
+
+L(worddiff):
+#ifdef USE_CLZ
+    SUBU    t0, v0, t8
+    nor t1, v0, t9
+    and t1, t0, t1
+    xor t0, v0, v1
+    or  t0, t0, t1
+# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+    wsbh    t0, t0
+    rotr    t0, t0, 16
+# endif
+    clz t1, t0
+    and t1, 0xf8
+# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+    neg t1
+    addu    t1, 24
+# endif
+    rotrv   v0, v0, t1
+    rotrv   v1, v1, t1
+    and v0, v0, 0xff
+    and v1, v1, 0xff
+    j   ra
+    SUBU    v0, v0, v1
+#else /* USE_CLZ */
+# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+    andi    t0, v0, 0xff
+    beq t0, zero, L(wexit01)
+    andi    t1, v1, 0xff
+    bne t0, t1, L(wexit01)
+    EXT_COMPARE89(8)
+    EXT_COMPARE01(16)
+#ifndef __mips64
+    SRL t8, v0, 24
+    SRL t9, v1, 24
+#else
+    EXT_COMPARE89(24)
+    EXT_COMPARE01(32)
+    EXT_COMPARE89(40)
+    EXT_COMPARE01(48)
+    SRL t8, v0, 56
+    SRL t9, v1, 56
+#endif
+
+# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
+#ifdef __mips64
+    SRL t0, v0, 56
+    beq t0, zero, L(wexit01)
+    SRL t1, v1, 56
+    bne t0, t1, L(wexit01)
+    EXT_COMPARE89(48)
+    EXT_COMPARE01(40)
+    EXT_COMPARE89(32)
+    EXT_COMPARE01(24)
+#else
+    SRL t0, v0, 24
+    beq t0, zero, L(wexit01)
+    SRL t1, v1, 24
+    bne t0, t1, L(wexit01)
+#endif
+    EXT_COMPARE89(16)
+    EXT_COMPARE01(8)
+
+    andi    t8, v0, 0xff
+    andi    t9, v1, 0xff
+# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
+
+L(wexit89):
+    j   ra
+    SUBU    v0, t8, t9
+L(wexit01):
+    j   ra
+    SUBU    v0, t0, t1
+#endif /* USE_CLZ */
+
+L(byteloop):
+    beq a2, zero, L(returnzero)
+    SUBU a2, a2, 1
+    BYTECMP01(0)
+    nop
+    beq a2, zero, L(returnzero)
+    SUBU a2, a2, 1
+    BYTECMP89(1)
+    nop
+    beq a2, zero, L(returnzero)
+    SUBU a2, a2, 1
+    BYTECMP01(2)
+    nop
+    beq a2, zero, L(returnzero)
+    SUBU a2, a2, 1
+    BYTECMP89(3)
+    PTR_ADDIU a0, a0, 4
+    b   L(byteloop)
+    PTR_ADDIU a1, a1, 4
+
+L(bexit01):
+    j   ra
+    SUBU    v0, v0, v1
+L(bexit89):
+    j   ra
+    SUBU    v0, t8, t9
+
+    .set    at
+    .set    reorder
+
+END(STRNCMP_NAME)
+#ifndef __ANDROID__
+# ifdef _LIBC
+libc_hidden_builtin_def (STRNCMP_NAME)
+# endif
+#endif
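
At the C level, the aligned STRCMPW loop amounts to: compare one word per
step, and fall back to byte comparison on the first differing word or on a
word containing a NUL, exactly the L(worddiff)/L(byteloop) split above. A
rough sketch assuming both pointers are already word-aligned (strncmp_words
is a hypothetical name, not the shipped entry point):

    #include <stddef.h>
    #include <stdio.h>

    typedef unsigned long op_t;

    #define MASK_1   ((op_t) -1 / 0xff)
    #define MASK_128 (MASK_1 << 7)

    static int strncmp_words(const char *s1, const char *s2, size_t n) {
      const op_t *w1 = (const op_t *) s1;  /* both assumed word-aligned */
      const op_t *w2 = (const op_t *) s2;
      /* Word loop: exit on a differing word or an embedded NUL.  */
      while (n >= sizeof (op_t)) {
        op_t v1 = *w1, v2 = *w2;
        if (v1 != v2 || (((v1 - MASK_1) & ~v1) & MASK_128))
          break;
        ++w1; ++w2; n -= sizeof (op_t);
      }
      /* Byte loop pins down the result within the final word.  */
      const char *p1 = (const char *) w1;
      const char *p2 = (const char *) w2;
      for (; n > 0; --n, ++p1, ++p2) {
        if (*p1 == '\0' || *p1 != *p2)
          return (unsigned char) *p1 - (unsigned char) *p2;
      }
      return 0;
    }

    int main(void) {
      /* union forces word alignment of the char buffers */
      static const union { char c[16]; op_t w[2]; } a = { "hello, world" },
                                                    b = { "hello, there" };
      printf("%d\n", strncmp_words(a.c, b.c, 12) > 0);  /* 1: 'w' > 't' */
      return 0;
    }

The unaligned USTRCMPW variant differs only in loading the first operand
with an LWR/LWL pair instead of a single LW.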
diff --git a/libc/arch-mips/string/strnlen.c b/libc/arch-mips/string/strnlen.c
new file mode 100644
index 0000000..2011deb
--- /dev/null
+++ b/libc/arch-mips/string/strnlen.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2017 Imagination Technologies.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *      * Redistributions of source code must retain the above copyright
+ *        notice, this list of conditions and the following disclaimer.
+ *      * Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimer
+ *        in the documentation and/or other materials provided with
+ *        the distribution.
+ *      * Neither the name of Imagination Technologies nor the names of its
+ *        contributors may be used to endorse or promote products derived
+ *        from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+#define op_t                unsigned long int
+#define op_size             sizeof (op_t)
+
+#if __mips64 || __mips_isa_rev >= 2
+static inline size_t __attribute__ ((always_inline))
+do_bytes (const char *base, const char *p, op_t inval)
+{
+  op_t outval = 0;
+#if __mips64
+  __asm__ volatile (
+    "dsbh %1, %0 \n\t"
+    "dshd %0, %1 \n\t"
+    "dclz %1, %0 \n\t"
+    : "+r" (inval), "+r" (outval)
+  );
+#else
+  __asm__ volatile (
+    "wsbh %1, %0 \n\t"
+    "rotr %0, %1, 16 \n\t"
+    "clz %1, %0 \n\t"
+    : "+r" (inval), "+r" (outval)
+  );
+#endif
+  p += (outval >> 3);
+  return (size_t) (p - base);
+}
+
+#define DO_WORD(in, val) {                          \
+  op_t tmp = ((val - mask_1) & ~val) & mask_128;    \
+  if (tmp)                                          \
+    return do_bytes(str, (const char *)(in), tmp);  \
+}
+#else
+static inline size_t __attribute__ ((always_inline))
+do_bytes (const char *base, const char *p)
+{
+  for (; *p; ++p);
+  return (size_t) (p - base);
+}
+
+#define DO_WORD(in, val) {                     \
+  if (((val - mask_1) & ~val) & mask_128) {    \
+    return do_bytes(str, (const char *)(in));  \
+  }                                            \
+}
+#endif
+
+size_t strnlen (const char *str, size_t n) {
+  if (n != 0) {
+    const char *p = (const char *) str;
+    const op_t *w;
+    op_t mask_1, mask_128;
+
+    for (; n > 0 && ((size_t) p % op_size) != 0; --n, ++p) {
+      if (!(*p))
+        return (p - str);
+    }
+
+    w = (const op_t *) p;
+
+    __asm__ volatile (
+      "li %0, 0x01010101 \n\t"
+      : "=r" (mask_1)
+    );
+#if __mips64
+    mask_1 |= mask_1 << 32;
+#endif
+    mask_128 = mask_1 << 7;
+
+    /*
+     * Scan word by word (op_size bytes at a time) after initial alignment
+     */
+    while (n >= 4 * op_size) {
+      const op_t w0 = w[0];
+      const op_t w1 = w[1];
+      const op_t w2 = w[2];
+      const op_t w3 = w[3];
+      DO_WORD(w + 0, w0)
+      DO_WORD(w + 1, w1)
+      DO_WORD(w + 2, w2)
+      DO_WORD(w + 3, w3)
+      w += 4;
+      n -= 4 * op_size;
+    }
+
+    while (n >= op_size) {
+      DO_WORD(w, w[0]);
+      w++;
+      n -= op_size;
+    }
+
+    /*
+     * Check the remaining bytes one at a time
+     */
+    p = (const char *) w;
+    for (; n > 0; --n, ++p) {
+      if (!(*p))
+        return (p - str);
+    }
+
+    return (p - str);
+  }
+
+  return 0;
+}
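
Edge cases worth exercising for any word-at-a-time strnlen: unaligned
starting pointers, a count that stops the scan before the NUL, n == 0, and
a NUL in the very first byte. A small host-side check of this shape (the
bionic tests under bionic/tests cover the same ground on target):

    #define _POSIX_C_SOURCE 200809L
    #include <assert.h>
    #include <stdio.h>
    #include <string.h>

    int main(void) {
      char buf[64];
      memset(buf, 'x', sizeof buf);
      buf[40] = '\0';

      for (int off = 0; off < 8; ++off) {
        /* Unaligned starts: cover every byte lane of the first word.  */
        assert(strnlen(buf + off, sizeof buf - off) == (size_t) (40 - off));
        /* The count caps the scan before the NUL is reached.  */
        assert(strnlen(buf + off, 10) == 10);
      }
      assert(strnlen(buf, 0) == 0);       /* n == 0 scans nothing */
      assert(strnlen(buf + 40, 8) == 0);  /* NUL at the first byte */
      puts("ok");
      return 0;
    }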