Add libc optimizations to bionic for MIPS
Test: Used bionic tests available under bionic/tests folder.
Tested for mips32r1/mips32r2/mips64r6 on emulators.
Change-Id: I589415ddc496df3f6067ae34cb33ca58b3a1f276
Signed-off-by: Prashant Patil <prashant.patil@imgtec.com>
diff --git a/libc/Android.bp b/libc/Android.bp
index 1ecef1f..5d0e8c7 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -632,7 +632,22 @@
"upstream-openbsd/lib/libc/string/strncmp.c",
],
},
-
+ mips: {
+ exclude_srcs: [
+ "upstream-openbsd/lib/libc/string/memchr.c",
+ "upstream-openbsd/lib/libc/string/memmove.c",
+ "upstream-openbsd/lib/libc/string/strcpy.c",
+ "upstream-openbsd/lib/libc/string/strncmp.c",
+ ],
+ },
+ mips64: {
+ exclude_srcs: [
+ "upstream-openbsd/lib/libc/string/memchr.c",
+ "upstream-openbsd/lib/libc/string/memmove.c",
+ "upstream-openbsd/lib/libc/string/strcpy.c",
+ "upstream-openbsd/lib/libc/string/strncmp.c",
+ ],
+ },
x86: {
exclude_srcs: [
"upstream-openbsd/lib/libc/string/memchr.c",
@@ -1041,9 +1056,16 @@
mips: {
srcs: [
"arch-mips/string/memcmp.c",
- "arch-mips/string/memcpy.S",
+ "arch-mips/string/memcpy.c",
"arch-mips/string/memset.S",
"arch-mips/string/strcmp.S",
+ "arch-mips/string/strncmp.S",
+ "arch-mips/string/strlen.c",
+ "arch-mips/string/strnlen.c",
+ "arch-mips/string/strchr.c",
+ "arch-mips/string/strcpy.c",
+ "arch-mips/string/memchr.c",
+ "arch-mips/string/memmove.c",
"arch-mips/bionic/__bionic_clone.S",
"arch-mips/bionic/cacheflush.cpp",
@@ -1052,25 +1074,25 @@
"arch-mips/bionic/setjmp.S",
"arch-mips/bionic/syscall.S",
"arch-mips/bionic/vfork.S",
-
- "arch-mips/string/mips_strlen.c",
],
- rev6: {
- srcs: [
- "arch-mips/string/strlen.c",
- ],
- exclude_srcs: [
- "arch-mips/string/mips_strlen.c",
- ],
- },
+ exclude_srcs: [
+ "bionic/strchr.cpp",
+ "bionic/strnlen.c",
+ ],
},
mips64: {
srcs: [
"arch-mips/string/memcmp.c",
- "arch-mips/string/memcpy.S",
+ "arch-mips/string/memcpy.c",
"arch-mips/string/memset.S",
"arch-mips/string/strcmp.S",
+ "arch-mips/string/strncmp.S",
"arch-mips/string/strlen.c",
+ "arch-mips/string/strnlen.c",
+ "arch-mips/string/strchr.c",
+ "arch-mips/string/strcpy.c",
+ "arch-mips/string/memchr.c",
+ "arch-mips/string/memmove.c",
"arch-mips64/bionic/__bionic_clone.S",
"arch-mips64/bionic/_exit_with_stack_teardown.S",
@@ -1079,6 +1101,10 @@
"arch-mips64/bionic/vfork.S",
"arch-mips64/bionic/stat.cpp",
],
+ exclude_srcs: [
+ "bionic/strchr.cpp",
+ "bionic/strnlen.c",
+ ],
},
x86: {
diff --git a/libc/NOTICE b/libc/NOTICE
index 9f0d2c5..2ce293f 100644
--- a/libc/NOTICE
+++ b/libc/NOTICE
@@ -4816,38 +4816,6 @@
-------------------------------------------------------------------
-Copyright (c) 2010 MIPS Technologies, Inc.
-
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with
- the distribution.
- * Neither the name of MIPS Technologies Inc. nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
--------------------------------------------------------------------
-
Copyright (c) 2010 The NetBSD Foundation, Inc.
All rights reserved.
@@ -5344,35 +5312,6 @@
-------------------------------------------------------------------
-Copyright (c) 2012-2015
- MIPS Technologies, Inc., California.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGE.
-
--------------------------------------------------------------------
-
Copyright (c) 2013
MIPS Technologies, Inc., California.
@@ -5586,35 +5525,6 @@
-------------------------------------------------------------------
-Copyright (c) 2014
- Imagination Technologies Limited.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY IMAGINATION TECHNOLOGIES LIMITED ``AS IS'' AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL IMAGINATION TECHNOLOGIES LIMITED BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGE.
-
--------------------------------------------------------------------
-
Copyright (c) 2014 Theo de Raadt <deraadt@openbsd.org>
Copyright (c) 2014 Bob Beck <beck@obtuse.com>
@@ -5750,6 +5660,38 @@
-------------------------------------------------------------------
+Copyright (c) 2017 Imagination Technologies.
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with
+ the distribution.
+ * Neither the name of Imagination Technologies nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-------------------------------------------------------------------
+
Copyright (c)1999 Citrus Project,
All rights reserved.
diff --git a/libc/arch-mips/string/memchr.c b/libc/arch-mips/string/memchr.c
new file mode 100644
index 0000000..6b4c8cc
--- /dev/null
+++ b/libc/arch-mips/string/memchr.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2017 Imagination Technologies.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with
+ * the distribution.
+ * * Neither the name of Imagination Technologies nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+
+#define ENABLE_PREFETCH 1
+#define op_t unsigned long int
+#define op_size sizeof (op_t)
+
+#if ENABLE_PREFETCH
+#define PREFETCH(addr) __builtin_prefetch (addr, 0, 1);
+#else
+#define PREFETCH(addr)
+#endif
+
+#if __mips64 || __mips_isa_rev >= 2
+static inline void * __attribute__ ((always_inline))
+do_bytes (const op_t* w, op_t inval)
+{
+ const unsigned char *p = (const unsigned char *) w;
+ op_t outval = 0;
+#if __mips64
+ __asm__ volatile (
+ "dsbh %1, %0 \n\t"
+ "dshd %0, %1 \n\t"
+ "dclz %1, %0 \n\t"
+ : "+r" (inval), "+r" (outval)
+ );
+#else
+ __asm__ volatile (
+ "wsbh %1, %0 \n\t"
+ "rotr %0, %1, 16 \n\t"
+ "clz %1, %0 \n\t"
+ : "+r" (inval), "+r" (outval)
+ );
+#endif
+ p += (outval >> 3);
+ return (void *) p;
+}
+
+#define DO_WORD(in, val) { \
+ op_t tmp = ((val - mask_1) & ~val) & mask_128; \
+ if (tmp != 0) \
+ return do_bytes(in, tmp); \
+}
+#else
+static inline void * __attribute__ ((always_inline))
+do_bytes (const op_t* w, unsigned char ch)
+{
+ const unsigned char *p = (const unsigned char *) w;
+ for (; *p != ch; ++p);
+ return (void *) p;
+}
+
+#define DO_WORD(in, val) { \
+ op_t tmp = ((val - mask_1) & ~val) & mask_128; \
+ if (tmp != 0) \
+ return do_bytes(in, ch); \
+}
+#endif
+
+#define DO_WORDS(w) { \
+ op_t* w1 = (op_t*) w; \
+ op_t val0 = w1[0] ^ mask_c; \
+ op_t val1 = w1[1] ^ mask_c; \
+ op_t val2 = w1[2] ^ mask_c; \
+ op_t val3 = w1[3] ^ mask_c; \
+ DO_WORD(w1, val0) \
+ DO_WORD(w1 + 1, val1) \
+ DO_WORD(w1 + 2, val2) \
+ DO_WORD(w1 + 3, val3) \
+}
+
+void *
+memchr (void const *s, int c_in, size_t n) __overloadable
+{
+ if (n != 0) {
+ const unsigned char *p = (const unsigned char *) s;
+ const op_t *w;
+ op_t mask_1, mask_128, mask_c;
+ unsigned char ch = (unsigned char) c_in;
+
+ /*
+     * Check bytewise until initial alignment
+ */
+ for (; n > 0 && ((size_t) p % op_size) != 0; --n, ++p) {
+ if (*p == ch)
+ return (void *) p;
+ }
+
+ w = (const op_t *) p;
+
+ mask_c = ch | (ch << 8);
+ mask_c |= mask_c << 16;
+ __asm__ volatile (
+ "li %0, 0x01010101 \n\t"
+ : "=r" (mask_1)
+ );
+#if __mips64
+ mask_1 |= mask_1 << 32;
+ mask_c |= mask_c << 32;
+#endif
+ mask_128 = mask_1 << 7;
+
+ /*
+     * Check op_size bytes at a time after initial alignment
+ */
+#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
+ PREFETCH (w);
+ PREFETCH (w + 8);
+ while (n >= 24 * op_size) {
+ PREFETCH(w + 16);
+ DO_WORDS(w);
+ DO_WORDS(w + 4);
+ w += 8;
+ n -= 8 * op_size;
+ }
+ while (n >= 8 * op_size) {
+ DO_WORDS(w);
+ DO_WORDS(w + 4);
+ w += 8;
+ n -= 8 * op_size;
+ }
+#else
+ PREFETCH (w);
+ PREFETCH (w + 4);
+ while (n >= 12 * op_size) {
+ PREFETCH(w + 8);
+ DO_WORDS(w);
+ w += 4;
+ n -= 4 * op_size;
+ }
+ while (n >= 4 * op_size) {
+ DO_WORDS(w);
+ w += 4;
+ n -= 4 * op_size;
+ }
+#endif
+
+ while (n >= op_size) {
+ op_t val = *w ^ mask_c;
+ DO_WORD(w, val);
+ w++;
+ n -= op_size;
+ }
+
+ /*
+     * Check bytewise for remaining bytes
+ */
+ p = (const unsigned char *) w;
+ for (; n > 0; --n, ++p) {
+ if (*p == ch)
+ return (void *) p;
+ }
+ }
+ return NULL;
+}
diff --git a/libc/arch-mips/string/memcmp.c b/libc/arch-mips/string/memcmp.c
index 8640954..eb4ad07 100644
--- a/libc/arch-mips/string/memcmp.c
+++ b/libc/arch-mips/string/memcmp.c
@@ -1,51 +1,352 @@
/*
- * Copyright (C) 2008 The Android Open Source Project
+ * Copyright (c) 2017 Imagination Technologies.
+ *
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with
+ * the distribution.
+ * * Neither the name of Imagination Technologies nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+
#include <string.h>
+#include <stdint.h>
-int memcmp(const void *s1, const void *s2, size_t n)
+#define ENABLE_PREFETCH 1
+
+#define STRNG(X) #X
+#define PREFETCH(src_ptr, offset) \
+ asm("pref 0, " STRNG(offset) "(%[src]) \n\t" : : [src] "r" (src_ptr));
+
+#if !defined(UNALIGNED_INSTR_SUPPORT)
+/* does target have unaligned lw/ld/ualw/uald instructions? */
+#define UNALIGNED_INSTR_SUPPORT 0
+#if __mips_isa_rev < 6 && !__mips1
+#undef UNALIGNED_INSTR_SUPPORT
+#define UNALIGNED_INSTR_SUPPORT 1
+#endif
+#endif
+
+#if !defined(HW_UNALIGNED_SUPPORT)
+/* Does target have hardware support for unaligned accesses? */
+#define HW_UNALIGNED_SUPPORT 0
+#if __mips_isa_rev >= 6
+#undef HW_UNALIGNED_SUPPORT
+#define HW_UNALIGNED_SUPPORT 1
+#endif
+#endif
+
+#define SIZEOF_reg_t 4
+#if _MIPS_SIM == _ABIO32
+typedef unsigned long reg_t;
+typedef struct bits
{
- const unsigned char* p1 = s1;
- const unsigned char* end1 = p1 + n;
- const unsigned char* p2 = s2;
- int d = 0;
+ reg_t B0:8, B1:8, B2:8, B3:8;
+} bits_t;
+#else
+#undef SIZEOF_reg_t
+#define SIZEOF_reg_t 8
+typedef unsigned long long reg_t;
+typedef struct bits
+{
+ reg_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
+} bits_t;
+#endif
- for (;;) {
- if (d || p1 >= end1) break;
- d = (int)*p1++ - (int)*p2++;
+/* This union assumes that small structures can be in registers. If
+ not, then memory accesses will be done - not optimal, but ok. */
+typedef union
+{
+ reg_t v;
+ bits_t b;
+} bitfields_t;
- if (d || p1 >= end1) break;
- d = (int)*p1++ - (int)*p2++;
+#define do_bitfield(__i) \
+ if (x.b.B##__i != y.b.B##__i) return x.b.B##__i - y.b.B##__i;
- if (d || p1 >= end1) break;
- d = (int)*p1++ - (int)*p2++;
+/* pull apart the words to find the first differing unsigned byte. */
+static int __attribute__ ((noinline)) do_by_bitfields (reg_t a, reg_t b)
+{
+ bitfields_t x, y;
+ x.v = a;
+ y.v = b;
+ do_bitfield (0);
+ do_bitfield (1);
+ do_bitfield (2);
+#if SIZEOF_reg_t == 4
+ return x.b.B3 - y.b.B3;
+#else
+ do_bitfield (3);
+ do_bitfield (4);
+ do_bitfield (5);
+ do_bitfield (6);
+ return x.b.B7 - y.b.B7;
+#endif
+}
- if (d || p1 >= end1) break;
- d = (int)*p1++ - (int)*p2++;
- }
- return d;
+/* This code is called when aligning a pointer, there are remaining bytes
+ after doing word compares, or architecture does not have some form
+ of unaligned support. */
+static inline int __attribute__ ((always_inline))
+do_bytes (const void *a, const void *b, unsigned long len)
+{
+ unsigned char *x = (unsigned char *) a;
+ unsigned char *y = (unsigned char *) b;
+ unsigned long i;
+
+ /* 'len' might be zero here, so preloading the first two values
+ before the loop may access unallocated memory. */
+ for (i = 0; i < len; i++) {
+ if (*x != *y)
+ return *x - *y;
+ x++;
+ y++;
+ }
+ return 0;
+}
+
+#if !HW_UNALIGNED_SUPPORT
+#if UNALIGNED_INSTR_SUPPORT
+/* for MIPS GCC, there are no unaligned builtins - so this struct forces
+ the compiler to treat the pointer access as unaligned. */
+struct ulw
+{
+ reg_t uli;
+} __attribute__ ((packed));
+
+/* first pointer is not aligned while second pointer is. */
+static int unaligned_words (const struct ulw *a, const reg_t *b,
+ unsigned long words, unsigned long bytes)
+{
+#if ENABLE_PREFETCH
+ /* prefetch pointer aligned to 32 byte boundary */
+ const reg_t *pref_ptr = (const reg_t *) (((uintptr_t) b + 31) & ~31);
+ const reg_t *pref_ptr_a = (const reg_t *) (((uintptr_t) a + 31) & ~31);
+#endif
+ for (; words >= 16; words -= 8) {
+#if ENABLE_PREFETCH
+ pref_ptr += 8;
+ PREFETCH(pref_ptr, 0);
+ PREFETCH(pref_ptr, 32);
+
+ pref_ptr_a += 8;
+ PREFETCH(pref_ptr_a, 0);
+ PREFETCH(pref_ptr_a, 32);
+#endif
+ reg_t x0 = a[0].uli, x1 = a[1].uli;
+ reg_t x2 = a[2].uli, x3 = a[3].uli;
+ reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
+ if (x0 != y0)
+ return do_by_bitfields (x0, y0);
+ if (x1 != y1)
+ return do_by_bitfields (x1, y1);
+ if (x2 != y2)
+ return do_by_bitfields (x2, y2);
+ if (x3 != y3)
+ return do_by_bitfields (x3, y3);
+
+ x0 = a[4].uli; x1 = a[5].uli;
+ x2 = a[6].uli; x3 = a[7].uli;
+ y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7];
+ if (x0 != y0)
+ return do_by_bitfields (x0, y0);
+ if (x1 != y1)
+ return do_by_bitfields (x1, y1);
+ if (x2 != y2)
+ return do_by_bitfields (x2, y2);
+ if (x3 != y3)
+ return do_by_bitfields (x3, y3);
+
+ a += 8;
+ b += 8;
+ }
+
+ for (; words >= 4; words -= 4) {
+ reg_t x0 = a[0].uli, x1 = a[1].uli;
+ reg_t x2 = a[2].uli, x3 = a[3].uli;
+ reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
+ if (x0 != y0)
+ return do_by_bitfields (x0, y0);
+ if (x1 != y1)
+ return do_by_bitfields (x1, y1);
+ if (x2 != y2)
+ return do_by_bitfields (x2, y2);
+ if (x3 != y3)
+ return do_by_bitfields (x3, y3);
+ a += 4;
+ b += 4;
+ }
+
+ /* do remaining words. */
+ while (words--) {
+ reg_t x0 = a->uli;
+ reg_t y0 = *b;
+ a += 1;
+ b += 1;
+ if (x0 != y0)
+ return do_by_bitfields (x0, y0);
+ }
+
+ /* mop up any remaining bytes. */
+ return do_bytes (a, b, bytes);
+}
+#else
+/* no HW support or unaligned lw/ld/ualw/uald instructions. */
+static int unaligned_words (const reg_t *a, const reg_t *b,
+ unsigned long words, unsigned long bytes)
+{
+ return do_bytes (a, b, (sizeof (reg_t) * words) + bytes);
+}
+#endif /* UNALIGNED_INSTR_SUPPORT */
+#endif /* HW_UNALIGNED_SUPPORT */
+
+/* both pointers are aligned, or first isn't and HW support for unaligned. */
+static int aligned_words (const reg_t *a, const reg_t *b,
+ unsigned long words, unsigned long bytes)
+{
+#if ENABLE_PREFETCH
+ /* prefetch pointer aligned to 32 byte boundary */
+ const reg_t *pref_ptr = (const reg_t *) (((uintptr_t) b + 31) & ~31);
+ const reg_t *pref_ptr_a = (const reg_t *) (((uintptr_t) a + 31) & ~31);
+#endif
+
+ for (; words >= 24; words -= 12) {
+#if ENABLE_PREFETCH
+ pref_ptr += 12;
+ PREFETCH(pref_ptr, 0);
+ PREFETCH(pref_ptr, 32);
+ PREFETCH(pref_ptr, 64);
+
+ pref_ptr_a += 12;
+ PREFETCH(pref_ptr_a, 0);
+ PREFETCH(pref_ptr_a, 32);
+ PREFETCH(pref_ptr_a, 64);
+#endif
+ reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3];
+ reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
+ if (x0 != y0)
+ return do_by_bitfields (x0, y0);
+ if (x1 != y1)
+ return do_by_bitfields (x1, y1);
+ if (x2 != y2)
+ return do_by_bitfields (x2, y2);
+ if (x3 != y3)
+ return do_by_bitfields (x3, y3);
+
+ x0 = a[4]; x1 = a[5]; x2 = a[6]; x3 = a[7];
+ y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7];
+ if (x0 != y0)
+ return do_by_bitfields (x0, y0);
+ if (x1 != y1)
+ return do_by_bitfields (x1, y1);
+ if (x2 != y2)
+ return do_by_bitfields (x2, y2);
+ if (x3 != y3)
+ return do_by_bitfields (x3, y3);
+
+ x0 = a[8]; x1 = a[9]; x2 = a[10]; x3 = a[11];
+ y0 = b[8]; y1 = b[9]; y2 = b[10]; y3 = b[11];
+ if (x0 != y0)
+ return do_by_bitfields (x0, y0);
+ if (x1 != y1)
+ return do_by_bitfields (x1, y1);
+ if (x2 != y2)
+ return do_by_bitfields (x2, y2);
+ if (x3 != y3)
+ return do_by_bitfields (x3, y3);
+
+ a += 12;
+ b += 12;
+ }
+
+ for (; words >= 4; words -= 4) {
+ reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3];
+ reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
+ if (x0 != y0)
+ return do_by_bitfields (x0, y0);
+ if (x1 != y1)
+ return do_by_bitfields (x1, y1);
+ if (x2 != y2)
+ return do_by_bitfields (x2, y2);
+ if (x3 != y3)
+ return do_by_bitfields (x3, y3);
+ a += 4;
+ b += 4;
+ }
+
+ /* do remaining words. */
+ while (words--) {
+ reg_t x0 = *a;
+ reg_t y0 = *b;
+ a += 1;
+ b += 1;
+ if (x0 != y0)
+ return do_by_bitfields (x0, y0);
+ }
+
+ /* mop up any remaining bytes. */
+ return do_bytes (a, b, bytes);
+}
+
+int memcmp (const void *a, const void *b, size_t len)
+{
+ unsigned long bytes, words;
+
+ /* shouldn't hit that often. */
+ if (len < sizeof (reg_t) * 4) {
+ return do_bytes (a, b, len);
+ }
+
+ /* Align the second pointer to word/dword alignment.
+ Note that the pointer is only 32-bits for o32/n32 ABIs. For
+ n32, loads are done as 64-bit while address remains 32-bit. */
+ bytes = ((unsigned long) b) % sizeof (reg_t);
+ if (bytes) {
+ int res;
+ bytes = sizeof (reg_t) - bytes;
+ if (bytes > len)
+ bytes = len;
+ res = do_bytes (a, b, bytes);
+ if (res || len == bytes)
+ return res;
+ len -= bytes;
+ a = (const void *) (((unsigned char *) a) + bytes);
+ b = (const void *) (((unsigned char *) b) + bytes);
+ }
+
+ /* Second pointer now aligned. */
+ words = len / sizeof (reg_t);
+ bytes = len % sizeof (reg_t);
+
+#if HW_UNALIGNED_SUPPORT
+ /* treat possible unaligned first pointer as aligned. */
+ return aligned_words (a, b, words, bytes);
+#else
+ if (((unsigned long) a) % sizeof (reg_t) == 0) {
+ return aligned_words (a, b, words, bytes);
+ }
+ /* need to use unaligned instructions on first pointer. */
+ return unaligned_words (a, b, words, bytes);
+#endif
}
diff --git a/libc/arch-mips/string/memcpy.S b/libc/arch-mips/string/memcpy.S
deleted file mode 100644
index 0b711bd..0000000
--- a/libc/arch-mips/string/memcpy.S
+++ /dev/null
@@ -1,852 +0,0 @@
-/*
- * Copyright (c) 2012-2015
- * MIPS Technologies, Inc., California.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifdef __ANDROID__
-# include <private/bionic_asm.h>
-# define USE_MEMMOVE_FOR_OVERLAP
-# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD_STREAMED
-# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
-#elif _LIBC
-# include <sysdep.h>
-# include <regdef.h>
-# include <sys/asm.h>
-# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD_STREAMED
-# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
-#elif _COMPILING_NEWLIB
-# include "machine/asm.h"
-# include "machine/regdef.h"
-# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD_STREAMED
-# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
-#else
-# include <regdef.h>
-# include <sys/asm.h>
-#endif
-
-/* Check to see if the MIPS architecture we are compiling for supports
- * prefetching.
- */
-
-#if (__mips == 4) || (__mips == 5) || (__mips == 32) || (__mips == 64)
-# ifndef DISABLE_PREFETCH
-# define USE_PREFETCH
-# endif
-#endif
-
-#if defined(_MIPS_SIM) && ((_MIPS_SIM == _ABI64) || (_MIPS_SIM == _ABIN32))
-# ifndef DISABLE_DOUBLE
-# define USE_DOUBLE
-# endif
-#endif
-
-
-#if __mips_isa_rev > 5
-# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
-# undef PREFETCH_STORE_HINT
-# define PREFETCH_STORE_HINT PREFETCH_HINT_STORE_STREAMED
-# endif
-# define R6_CODE
-#endif
-
-/* Some asm.h files do not have the L macro definition. */
-#ifndef L
-# if _MIPS_SIM == _ABIO32
-# define L(label) $L ## label
-# else
-# define L(label) .L ## label
-# endif
-#endif
-
-/* Some asm.h files do not have the PTR_ADDIU macro definition. */
-#ifndef PTR_ADDIU
-# if _MIPS_SIM == _ABIO32
-# define PTR_ADDIU addiu
-# else
-# define PTR_ADDIU daddiu
-# endif
-#endif
-
-/* Some asm.h files do not have the PTR_SRA macro definition. */
-#ifndef PTR_SRA
-# if _MIPS_SIM == _ABIO32
-# define PTR_SRA sra
-# else
-# define PTR_SRA dsra
-# endif
-#endif
-
-/* New R6 instructions that may not be in asm.h. */
-#ifndef PTR_LSA
-# if _MIPS_SIM == _ABIO32
-# define PTR_LSA lsa
-# else
-# define PTR_LSA dlsa
-# endif
-#endif
-
-/*
- * Using PREFETCH_HINT_LOAD_STREAMED instead of PREFETCH_LOAD on load
- * prefetches appears to offer a slight preformance advantage.
- *
- * Using PREFETCH_HINT_PREPAREFORSTORE instead of PREFETCH_STORE
- * or PREFETCH_STORE_STREAMED offers a large performance advantage
- * but PREPAREFORSTORE has some special restrictions to consider.
- *
- * Prefetch with the 'prepare for store' hint does not copy a memory
- * location into the cache, it just allocates a cache line and zeros
- * it out. This means that if you do not write to the entire cache
- * line before writing it out to memory some data will get zero'ed out
- * when the cache line is written back to memory and data will be lost.
- *
- * Also if you are using this memcpy to copy overlapping buffers it may
- * not behave correctly when using the 'prepare for store' hint. If you
- * use the 'prepare for store' prefetch on a memory area that is in the
- * memcpy source (as well as the memcpy destination), then you will get
- * some data zero'ed out before you have a chance to read it and data will
- * be lost.
- *
- * If you are going to use this memcpy routine with the 'prepare for store'
- * prefetch you may want to set USE_MEMMOVE_FOR_OVERLAP in order to avoid
- * the problem of running memcpy on overlapping buffers.
- *
- * There are ifdef'ed sections of this memcpy to make sure that it does not
- * do prefetches on cache lines that are not going to be completely written.
- * This code is only needed and only used when PREFETCH_STORE_HINT is set to
- * PREFETCH_HINT_PREPAREFORSTORE. This code assumes that cache lines are
- * 32 bytes and if the cache line is larger it will not work correctly.
- */
-
-#ifdef USE_PREFETCH
-# define PREFETCH_HINT_LOAD 0
-# define PREFETCH_HINT_STORE 1
-# define PREFETCH_HINT_LOAD_STREAMED 4
-# define PREFETCH_HINT_STORE_STREAMED 5
-# define PREFETCH_HINT_LOAD_RETAINED 6
-# define PREFETCH_HINT_STORE_RETAINED 7
-# define PREFETCH_HINT_WRITEBACK_INVAL 25
-# define PREFETCH_HINT_PREPAREFORSTORE 30
-
-/*
- * If we have not picked out what hints to use at this point use the
- * standard load and store prefetch hints.
- */
-# ifndef PREFETCH_STORE_HINT
-# define PREFETCH_STORE_HINT PREFETCH_HINT_STORE
-# endif
-# ifndef PREFETCH_LOAD_HINT
-# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD
-# endif
-
-/*
- * We double everything when USE_DOUBLE is true so we do 2 prefetches to
- * get 64 bytes in that case. The assumption is that each individual
- * prefetch brings in 32 bytes.
- */
-
-# ifdef USE_DOUBLE
-# define PREFETCH_CHUNK 64
-# define PREFETCH_FOR_LOAD(chunk, reg) \
- pref PREFETCH_LOAD_HINT, (chunk)*64(reg); \
- pref PREFETCH_LOAD_HINT, ((chunk)*64)+32(reg)
-# define PREFETCH_FOR_STORE(chunk, reg) \
- pref PREFETCH_STORE_HINT, (chunk)*64(reg); \
- pref PREFETCH_STORE_HINT, ((chunk)*64)+32(reg)
-# else
-# define PREFETCH_CHUNK 32
-# define PREFETCH_FOR_LOAD(chunk, reg) \
- pref PREFETCH_LOAD_HINT, (chunk)*32(reg)
-# define PREFETCH_FOR_STORE(chunk, reg) \
- pref PREFETCH_STORE_HINT, (chunk)*32(reg)
-# endif
-/* MAX_PREFETCH_SIZE is the maximum size of a prefetch, it must not be less
- * than PREFETCH_CHUNK, the assumed size of each prefetch. If the real size
- * of a prefetch is greater than MAX_PREFETCH_SIZE and the PREPAREFORSTORE
- * hint is used, the code will not work correctly. If PREPAREFORSTORE is not
- * used then MAX_PREFETCH_SIZE does not matter. */
-# define MAX_PREFETCH_SIZE 128
-/* PREFETCH_LIMIT is set based on the fact that we never use an offset greater
- * than 5 on a STORE prefetch and that a single prefetch can never be larger
- * than MAX_PREFETCH_SIZE. We add the extra 32 when USE_DOUBLE is set because
- * we actually do two prefetches in that case, one 32 bytes after the other. */
-# ifdef USE_DOUBLE
-# define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + 32 + MAX_PREFETCH_SIZE
-# else
-# define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + MAX_PREFETCH_SIZE
-# endif
-# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) \
- && ((PREFETCH_CHUNK * 4) < MAX_PREFETCH_SIZE)
-/* We cannot handle this because the initial prefetches may fetch bytes that
- * are before the buffer being copied. We start copies with an offset
- * of 4 so avoid this situation when using PREPAREFORSTORE. */
-#error "PREFETCH_CHUNK is too large and/or MAX_PREFETCH_SIZE is too small."
-# endif
-#else /* USE_PREFETCH not defined */
-# define PREFETCH_FOR_LOAD(offset, reg)
-# define PREFETCH_FOR_STORE(offset, reg)
-#endif
-
-/* Allow the routine to be named something else if desired. */
-#ifndef MEMCPY_NAME
-# define MEMCPY_NAME memcpy
-#endif
-
-/* We use these 32/64 bit registers as temporaries to do the copying. */
-#define REG0 t0
-#define REG1 t1
-#define REG2 t2
-#define REG3 t3
-#if defined(_MIPS_SIM) && (_MIPS_SIM == _ABIO32 || _MIPS_SIM == _ABIO64)
-# define REG4 t4
-# define REG5 t5
-# define REG6 t6
-# define REG7 t7
-#else
-# define REG4 ta0
-# define REG5 ta1
-# define REG6 ta2
-# define REG7 ta3
-#endif
-
-/* We load/store 64 bits at a time when USE_DOUBLE is true.
- * The C_ prefix stands for CHUNK and is used to avoid macro name
- * conflicts with system header files. */
-
-#ifdef USE_DOUBLE
-# define C_ST sd
-# define C_LD ld
-# if __MIPSEB
-# define C_LDHI ldl /* high part is left in big-endian */
-# define C_STHI sdl /* high part is left in big-endian */
-# define C_LDLO ldr /* low part is right in big-endian */
-# define C_STLO sdr /* low part is right in big-endian */
-# else
-# define C_LDHI ldr /* high part is right in little-endian */
-# define C_STHI sdr /* high part is right in little-endian */
-# define C_LDLO ldl /* low part is left in little-endian */
-# define C_STLO sdl /* low part is left in little-endian */
-# endif
-# define C_ALIGN dalign /* r6 align instruction */
-#else
-# define C_ST sw
-# define C_LD lw
-# if __MIPSEB
-# define C_LDHI lwl /* high part is left in big-endian */
-# define C_STHI swl /* high part is left in big-endian */
-# define C_LDLO lwr /* low part is right in big-endian */
-# define C_STLO swr /* low part is right in big-endian */
-# else
-# define C_LDHI lwr /* high part is right in little-endian */
-# define C_STHI swr /* high part is right in little-endian */
-# define C_LDLO lwl /* low part is left in little-endian */
-# define C_STLO swl /* low part is left in little-endian */
-# endif
-# define C_ALIGN align /* r6 align instruction */
-#endif
-
-/* Bookkeeping values for 32 vs. 64 bit mode. */
-#ifdef USE_DOUBLE
-# define NSIZE 8
-# define NSIZEMASK 0x3f
-# define NSIZEDMASK 0x7f
-#else
-# define NSIZE 4
-# define NSIZEMASK 0x1f
-# define NSIZEDMASK 0x3f
-#endif
-#define UNIT(unit) ((unit)*NSIZE)
-#define UNITM1(unit) (((unit)*NSIZE)-1)
-
-#ifdef __ANDROID__
-LEAF(MEMCPY_NAME, 0)
-#else
-LEAF(MEMCPY_NAME)
-#endif
- .set nomips16
- .set noreorder
-/*
- * Below we handle the case where memcpy is called with overlapping src and dst.
- * Although memcpy is not required to handle this case, some parts of Android
- * like Skia rely on such usage. We call memmove to handle such cases.
- */
-#ifdef USE_MEMMOVE_FOR_OVERLAP
- PTR_SUBU t0,a0,a1
- PTR_SRA t2,t0,31
- xor t1,t0,t2
- PTR_SUBU t0,t1,t2
- sltu t2,t0,a2
- beq t2,zero,L(memcpy)
- nop
-#if defined(__LP64__)
- daddiu sp,sp,-8
- SETUP_GP64(0,MEMCPY_NAME)
- LA t9,memmove
- RESTORE_GP64
- jr t9
- daddiu sp,sp,8
-#else
- LA t9,memmove
- jr t9
- nop
-#endif
-L(memcpy):
-#endif
-/*
- * If the size is less than 2*NSIZE (8 or 16), go to L(lastb). Regardless of
- * size, copy dst pointer to v0 for the return value.
- */
- slti t2,a2,(2 * NSIZE)
- bne t2,zero,L(lastb)
-#if defined(RETURN_FIRST_PREFETCH) || defined(RETURN_LAST_PREFETCH)
- move v0,zero
-#else
- move v0,a0
-#endif
-
-#ifndef R6_CODE
-
-/*
- * If src and dst have different alignments, go to L(unaligned), if they
- * have the same alignment (but are not actually aligned) do a partial
- * load/store to make them aligned. If they are both already aligned
- * we can start copying at L(aligned).
- */
- xor t8,a1,a0
- andi t8,t8,(NSIZE-1) /* t8 is a0/a1 word-displacement */
- bne t8,zero,L(unaligned)
- PTR_SUBU a3, zero, a0
-
- andi a3,a3,(NSIZE-1) /* copy a3 bytes to align a0/a1 */
- beq a3,zero,L(aligned) /* if a3=0, it is already aligned */
- PTR_SUBU a2,a2,a3 /* a2 is the remining bytes count */
-
- C_LDHI t8,0(a1)
- PTR_ADDU a1,a1,a3
- C_STHI t8,0(a0)
- PTR_ADDU a0,a0,a3
-
-#else /* R6_CODE */
-
-/*
- * Align the destination and hope that the source gets aligned too. If it
- * doesn't we jump to L(r6_unaligned*) to do unaligned copies using the r6
- * align instruction.
- */
- andi t8,a0,7
- lapc t9,L(atable)
- PTR_LSA t9,t8,t9,2
- jrc t9
-L(atable):
- bc L(lb0)
- bc L(lb7)
- bc L(lb6)
- bc L(lb5)
- bc L(lb4)
- bc L(lb3)
- bc L(lb2)
- bc L(lb1)
-L(lb7):
- lb a3, 6(a1)
- sb a3, 6(a0)
-L(lb6):
- lb a3, 5(a1)
- sb a3, 5(a0)
-L(lb5):
- lb a3, 4(a1)
- sb a3, 4(a0)
-L(lb4):
- lb a3, 3(a1)
- sb a3, 3(a0)
-L(lb3):
- lb a3, 2(a1)
- sb a3, 2(a0)
-L(lb2):
- lb a3, 1(a1)
- sb a3, 1(a0)
-L(lb1):
- lb a3, 0(a1)
- sb a3, 0(a0)
-
- li t9,8
- subu t8,t9,t8
- PTR_SUBU a2,a2,t8
- PTR_ADDU a0,a0,t8
- PTR_ADDU a1,a1,t8
-L(lb0):
-
- andi t8,a1,(NSIZE-1)
- lapc t9,L(jtable)
- PTR_LSA t9,t8,t9,2
- jrc t9
-L(jtable):
- bc L(aligned)
- bc L(r6_unaligned1)
- bc L(r6_unaligned2)
- bc L(r6_unaligned3)
-# ifdef USE_DOUBLE
- bc L(r6_unaligned4)
- bc L(r6_unaligned5)
- bc L(r6_unaligned6)
- bc L(r6_unaligned7)
-# endif
-#endif /* R6_CODE */
-
-L(aligned):
-
-/*
- * Now dst/src are both aligned to (word or double word) aligned addresses
- * Set a2 to count how many bytes we have to copy after all the 64/128 byte
- * chunks are copied and a3 to the dst pointer after all the 64/128 byte
- * chunks have been copied. We will loop, incrementing a0 and a1 until a0
- * equals a3.
- */
-
- andi t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
- beq a2,t8,L(chkw) /* if a2==t8, no 64-byte/128-byte chunks */
- PTR_SUBU a3,a2,t8 /* subtract from a2 the reminder */
- PTR_ADDU a3,a0,a3 /* Now a3 is the final dst after loop */
-
-/* When in the loop we may prefetch with the 'prepare to store' hint,
- * in this case the a0+x should not be past the "t0-32" address. This
- * means: for x=128 the last "safe" a0 address is "t0-160". Alternatively,
- * for x=64 the last "safe" a0 address is "t0-96" In the current version we
- * will use "prefetch hint,128(a0)", so "t0-160" is the limit.
- */
-#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
- PTR_ADDU t0,a0,a2 /* t0 is the "past the end" address */
- PTR_SUBU t9,t0,PREFETCH_LIMIT /* t9 is the "last safe pref" address */
-#endif
- PREFETCH_FOR_LOAD (0, a1)
- PREFETCH_FOR_LOAD (1, a1)
- PREFETCH_FOR_LOAD (2, a1)
- PREFETCH_FOR_LOAD (3, a1)
-#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
- PREFETCH_FOR_STORE (1, a0)
- PREFETCH_FOR_STORE (2, a0)
- PREFETCH_FOR_STORE (3, a0)
-#endif
-#if defined(RETURN_FIRST_PREFETCH) && defined(USE_PREFETCH)
-# if PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE
- sltu v1,t9,a0
- bgtz v1,L(skip_set)
- nop
- PTR_ADDIU v0,a0,(PREFETCH_CHUNK*4)
-L(skip_set):
-# else
- PTR_ADDIU v0,a0,(PREFETCH_CHUNK*1)
-# endif
-#endif
-#if defined(RETURN_LAST_PREFETCH) && defined(USE_PREFETCH) \
- && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
- PTR_ADDIU v0,a0,(PREFETCH_CHUNK*3)
-# ifdef USE_DOUBLE
- PTR_ADDIU v0,v0,32
-# endif
-#endif
-L(loop16w):
- C_LD t0,UNIT(0)(a1)
-#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
- sltu v1,t9,a0 /* If a0 > t9 don't use next prefetch */
- bgtz v1,L(skip_pref)
-#endif
- C_LD t1,UNIT(1)(a1)
-#ifndef R6_CODE
- PREFETCH_FOR_STORE (4, a0)
- PREFETCH_FOR_STORE (5, a0)
-#else
- PREFETCH_FOR_STORE (2, a0)
-#endif
-#if defined(RETURN_LAST_PREFETCH) && defined(USE_PREFETCH)
- PTR_ADDIU v0,a0,(PREFETCH_CHUNK*5)
-# ifdef USE_DOUBLE
- PTR_ADDIU v0,v0,32
-# endif
-#endif
-L(skip_pref):
- C_LD REG2,UNIT(2)(a1)
- C_LD REG3,UNIT(3)(a1)
- C_LD REG4,UNIT(4)(a1)
- C_LD REG5,UNIT(5)(a1)
- C_LD REG6,UNIT(6)(a1)
- C_LD REG7,UNIT(7)(a1)
-#ifndef R6_CODE
- PREFETCH_FOR_LOAD (4, a1)
-#else
- PREFETCH_FOR_LOAD (3, a1)
-#endif
- C_ST t0,UNIT(0)(a0)
- C_ST t1,UNIT(1)(a0)
- C_ST REG2,UNIT(2)(a0)
- C_ST REG3,UNIT(3)(a0)
- C_ST REG4,UNIT(4)(a0)
- C_ST REG5,UNIT(5)(a0)
- C_ST REG6,UNIT(6)(a0)
- C_ST REG7,UNIT(7)(a0)
-
- C_LD t0,UNIT(8)(a1)
- C_LD t1,UNIT(9)(a1)
- C_LD REG2,UNIT(10)(a1)
- C_LD REG3,UNIT(11)(a1)
- C_LD REG4,UNIT(12)(a1)
- C_LD REG5,UNIT(13)(a1)
- C_LD REG6,UNIT(14)(a1)
- C_LD REG7,UNIT(15)(a1)
-#ifndef R6_CODE
- PREFETCH_FOR_LOAD (5, a1)
-#endif
- C_ST t0,UNIT(8)(a0)
- C_ST t1,UNIT(9)(a0)
- C_ST REG2,UNIT(10)(a0)
- C_ST REG3,UNIT(11)(a0)
- C_ST REG4,UNIT(12)(a0)
- C_ST REG5,UNIT(13)(a0)
- C_ST REG6,UNIT(14)(a0)
- C_ST REG7,UNIT(15)(a0)
- PTR_ADDIU a0,a0,UNIT(16) /* adding 64/128 to dest */
- bne a0,a3,L(loop16w)
- PTR_ADDIU a1,a1,UNIT(16) /* adding 64/128 to src */
- move a2,t8
-
-/* Here we have src and dest word-aligned but less than 64-bytes or
- * 128 bytes to go. Check for a 32(64) byte chunk and copy if if there
- * is one. Otherwise jump down to L(chk1w) to handle the tail end of
- * the copy.
- */
-
-L(chkw):
- PREFETCH_FOR_LOAD (0, a1)
- andi t8,a2,NSIZEMASK /* Is there a 32-byte/64-byte chunk. */
- /* The t8 is the reminder count past 32-bytes */
- beq a2,t8,L(chk1w) /* When a2=t8, no 32-byte chunk */
- nop
- C_LD t0,UNIT(0)(a1)
- C_LD t1,UNIT(1)(a1)
- C_LD REG2,UNIT(2)(a1)
- C_LD REG3,UNIT(3)(a1)
- C_LD REG4,UNIT(4)(a1)
- C_LD REG5,UNIT(5)(a1)
- C_LD REG6,UNIT(6)(a1)
- C_LD REG7,UNIT(7)(a1)
- PTR_ADDIU a1,a1,UNIT(8)
- C_ST t0,UNIT(0)(a0)
- C_ST t1,UNIT(1)(a0)
- C_ST REG2,UNIT(2)(a0)
- C_ST REG3,UNIT(3)(a0)
- C_ST REG4,UNIT(4)(a0)
- C_ST REG5,UNIT(5)(a0)
- C_ST REG6,UNIT(6)(a0)
- C_ST REG7,UNIT(7)(a0)
- PTR_ADDIU a0,a0,UNIT(8)
-
-/*
- * Here we have less than 32(64) bytes to copy. Set up for a loop to
- * copy one word (or double word) at a time. Set a2 to count how many
- * bytes we have to copy after all the word (or double word) chunks are
- * copied and a3 to the dst pointer after all the (d)word chunks have
- * been copied. We will loop, incrementing a0 and a1 until a0 equals a3.
- */
-L(chk1w):
- andi a2,t8,(NSIZE-1) /* a2 is the reminder past one (d)word chunks */
- beq a2,t8,L(lastb)
- PTR_SUBU a3,t8,a2 /* a3 is count of bytes in one (d)word chunks */
- PTR_ADDU a3,a0,a3 /* a3 is the dst address after loop */
-
-/* copying in words (4-byte or 8-byte chunks) */
-L(wordCopy_loop):
- C_LD REG3,UNIT(0)(a1)
- PTR_ADDIU a0,a0,UNIT(1)
- PTR_ADDIU a1,a1,UNIT(1)
- bne a0,a3,L(wordCopy_loop)
- C_ST REG3,UNIT(-1)(a0)
-
-/* Copy the last 8 (or 16) bytes */
-L(lastb):
- blez a2,L(leave)
- PTR_ADDU a3,a0,a2 /* a3 is the last dst address */
-L(lastbloop):
- lb v1,0(a1)
- PTR_ADDIU a0,a0,1
- PTR_ADDIU a1,a1,1
- bne a0,a3,L(lastbloop)
- sb v1,-1(a0)
-L(leave):
- j ra
- nop
-
-#ifndef R6_CODE
-/*
- * UNALIGNED case, got here with a3 = "negu a0"
- * This code is nearly identical to the aligned code above
- * but only the destination (not the source) gets aligned
- * so we need to do partial loads of the source followed
- * by normal stores to the destination (once we have aligned
- * the destination).
- */
-
-L(unaligned):
- andi a3,a3,(NSIZE-1) /* copy a3 bytes to align a0/a1 */
- beqz a3,L(ua_chk16w) /* if a3=0, it is already aligned */
- PTR_SUBU a2,a2,a3 /* a2 is the remining bytes count */
-
- C_LDHI v1,UNIT(0)(a1)
- C_LDLO v1,UNITM1(1)(a1)
- PTR_ADDU a1,a1,a3
- C_STHI v1,UNIT(0)(a0)
- PTR_ADDU a0,a0,a3
-
-/*
- * Now the destination (but not the source) is aligned
- * Set a2 to count how many bytes we have to copy after all the 64/128 byte
- * chunks are copied and a3 to the dst pointer after all the 64/128 byte
- * chunks have been copied. We will loop, incrementing a0 and a1 until a0
- * equals a3.
- */
-
-L(ua_chk16w):
- andi t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
- beq a2,t8,L(ua_chkw) /* if a2==t8, no 64-byte/128-byte chunks */
- PTR_SUBU a3,a2,t8 /* subtract from a2 the reminder */
- PTR_ADDU a3,a0,a3 /* Now a3 is the final dst after loop */
-
-# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
- PTR_ADDU t0,a0,a2 /* t0 is the "past the end" address */
- PTR_SUBU t9,t0,PREFETCH_LIMIT /* t9 is the "last safe pref" address */
-# endif
- PREFETCH_FOR_LOAD (0, a1)
- PREFETCH_FOR_LOAD (1, a1)
- PREFETCH_FOR_LOAD (2, a1)
-# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
- PREFETCH_FOR_STORE (1, a0)
- PREFETCH_FOR_STORE (2, a0)
- PREFETCH_FOR_STORE (3, a0)
-# endif
-# if defined(RETURN_FIRST_PREFETCH) && defined(USE_PREFETCH)
-# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
- sltu v1,t9,a0
- bgtz v1,L(ua_skip_set)
- nop
- PTR_ADDIU v0,a0,(PREFETCH_CHUNK*4)
-L(ua_skip_set):
-# else
- PTR_ADDIU v0,a0,(PREFETCH_CHUNK*1)
-# endif
-# endif
-L(ua_loop16w):
- PREFETCH_FOR_LOAD (3, a1)
- C_LDHI t0,UNIT(0)(a1)
- C_LDHI t1,UNIT(1)(a1)
- C_LDHI REG2,UNIT(2)(a1)
-# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
- sltu v1,t9,a0
- bgtz v1,L(ua_skip_pref)
-# endif
- C_LDHI REG3,UNIT(3)(a1)
- PREFETCH_FOR_STORE (4, a0)
- PREFETCH_FOR_STORE (5, a0)
-L(ua_skip_pref):
- C_LDHI REG4,UNIT(4)(a1)
- C_LDHI REG5,UNIT(5)(a1)
- C_LDHI REG6,UNIT(6)(a1)
- C_LDHI REG7,UNIT(7)(a1)
- C_LDLO t0,UNITM1(1)(a1)
- C_LDLO t1,UNITM1(2)(a1)
- C_LDLO REG2,UNITM1(3)(a1)
- C_LDLO REG3,UNITM1(4)(a1)
- C_LDLO REG4,UNITM1(5)(a1)
- C_LDLO REG5,UNITM1(6)(a1)
- C_LDLO REG6,UNITM1(7)(a1)
- C_LDLO REG7,UNITM1(8)(a1)
- PREFETCH_FOR_LOAD (4, a1)
- C_ST t0,UNIT(0)(a0)
- C_ST t1,UNIT(1)(a0)
- C_ST REG2,UNIT(2)(a0)
- C_ST REG3,UNIT(3)(a0)
- C_ST REG4,UNIT(4)(a0)
- C_ST REG5,UNIT(5)(a0)
- C_ST REG6,UNIT(6)(a0)
- C_ST REG7,UNIT(7)(a0)
- C_LDHI t0,UNIT(8)(a1)
- C_LDHI t1,UNIT(9)(a1)
- C_LDHI REG2,UNIT(10)(a1)
- C_LDHI REG3,UNIT(11)(a1)
- C_LDHI REG4,UNIT(12)(a1)
- C_LDHI REG5,UNIT(13)(a1)
- C_LDHI REG6,UNIT(14)(a1)
- C_LDHI REG7,UNIT(15)(a1)
- C_LDLO t0,UNITM1(9)(a1)
- C_LDLO t1,UNITM1(10)(a1)
- C_LDLO REG2,UNITM1(11)(a1)
- C_LDLO REG3,UNITM1(12)(a1)
- C_LDLO REG4,UNITM1(13)(a1)
- C_LDLO REG5,UNITM1(14)(a1)
- C_LDLO REG6,UNITM1(15)(a1)
- C_LDLO REG7,UNITM1(16)(a1)
- PREFETCH_FOR_LOAD (5, a1)
- C_ST t0,UNIT(8)(a0)
- C_ST t1,UNIT(9)(a0)
- C_ST REG2,UNIT(10)(a0)
- C_ST REG3,UNIT(11)(a0)
- C_ST REG4,UNIT(12)(a0)
- C_ST REG5,UNIT(13)(a0)
- C_ST REG6,UNIT(14)(a0)
- C_ST REG7,UNIT(15)(a0)
- PTR_ADDIU a0,a0,UNIT(16) /* adding 64/128 to dest */
- bne a0,a3,L(ua_loop16w)
- PTR_ADDIU a1,a1,UNIT(16) /* adding 64/128 to src */
- move a2,t8
-
-/* Here we have src and dest word-aligned but less than 64-bytes or
- * 128 bytes to go. Check for a 32(64) byte chunk and copy if if there
- * is one. Otherwise jump down to L(ua_chk1w) to handle the tail end of
- * the copy. */
-
-L(ua_chkw):
- PREFETCH_FOR_LOAD (0, a1)
- andi t8,a2,NSIZEMASK /* Is there a 32-byte/64-byte chunk. */
- /* t8 is the reminder count past 32-bytes */
- beq a2,t8,L(ua_chk1w) /* When a2=t8, no 32-byte chunk */
- nop
- C_LDHI t0,UNIT(0)(a1)
- C_LDHI t1,UNIT(1)(a1)
- C_LDHI REG2,UNIT(2)(a1)
- C_LDHI REG3,UNIT(3)(a1)
- C_LDHI REG4,UNIT(4)(a1)
- C_LDHI REG5,UNIT(5)(a1)
- C_LDHI REG6,UNIT(6)(a1)
- C_LDHI REG7,UNIT(7)(a1)
- C_LDLO t0,UNITM1(1)(a1)
- C_LDLO t1,UNITM1(2)(a1)
- C_LDLO REG2,UNITM1(3)(a1)
- C_LDLO REG3,UNITM1(4)(a1)
- C_LDLO REG4,UNITM1(5)(a1)
- C_LDLO REG5,UNITM1(6)(a1)
- C_LDLO REG6,UNITM1(7)(a1)
- C_LDLO REG7,UNITM1(8)(a1)
- PTR_ADDIU a1,a1,UNIT(8)
- C_ST t0,UNIT(0)(a0)
- C_ST t1,UNIT(1)(a0)
- C_ST REG2,UNIT(2)(a0)
- C_ST REG3,UNIT(3)(a0)
- C_ST REG4,UNIT(4)(a0)
- C_ST REG5,UNIT(5)(a0)
- C_ST REG6,UNIT(6)(a0)
- C_ST REG7,UNIT(7)(a0)
- PTR_ADDIU a0,a0,UNIT(8)
-/*
- * Here we have less than 32(64) bytes to copy. Set up for a loop to
- * copy one word (or double word) at a time.
- */
-L(ua_chk1w):
- andi a2,t8,(NSIZE-1) /* a2 is the reminder past one (d)word chunks */
- beq a2,t8,L(ua_smallCopy)
- PTR_SUBU a3,t8,a2 /* a3 is count of bytes in one (d)word chunks */
- PTR_ADDU a3,a0,a3 /* a3 is the dst address after loop */
-
-/* copying in words (4-byte or 8-byte chunks) */
-L(ua_wordCopy_loop):
- C_LDHI v1,UNIT(0)(a1)
- C_LDLO v1,UNITM1(1)(a1)
- PTR_ADDIU a0,a0,UNIT(1)
- PTR_ADDIU a1,a1,UNIT(1)
- bne a0,a3,L(ua_wordCopy_loop)
- C_ST v1,UNIT(-1)(a0)
-
-/* Copy the last 8 (or 16) bytes */
-L(ua_smallCopy):
- beqz a2,L(leave)
- PTR_ADDU a3,a0,a2 /* a3 is the last dst address */
-L(ua_smallCopy_loop):
- lb v1,0(a1)
- PTR_ADDIU a0,a0,1
- PTR_ADDIU a1,a1,1
- bne a0,a3,L(ua_smallCopy_loop)
- sb v1,-1(a0)
-
- j ra
- nop
-
-#else /* R6_CODE */
-
-# if __MIPSEB
-# define SWAP_REGS(X,Y) X, Y
-# define ALIGN_OFFSET(N) (N)
-# else
-# define SWAP_REGS(X,Y) Y, X
-# define ALIGN_OFFSET(N) (NSIZE-N)
-# endif
-# define R6_UNALIGNED_WORD_COPY(BYTEOFFSET) \
- andi REG7, a2, (NSIZE-1);/* REG7 is # of bytes to by bytes. */ \
- beq REG7, a2, L(lastb); /* Check for bytes to copy by word */ \
- PTR_SUBU a3, a2, REG7; /* a3 is number of bytes to be copied in */ \
- /* (d)word chunks. */ \
- move a2, REG7; /* a2 is # of bytes to copy byte by byte */ \
- /* after word loop is finished. */ \
- PTR_ADDU REG6, a0, a3; /* REG6 is the dst address after loop. */ \
- PTR_SUBU REG2, a1, t8; /* REG2 is the aligned src address. */ \
- PTR_ADDU a1, a1, a3; /* a1 is addr of source after word loop. */ \
- C_LD t0, UNIT(0)(REG2); /* Load first part of source. */ \
-L(r6_ua_wordcopy##BYTEOFFSET): \
- C_LD t1, UNIT(1)(REG2); /* Load second part of source. */ \
- C_ALIGN REG3, SWAP_REGS(t1,t0), ALIGN_OFFSET(BYTEOFFSET); \
- PTR_ADDIU a0, a0, UNIT(1); /* Increment destination pointer. */ \
- PTR_ADDIU REG2, REG2, UNIT(1); /* Increment aligned source pointer.*/ \
- move t0, t1; /* Move second part of source to first. */ \
- bne a0, REG6,L(r6_ua_wordcopy##BYTEOFFSET); \
- C_ST REG3, UNIT(-1)(a0); \
- j L(lastb); \
- nop
-
- /* We are generating R6 code, the destination is 4 byte aligned and
- the source is not 4 byte aligned. t8 is 1, 2, or 3 depending on the
- alignment of the source. */
-
-L(r6_unaligned1):
- R6_UNALIGNED_WORD_COPY(1)
-L(r6_unaligned2):
- R6_UNALIGNED_WORD_COPY(2)
-L(r6_unaligned3):
- R6_UNALIGNED_WORD_COPY(3)
-# ifdef USE_DOUBLE
-L(r6_unaligned4):
- R6_UNALIGNED_WORD_COPY(4)
-L(r6_unaligned5):
- R6_UNALIGNED_WORD_COPY(5)
-L(r6_unaligned6):
- R6_UNALIGNED_WORD_COPY(6)
-L(r6_unaligned7):
- R6_UNALIGNED_WORD_COPY(7)
-# endif
-#endif /* R6_CODE */
-
- .set at
- .set reorder
-END(MEMCPY_NAME)
-#ifndef __ANDROID__
-# ifdef _LIBC
-libc_hidden_builtin_def (MEMCPY_NAME)
-# endif
-#endif
diff --git a/libc/arch-mips/string/memcpy.c b/libc/arch-mips/string/memcpy.c
new file mode 100644
index 0000000..68827b6
--- /dev/null
+++ b/libc/arch-mips/string/memcpy.c
@@ -0,0 +1,328 @@
+/*
+ * Copyright (c) 2017 Imagination Technologies.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with
+ * the distribution.
+ * * Neither the name of Imagination Technologies nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+#if !defined(UNALIGNED_INSTR_SUPPORT)
+/* does target have unaligned lw/ld/ualw/uald instructions? */
+#define UNALIGNED_INSTR_SUPPORT 0
+#if __mips_isa_rev < 6 && !__mips1
+#undef UNALIGNED_INSTR_SUPPORT
+#define UNALIGNED_INSTR_SUPPORT 1
+#endif
+#endif
+
+#if !defined(HW_UNALIGNED_SUPPORT)
+/* Does target have hardware support for unaligned accesses? */
+#define HW_UNALIGNED_SUPPORT 0
+#if __mips_isa_rev >= 6
+#undef HW_UNALIGNED_SUPPORT
+#define HW_UNALIGNED_SUPPORT 1
+#endif
+#endif
+
+#define ENABLE_PREFETCH 1
+
+#if ENABLE_PREFETCH
+#define PREFETCH(addr) __builtin_prefetch (addr, 0, 1);
+#else
+#define PREFETCH(addr)
+#endif
+
+#if _MIPS_SIM == _ABIO32
+typedef unsigned long reg_t;
+typedef struct
+{
+ reg_t B0:8, B1:8, B2:8, B3:8;
+} bits_t;
+#else
+typedef unsigned long long reg_t;
+typedef struct
+{
+ reg_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
+} bits_t;
+#endif
+
+typedef union
+{
+ reg_t v;
+ bits_t b;
+} bitfields_t;
+
+#define DO_BYTE(a, i) \
+ a[i] = bw.b.B##i; \
+ len--; \
+ if(!len) return ret; \
+
+/* This code is called when aligning a pointer, there are remaining bytes
+ after doing word compares, or architecture does not have some form
+ of unaligned support. */
+static inline void * __attribute__ ((always_inline))
+do_bytes (void *a, const void *b, unsigned long len, void *ret)
+{
+ /* Forward byte-by-byte copy of 'len' bytes from 'b' to 'a'. 'ret' is
+ passed through untouched (the original dst pointer) so callers can
+ tail-call this helper and still return the right value. */
+ unsigned char *x = (unsigned char *) a;
+ unsigned char *y = (unsigned char *) b;
+ unsigned long i;
+
+ /* 'len' might be zero here, so preloading the first two values
+ before the loop may access unallocated memory. */
+ for (i = 0; i < len; i++) {
+ *x = *y;
+ x++;
+ y++;
+ }
+ return ret;
+}
+
+/* This code is called to copy only remaining bytes within word or doubleword */
+static inline void * __attribute__ ((always_inline))
+do_bytes_remaining (void *a, const void *b, unsigned long len, void *ret)
+{
+ /* Copy the final 'len' (< sizeof (reg_t)) bytes. Callers
+ (aligned_words / unaligned_words) pass a (d)word-aligned 'b', so one
+ whole word is loaded and its bytes scattered to 'a' via the bitfield
+ union. The DO_BYTE macro decrements 'len' and returns 'ret' from this
+ function as soon as it reaches zero, so at most 'len' bytes are stored.
+ NOTE: the local must be named 'bw' — DO_BYTE expands to 'bw.b.B<i>'. */
+ unsigned char *x = (unsigned char *) a;
+
+ if(len > 0) {
+ bitfields_t bw;
+ bw.v = *((reg_t*) b);
+
+#if __mips64
+ DO_BYTE(x, 0);
+ DO_BYTE(x, 1);
+ DO_BYTE(x, 2);
+ DO_BYTE(x, 3);
+ DO_BYTE(x, 4);
+ DO_BYTE(x, 5);
+ DO_BYTE(x, 6);
+ DO_BYTE(x, 7);
+#else
+ DO_BYTE(x, 0);
+ DO_BYTE(x, 1);
+ DO_BYTE(x, 2);
+ DO_BYTE(x, 3);
+#endif
+ }
+
+ return ret;
+}
+
+#if !HW_UNALIGNED_SUPPORT
+#if UNALIGNED_INSTR_SUPPORT
+/* for MIPS GCC, there are no unaligned builtins - so this struct forces
+ the compiler to treat the pointer access as unaligned. */
+struct ulw
+{
+ reg_t uli;
+} __attribute__ ((packed));
+
+/* first pointer is not aligned while second pointer is. */
+static void *
+unaligned_words (struct ulw *a, const reg_t * b,
+ unsigned long words, unsigned long bytes, void *ret)
+{
+ /* Copy 'words' (d)words from aligned 'b' to unaligned 'a'. Stores go
+ through the packed 'struct ulw' member, which forces the compiler to
+ emit unaligned-capable store instructions. The unroll factor is 8 for
+ o32 / I6400 tuning, 4 otherwise. */
+#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
+ unsigned long i, words_by_8, words_by_1;
+ words_by_1 = words % 8;
+ words_by_8 = words >> 3;
+ for (; words_by_8 > 0; words_by_8--) {
+ /* Skip the prefetch on the last chunk so we do not touch past the
+ end of the source region. */
+ if(words_by_8 != 1)
+ PREFETCH (b + 8);
+ reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
+ reg_t y4 = b[4], y5 = b[5], y6 = b[6], y7 = b[7];
+ a[0].uli = y0;
+ a[1].uli = y1;
+ a[2].uli = y2;
+ a[3].uli = y3;
+ a[4].uli = y4;
+ a[5].uli = y5;
+ a[6].uli = y6;
+ a[7].uli = y7;
+ a += 8;
+ b += 8;
+ }
+#else
+ unsigned long i, words_by_4, words_by_1;
+ words_by_1 = words % 4;
+ words_by_4 = words >> 2;
+ for (; words_by_4 > 0; words_by_4--) {
+ /* Skip the prefetch on the last chunk (see above). */
+ if(words_by_4 != 1)
+ PREFETCH (b + 4);
+ reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
+ a[0].uli = y0;
+ a[1].uli = y1;
+ a[2].uli = y2;
+ a[3].uli = y3;
+ a += 4;
+ b += 4;
+ }
+#endif
+
+ /* do remaining words. */
+ for (i = 0; i < words_by_1; i++) {
+ a->uli = *b;
+ a += 1;
+ b += 1;
+ }
+
+ /* mop up any remaining bytes. */
+ return do_bytes_remaining (a, b, bytes, ret);
+}
+#else
+/* no HW support or unaligned lw/ld/ualw/uald instructions. */
<br></br>
+static void *
+unaligned_words (reg_t * a, const reg_t * b,
+ unsigned long words, unsigned long bytes, void *ret)
+{
+ /* Fallback for targets with neither hardware unaligned support nor
+ unaligned load/store instructions (e.g. mips1): load each (d)word from
+ the aligned source 'b' and store it to the unaligned destination one
+ byte at a time through the bitfield union. */
+ unsigned long i;
+ unsigned char *x = (unsigned char *) a;
+
+ for (i = 0; i < words; i++) {
+ bitfields_t bw;
+ bw.v = *((reg_t*) b);
+ /* Re-derive the byte cursor from 'a' each iteration since 'a'
+ advances by whole words below. */
+ x = (unsigned char *) a;
+#if __mips64
+ x[0] = bw.b.B0;
+ x[1] = bw.b.B1;
+ x[2] = bw.b.B2;
+ x[3] = bw.b.B3;
+ x[4] = bw.b.B4;
+ x[5] = bw.b.B5;
+ x[6] = bw.b.B6;
+ x[7] = bw.b.B7;
+#else
+ x[0] = bw.b.B0;
+ x[1] = bw.b.B1;
+ x[2] = bw.b.B2;
+ x[3] = bw.b.B3;
+#endif
+ a += 1;
+ b += 1;
+ }
+
+ /* mop up any remaining bytes */
+ return do_bytes_remaining (a, b, bytes, ret);
+}
+#endif /* UNALIGNED_INSTR_SUPPORT */
+#endif /* HW_UNALIGNED_SUPPORT */
+
+/* both pointers are aligned, or first isn't and HW support for unaligned. */
+static void *
+aligned_words (reg_t * a, const reg_t * b,
+ unsigned long words, unsigned long bytes, void *ret)
+{
+ /* Copy 'words' (d)words with plain word loads/stores. 'b' is aligned;
+ 'a' is either aligned too, or the hardware handles unaligned accesses
+ (HW_UNALIGNED_SUPPORT path in memcpy). Unroll by 8 for o32 / I6400
+ tuning, by 4 otherwise. */
+#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
+ unsigned long i, words_by_8, words_by_1;
+ words_by_1 = words % 8;
+ words_by_8 = words >> 3;
+ for (; words_by_8 > 0; words_by_8--) {
+ /* No prefetch on the final chunk: avoid touching past the source. */
+ if(words_by_8 != 1)
+ PREFETCH (b + 8);
+ reg_t x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3];
+ reg_t x4 = b[4], x5 = b[5], x6 = b[6], x7 = b[7];
+ a[0] = x0;
+ a[1] = x1;
+ a[2] = x2;
+ a[3] = x3;
+ a[4] = x4;
+ a[5] = x5;
+ a[6] = x6;
+ a[7] = x7;
+ a += 8;
+ b += 8;
+ }
+#else
+ unsigned long i, words_by_4, words_by_1;
+ words_by_1 = words % 4;
+ words_by_4 = words >> 2;
+ for (; words_by_4 > 0; words_by_4--) {
+ /* No prefetch on the final chunk (see above). */
+ if(words_by_4 != 1)
+ PREFETCH (b + 4);
+ reg_t x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3];
+ a[0] = x0;
+ a[1] = x1;
+ a[2] = x2;
+ a[3] = x3;
+ a += 4;
+ b += 4;
+ }
+#endif
+
+ /* do remaining words. */
+ for (i = 0; i < words_by_1; i++) {
+ *a = *b;
+ a += 1;
+ b += 1;
+ }
+
+ /* mop up any remaining bytes. */
+ return do_bytes_remaining (a, b, bytes, ret);
+}
+
+/* Copy 'len' bytes from 'b' to 'a' (non-overlapping), returning 'a'.
+ Small copies go straight to the byte loop; larger ones align the source
+ pointer, then copy (d)word-at-a-time via aligned_words() or
+ unaligned_words().
+ NOTE(review): '__overloadable' appears to match bionic's FORTIFY
+ declaration of memcpy in <string.h> — confirm against the headers. */
+void *
+memcpy (void *a, const void *b, size_t len) __overloadable
+{
+ unsigned long bytes, words;
+ void *ret = a;
+
+ /* shouldn't hit that often. */
+ if (len < sizeof (reg_t) * 4) {
+ return do_bytes (a, b, len, a);
+ }
+
+ /* Align the second pointer to word/dword alignment.
+ Note that the pointer is only 32-bits for o32/n32 ABIs. For
+ n32, loads are done as 64-bit while address remains 32-bit. */
+ bytes = ((unsigned long) b) % sizeof (reg_t);
+ if (bytes) {
+ bytes = sizeof (reg_t) - bytes;
+ if (bytes > len)
+ bytes = len;
+ /* Copy the leading misaligned bytes so 'b' becomes aligned. */
+ do_bytes (a, b, bytes, ret);
+ if (len == bytes)
+ return ret;
+ len -= bytes;
+ a = (void *) (((unsigned char *) a) + bytes);
+ b = (const void *) (((unsigned char *) b) + bytes);
+ }
+
+ /* Second pointer now aligned. */
+ words = len / sizeof (reg_t);
+ bytes = len % sizeof (reg_t);
+#if HW_UNALIGNED_SUPPORT
+ /* treat possible unaligned first pointer as aligned. */
+ return aligned_words (a, b, words, bytes, ret);
+#else
+ if (((unsigned long) a) % sizeof (reg_t) == 0) {
+ return aligned_words (a, b, words, bytes, ret);
+ }
+ /* need to use unaligned instructions on first pointer. */
+ return unaligned_words (a, b, words, bytes, ret);
+#endif
+}
diff --git a/libc/arch-mips/string/memmove.c b/libc/arch-mips/string/memmove.c
new file mode 100644
index 0000000..fbff297
--- /dev/null
+++ b/libc/arch-mips/string/memmove.c
@@ -0,0 +1,468 @@
+/*
+ * Copyright (c) 2017 Imagination Technologies.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with
+ * the distribution.
+ * * Neither the name of Imagination Technologies nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+#if !defined(UNALIGNED_INSTR_SUPPORT)
+/* does target have unaligned lw/ld/ualw/uald instructions? */
+#define UNALIGNED_INSTR_SUPPORT 0
+#if __mips_isa_rev < 6 && !__mips1 /* pre-R6 (except MIPS I) provides them */
+#undef UNALIGNED_INSTR_SUPPORT
+#define UNALIGNED_INSTR_SUPPORT 1
+#endif
+#endif
+
+#if !defined(HW_UNALIGNED_SUPPORT)
+/* Does target have hardware support for unaligned accesses? */
+#define HW_UNALIGNED_SUPPORT 0
+#if __mips_isa_rev >= 6 /* R6 handles unaligned lw/ld in hardware */
+#undef HW_UNALIGNED_SUPPORT
+#define HW_UNALIGNED_SUPPORT 1
+#endif
+#endif
+
+#define ENABLE_PREFETCH 1 /* set to 0 to compile out the prefetch hints */
+
+#if ENABLE_PREFETCH
+#define PREFETCH(addr) __builtin_prefetch (addr, 0, 1); /* read, low temporal locality; NB: semicolon is part of the macro */
+#else
+#define PREFETCH(addr)
+#endif
+
+#if _MIPS_SIM == _ABIO32
+typedef unsigned long reg_t; /* o32: 32-bit registers, 4-byte copy unit */
+typedef struct
+{
+ reg_t B0:8, B1:8, B2:8, B3:8;
+} bits_t;
+#else
+typedef unsigned long long reg_t; /* n32/n64: 64-bit loads, 8-byte copy unit */
+typedef struct
+{
+ reg_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
+} bits_t;
+#endif
+
+typedef union
+{
+ reg_t v;
+ bits_t b; /* lets one register-wide word be picked apart byte by byte */
+} bitfields_t;
+
+#define DO_BYTE(a, i) \
+ a[i] = bw.b.B##i; \
+ len--; \
+ if(!len) return ret; \
+
+/* Forward byte-by-byte copy. Called when aligning a pointer, when there
+ are remaining tail bytes after the word copies, or when the architecture
+ does not have some form of unaligned support. */
+static inline void * __attribute__ ((always_inline))
+do_bytes (void *a, const void *b, unsigned long len, void *ret)
+{
+ unsigned char *x = (unsigned char *) a;
+ unsigned char *y = (unsigned char *) b;
+ unsigned long i;
+
+ /* 'len' might be zero here, so preloading the first two values
+ before the loop may access unallocated memory. */
+ for (i = 0; i < len; i++)
+ {
+ *x = *y;
+ x++;
+ y++;
+ }
+ return ret; /* ret is the caller's original destination pointer */
+}
+
+static inline void * __attribute__ ((always_inline)) /* backward byte copy for overlapping dst > src */
+do_bytes_backward (void *a, const void *b, unsigned long len, void *ret) /* a and b point one past the end of their regions */
+{
+ unsigned char *x = (unsigned char *) a;
+ unsigned char *y = (unsigned char *) b;
+ unsigned long i;
+
+ /* 'len' might be zero here, so no bytes may be touched before the
+ loop condition has been checked. */
+ for (i = 0; i < len; i++) {
+ *--x = *--y; /* pre-decrement: copy from high addresses down */
+ }
+ return ret;
+}
+
+static inline void * __attribute__ ((always_inline)) /* copy the final (< sizeof(reg_t)) bytes from an ALIGNED source */
+do_bytes_aligned (void *a, const void *b, unsigned long len, void *ret)
+{
+ unsigned char *x = (unsigned char *) a;
+
+ if(len > 0) {
+ bitfields_t bw;
+ bw.v = *((reg_t*) b); /* one aligned word load; b must be reg_t-aligned */
+
+#if __mips64
+ DO_BYTE(x, 0); /* each DO_BYTE stores a byte and returns ret once len hits 0 */
+ DO_BYTE(x, 1);
+ DO_BYTE(x, 2);
+ DO_BYTE(x, 3);
+ DO_BYTE(x, 4);
+ DO_BYTE(x, 5);
+ DO_BYTE(x, 6);
+ DO_BYTE(x, 7);
+#else
+ DO_BYTE(x, 0);
+ DO_BYTE(x, 1);
+ DO_BYTE(x, 2);
+ DO_BYTE(x, 3);
+#endif
+ }
+
+ return ret; /* NOTE(review): handles at most one word — callers must pass len < sizeof(reg_t) */
+}
+
+#if !HW_UNALIGNED_SUPPORT /* pre-R6 software path for unaligned destinations */
+#if UNALIGNED_INSTR_SUPPORT
+/* for MIPS GCC, there are no unaligned builtins - so this struct forces
+ the compiler to treat the pointer access as unaligned. */
+struct ulw
+{
+ reg_t uli;
+} __attribute__ ((packed)); /* packed => compiler emits unaligned-safe stores (presumably swl/swr — confirm in disassembly) */
+
+#define STORE_UNALIGNED_8(a, b) \
+{ \
+ reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3]; \
+ reg_t y4 = b[4], y5 = b[5], y6 = b[6], y7 = b[7]; \
+ a[0].uli = y0; \
+ a[1].uli = y1; \
+ a[2].uli = y2; \
+ a[3].uli = y3; \
+ a[4].uli = y4; \
+ a[5].uli = y5; \
+ a[6].uli = y6; \
+ a[7].uli = y7; \
+}
+
+#define STORE_UNALIGNED_4(a, b) \
+{ \
+ reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3]; \
+ a[0].uli = y0; \
+ a[1].uli = y1; \
+ a[2].uli = y2; \
+ a[3].uli = y3; \
+}
+
+/* first pointer is not aligned while second pointer is. */
+static void *
+unaligned_words_forward (struct ulw *a, const reg_t * b,
+ unsigned long words, unsigned long bytes, void *ret)
+{
+#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400) /* o32 / I6400: unroll by 8 words */
+ unsigned long i, words_by_8, words_by_1;
+ words_by_1 = words % 8;
+ words_by_8 = words >> 3;
+ for (; words_by_8 > 0; words_by_8--) {
+ if(words_by_8 != 1) /* skip prefetch on the last iteration: the next block may lie past the buffer */
+ PREFETCH (b + 8);
+ STORE_UNALIGNED_8(a, b);
+ a += 8;
+ b += 8;
+ }
+#else
+ unsigned long i, words_by_4, words_by_1;
+ words_by_1 = words % 4;
+ words_by_4 = words >> 2;
+ for (; words_by_4 > 0; words_by_4--) {
+ if(words_by_4 != 1)
+ PREFETCH (b + 4);
+ STORE_UNALIGNED_4(a, b);
+ a += 4;
+ b += 4;
+ }
+#endif
+
+ /* do remaining words. */
+ for (i = 0; i < words_by_1; i++) {
+ a->uli = *b; /* unaligned store, aligned load */
+ a += 1;
+ b += 1;
+ }
+
+ /* mop up any remaining bytes (bytes < sizeof(reg_t) here). */
+ return do_bytes_aligned (a, b, bytes, ret);
+}
+
+static void * /* backward variant: a and b point one past the end of their regions */
+unaligned_words_backward (struct ulw *a, const reg_t * b,
+ unsigned long words, unsigned long bytes, void *ret)
+{
+#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
+ unsigned long i, words_by_8, words_by_1;
+ words_by_1 = words % 8;
+ words_by_8 = words >> 3;
+ for (; words_by_8 > 0; words_by_8--) {
+ if(words_by_8 != 1)
+ PREFETCH (b - 16); /* start of the NEXT iteration's 8-word block */
+ a -= 8; /* step down first, then store b[0..7] into a[0..7] */
+ b -= 8;
+ STORE_UNALIGNED_8(a, b);
+ }
+#else
+ unsigned long i, words_by_4, words_by_1;
+ words_by_1 = words % 4;
+ words_by_4 = words >> 2;
+ for (; words_by_4 > 0; words_by_4--) {
+ if(words_by_4 != 1)
+ PREFETCH (b - 8); /* next iteration's 4-word block */
+ a -= 4;
+ b -= 4;
+ STORE_UNALIGNED_4(a, b);
+ }
+#endif
+
+ /* do remaining words. */
+ for (i = 0; i < words_by_1; i++) {
+ a -= 1;
+ b -= 1;
+ a->uli = *b;
+ }
+
+ /* mop up any remaining bytes below the last word copied. */
+ return do_bytes_backward (a, b, bytes, ret);
+}
+
+#else
+/* no HW support or unaligned lw/ld/ualw/uald instructions: fall back to
+ plain byte loops for the whole remaining region. */
+static void *
+unaligned_words_forward (reg_t * a, const reg_t * b,
+ unsigned long words, unsigned long bytes, void *ret)
+{
+ /* Must be do_bytes (unbounded byte loop), not do_bytes_aligned: the
+ latter copies at most one reg_t's worth of bytes, which would silently
+ drop data whenever words > 0. */
+ return do_bytes (a, b, (sizeof (reg_t) * words) + bytes, ret);
+}
+
+static void *
+unaligned_words_backward (reg_t * a, const reg_t * b,
+ unsigned long words, unsigned long bytes, void *ret)
+{
+ return do_bytes_backward (a, b, (sizeof (reg_t) * words) + bytes, ret);
+}
+
+#endif /* UNALIGNED_INSTR_SUPPORT */
+#endif /* HW_UNALIGNED_SUPPORT */
+
+/* Used when both pointers are aligned, or when the first is unaligned but
+ the hardware (R6) supports unaligned word accesses. */
+
+#define STORE_ALIGNED_8(a, b) \
+{ \
+ reg_t x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3]; \
+ reg_t x4 = b[4], x5 = b[5], x6 = b[6], x7 = b[7]; \
+ a[0] = x0; \
+ a[1] = x1; \
+ a[2] = x2; \
+ a[3] = x3; \
+ a[4] = x4; \
+ a[5] = x5; \
+ a[6] = x6; \
+ a[7] = x7; \
+}
+
+#define STORE_ALIGNED_4(a, b) \
+{ \
+ reg_t x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3]; \
+ a[0] = x0; \
+ a[1] = x1; \
+ a[2] = x2; \
+ a[3] = x3; \
+}
+
+static void * /* word-at-a-time forward copy; b is reg_t-aligned */
+aligned_words_forward (reg_t * a, const reg_t * b,
+ unsigned long words, unsigned long bytes, void *ret)
+{
+#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400) /* o32 / I6400: unroll by 8 words */
+ unsigned long i, words_by_8, words_by_1;
+ words_by_1 = words % 8;
+ words_by_8 = words >> 3;
+ for (; words_by_8 > 0; words_by_8--) {
+ if(words_by_8 != 1) /* no prefetch on the last iteration: next block may be past the buffer */
+ PREFETCH (b + 8);
+ STORE_ALIGNED_8(a, b);
+ a += 8;
+ b += 8;
+ }
+#else
+ unsigned long i, words_by_4, words_by_1;
+ words_by_1 = words % 4;
+ words_by_4 = words >> 2;
+ for (; words_by_4 > 0; words_by_4--) {
+ if(words_by_4 != 1)
+ PREFETCH (b + 4);
+ STORE_ALIGNED_4(a, b);
+ a += 4;
+ b += 4;
+ }
+#endif
+
+ /* do remaining words. */
+ for (i = 0; i < words_by_1; i++) {
+ *a = *b;
+ a += 1;
+ b += 1;
+ }
+
+ /* mop up any remaining bytes (bytes < sizeof(reg_t) here). */
+ return do_bytes_aligned (a, b, bytes, ret);
+}
+
+
+static void * /* backward variant: a and b point one past the end of their regions */
+aligned_words_backward (reg_t * a, const reg_t * b,
+ unsigned long words, unsigned long bytes, void *ret)
+{
+#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
+ unsigned long i, words_by_8, words_by_1;
+ words_by_1 = words % 8;
+ words_by_8 = words >> 3;
+ for (; words_by_8 > 0; words_by_8--) {
+ if(words_by_8 != 1)
+ PREFETCH (b - 16); /* start of the NEXT iteration's 8-word block */
+ a -= 8; /* step down first, then copy b[0..7] to a[0..7] */
+ b -= 8;
+ STORE_ALIGNED_8(a, b);
+ }
+#else
+ unsigned long i, words_by_4, words_by_1;
+ words_by_1 = words % 4;
+ words_by_4 = words >> 2;
+ for (; words_by_4 > 0; words_by_4--) {
+ if(words_by_4 != 1)
+ PREFETCH (b - 8); /* next iteration's 4-word block */
+ a -= 4;
+ b -= 4;
+ STORE_ALIGNED_4(a, b);
+ }
+#endif
+
+ /* do remaining words. */
+ for (i = 0; i < words_by_1; i++) {
+ a -= 1;
+ b -= 1;
+ *a = *b;
+ }
+
+ /* mop up any remaining bytes below the last word copied. */
+ return do_bytes_backward (a, b, bytes, ret);
+}
+
+void *
+memmove (void *dst0, const void *src0, size_t length) __overloadable /* overlap-safe copy: direction chosen from pointer order */
+{
+ unsigned long bytes, words;
+ void *ret = dst0; /* memmove returns the original destination pointer */
+
+ if (length == 0 || dst0 == src0) /* nothing to do */
+ return dst0;
+
+ if ((unsigned long)dst0 < (unsigned long)src0) {
+ /* Copy forwards: dst below src, so a forward copy never clobbers
+ source bytes that have not been read yet. */
+ /* Small copies (< 4 words) go byte-by-byte. */
+ if (length < sizeof (reg_t) * 4) {
+ return do_bytes (dst0, src0, length, ret);
+ }
+
+ /* Align the second pointer to word/dword alignment.
+ Note that the pointer is only 32-bits for o32/n32 ABIs. For
+ n32, loads are done as 64-bit while address remains 32-bit. */
+ bytes = ((unsigned long) src0) % sizeof (reg_t);
+ if (bytes) {
+ bytes = sizeof (reg_t) - bytes; /* head bytes up to the next boundary */
+ if (bytes > length)
+ bytes = length;
+ do_bytes (dst0, src0, bytes, ret);
+ if (length == bytes)
+ return ret;
+ length -= bytes;
+ dst0 = (void *) (((unsigned char *) dst0) + bytes);
+ src0 = (const void *) (((unsigned char *) src0) + bytes);
+ }
+
+ /* Second pointer now aligned. */
+ words = length / sizeof (reg_t);
+ bytes = length % sizeof (reg_t);
+#if HW_UNALIGNED_SUPPORT
+ /* treat possible unaligned first pointer as aligned. */
+ return aligned_words_forward (dst0, src0, words, bytes, ret);
+#else
+ if (((unsigned long) dst0) % sizeof (reg_t) == 0) {
+ return aligned_words_forward (dst0, src0, words, bytes, ret);
+ }
+ /* need to use unaligned instructions on first pointer. */
+ return unaligned_words_forward (dst0, src0, words, bytes, ret);
+#endif
+ } else {
+ /* Copy backwards: dst above src, so start from the high end. */
+ dst0 = (void *) (((unsigned char *) dst0) + length); /* point one past the end; backward helpers pre-decrement */
+ src0 = (const void *) (((unsigned char *) src0) + length);
+
+ /* Small copies (< 4 words) go byte-by-byte. */
+ if (length < sizeof (reg_t) * 4) {
+ return do_bytes_backward (dst0, src0, length, ret);
+ }
+
+ /* Align the second pointer to word/dword alignment.
+ Note that the pointer is only 32-bits for o32/n32 ABIs. For
+ n32, loads are done as 64-bit while address remains 32-bit. */
+ bytes = ((unsigned long) src0) % sizeof (reg_t);
+ if (bytes) {
+ if (bytes > length)
+ bytes = length;
+ do_bytes_backward (dst0, src0, bytes, ret); /* copy down to the boundary below src0 */
+ if (length == bytes)
+ return ret;
+ length -= bytes;
+ dst0 = (void *) (((unsigned char *) dst0) - bytes);
+ src0 = (const void *) (((unsigned char *) src0) - bytes);
+ }
+
+ words = length / sizeof (reg_t);
+ bytes = length % sizeof (reg_t);
+#if HW_UNALIGNED_SUPPORT
+ /* treat possible unaligned first pointer as aligned. */
+ return aligned_words_backward ((void *)dst0, (void *)src0, words, bytes, ret); /* (void *) also casts away const on src0 — works, but inconsistent with the forward path */
+#else
+ if (((unsigned long) dst0) % sizeof (reg_t) == 0) {
+ return aligned_words_backward (dst0, src0, words, bytes, ret);
+ }
+ /* need to use unaligned instructions on first pointer. */
+ return unaligned_words_backward (dst0, src0, words, bytes, ret);
+#endif
+ }
+}
diff --git a/libc/arch-mips/string/mips-string-ops.h b/libc/arch-mips/string/mips-string-ops.h
deleted file mode 100644
index 50f7e3a..0000000
--- a/libc/arch-mips/string/mips-string-ops.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (c) 2010 MIPS Technologies, Inc.
- *
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with
- * the distribution.
- * * Neither the name of MIPS Technologies Inc. nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __MIPS_STRING_OPS_H
-#define __MIPS_STRING_OPS_H
- /* This definition of the byte bitfields uses the
- assumption that the layout of the bitfields is
- equivalent to the layout in memory. Generally,
- for the MIPS ABIs, this is true. If you compile
- the strcmp.c file with -DSMOKE_TEST_NEW_STRCMP,
- this assumption will be tested.
-
- Also, regardless of char signedness, ANSI C dictates that
- strcmp() treats each character as unsigned char. For
- strlen and the like, signedness doesn't matter.
-
- Also, this code assumes that there are 8-bits per 'char'. */
-
-#if __mips64
-typedef struct bits
-{
- unsigned B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
-} bits_t;
-#else
-typedef struct bits
-{
- unsigned B0:8, B1:8, B2:8, B3:8;
-} bits_t;
-#endif
-
-#ifndef _ULW
- /* for MIPS GCC, there is no unaligned builtins - so this code forces
- the compiler to treat the pointer access as unaligned. */
-struct ulw
-{
- unsigned b;
-} __attribute__ ((packed));
-
-#define _ULW(__x) ((struct ulw *) ((char *)(&__x)))->b;
-#endif
-
-/* This union assumes that small structures can be in registers. If
- not, then memory accesses will be done - not optimal, but ok. */
-typedef union
-{
- unsigned v;
- bits_t b;
-} bitfields_t;
-
-#ifndef detect_zero
-/* __mips_dsp, __mips_dspr2, and __mips64 are predefined by
- the compiler, based on command line options. */
-#if (__mips_dsp || __mips_dspr2) && !__mips64
-#define __mips_using_dsp 1
-
-/* DSP 4-lane (8 unsigned bits per line) subtract and saturate
- * Intrinsic operation. How this works:
- * Given a 4-byte string of "ABC\0", subtract this as
- * an unsigned integer from 0x01010101:
- * 0x01010101
- * - 0x41424300
- * -----------
- ( 0xbfbebe01 <-- answer without saturation
- * 0x00000001 <-- answer with saturation
- * When this 4-lane vector is treated as an unsigned int value,
- * a non-zero answer indicates the presence of a zero in the
- * original 4-byte argument. */
-
-typedef signed char v4i8 __attribute__ ((vector_size (4)));
-
-#define detect_zero(__x,__y,__01s,__80s)\
- ((unsigned) __builtin_mips_subu_s_qb((v4i8) __01s,(v4i8) __x))
-
- /* sets all 4 lanes to requested byte. */
-#define set_byte_lanes(__x) ((unsigned) __builtin_mips_repl_qb(__x))
-
- /* sets all 4 lanes to 0x01. */
-#define def_and_set_01(__x) unsigned __x = (unsigned) __builtin_mips_repl_qb(0x01)
-
- /* sets all 4 lanes to 0x80. Not needed when subu_s.qb used. */
-#define def_and_set_80(__x) /* do nothing */
-
-#else
- /* this version, originally published in the 80's, uses
- a reverse-carry-set like determination of the zero byte.
- The steps are, for __x = 0x31ff0001:
- __x - _01s = 0x30fdff00
- ~__x = 0xce00fffe
- ((__x - _01s) & ~__x) = 0x0000ff00
- x & _80s = 0x00008000 <- byte 3 was zero
- Some implementaions naively assume that characters are
- always 7-bit unsigned ASCII. With that assumption, the
- "& ~x" is usually discarded. Since character strings
- are 8-bit, the and is needed to catch the case of
- a false positive when the byte is 0x80. */
-
-#define detect_zero(__x,__y,_01s,_80s)\
- ((unsigned) (((__x) - _01s) & ~(__x)) & _80s)
-
-#if __mips64
-#define def_and_set_80(__x) unsigned __x = 0x8080808080808080ul
-#define def_and_set_01(__x) unsigned __x = 0x0101010101010101ul
-#else
-#define def_and_set_80(__x) unsigned __x = 0x80808080ul
-#define def_and_set_01(__x) unsigned __x = 0x01010101ul
-#endif
-
-#endif
-#endif
-
-/* dealing with 'void *' conversions without using extra variables. */
-#define get_byte(__x,__idx) (((unsigned char *) (__x))[__idx])
-#define set_byte(__x,__idx,__fill) ((unsigned char *) (__x))[__idx] = (__fill)
-#define get_word(__x,__idx) (((unsigned *) (__x))[__idx])
-#define set_word(__x,__idx,__fill) ((unsigned *) (__x))[__idx] = (__fill)
-#define inc_ptr_as(__type,__x,__inc) __x = (void *) (((__type) __x) + (__inc))
-#define cvt_ptr_to(__type,__x) ((__type) (__x))
-
-#endif
diff --git a/libc/arch-mips/string/mips_strlen.c b/libc/arch-mips/string/mips_strlen.c
deleted file mode 100644
index f1465f2..0000000
--- a/libc/arch-mips/string/mips_strlen.c
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Copyright (c) 2010 MIPS Technologies, Inc.
- *
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with
- * the distribution.
- * * Neither the name of MIPS Technologies Inc. nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <string.h>
-#include "mips-string-ops.h"
-
-#define do_strlen_word(__av) {\
- if (detect_zero(x,x,_01s,_80s)) break;\
- x = __av;\
- cnt += sizeof (unsigned);\
- }
-
-#define do_strlen_byte(__x) {\
- if ((bx.b.B##__x) == 0) break;\
- ++cnt;\
- }
-
-#if SMOKE_TEST_MIPS_STRLEN
-#define strlen my_strlen
-#endif
-
-size_t
-strlen (const char *_a) __overloadable
-{
- int cnt = 0;
- unsigned x;
-
- /* align the string to word boundary so we can do word at a time. */
- if ((cvt_ptr_to (unsigned, _a) & (sizeof (unsigned) - 1)) != 0)
- {
- if ((cvt_ptr_to (unsigned, _a) & 1) != 0)
- {
- if (get_byte (_a, 0) == 0)
- return cnt;
- /* set bit 1 so 2-bytes are checked and incremented. */
- inc_ptr_as (char *, _a, 1);
- ++cnt;
- }
- if ((cvt_ptr_to (unsigned, _a) & 2) != 0)
- {
- if (get_byte (_a, 0) == 0)
- return cnt + 0;
- if (get_byte (_a, 1) == 0)
- return cnt + 1;
- inc_ptr_as (char *, _a, 2);
- cnt += 2;
- }
- }
-
-#if __mips64
-#error strlen: mips64 check for 4-byte alignment not implemented.
-#endif
-
- if (1)
- {
- def_and_set_01 (_01s);
- def_and_set_80 (_80s);
-
- /* as advantagous as it is to performance, this code cannot pre-load
- the following word, nor can it prefetch the next line at the start
- of the loop since the string can be at the end of a page with the
- following page unmapped. There are tests in the suite to catch
- any attempt to go beyond the current word. */
- x = get_word (_a, 0);
- while (1)
- {
- /* doing 8 words should cover most strings. */
- do_strlen_word (get_word (_a, 1));
- do_strlen_word (get_word (_a, 2));
- do_strlen_word (get_word (_a, 3));
- do_strlen_word (get_word (_a, 4));
- do_strlen_word (get_word (_a, 5));
- do_strlen_word (get_word (_a, 6));
- do_strlen_word (get_word (_a, 7));
- do_strlen_word (get_word (_a, 8));
- inc_ptr_as (unsigned *, _a, 8);
- }
- }
- while (1)
- {
- /* pull apart the last word processed and find the zero. */
- bitfields_t bx;
- bx.v = x;
-#if __mips64
- do_strlen_byte (0);
- do_strlen_byte (1);
- do_strlen_byte (2);
- do_strlen_byte (3);
- do_strlen_byte (4);
- do_strlen_byte (5);
- do_strlen_byte (6);
-#else
- do_strlen_byte (0);
- do_strlen_byte (1);
- do_strlen_byte (2);
-#endif
- /* last byte is zero */
- break;
- }
- return cnt;
-}
-
-#undef do_strlen_byte
-#undef do_strlen_word
-
-#if SMOKE_TEST_MIPS_STRLEN
-#include <stdio.h>
-char str1[] = "DHRYSTONE PROGRAM, 1'ST STRING";
-char str2[] = "DHRYSTONE PROGRAM, 2'ST STRING";
-
-char str3[] = "another string";
-char str4[] = "another";
-
-char str5[] = "somes tring";
-char str6[] = "somes_tring";
-
-char str7[16], str8[16];
-
-static char *
-chk (unsigned mine, unsigned libs, int *errors)
-{
- static char answer[1024];
- char *result = mine == libs ? "PASS" : "FAIL";
- sprintf (answer, "new_strlen=%d: lib_strlen=%d: %s!", mine, libs, result);
- if (mine != libs)
- (*errors)++;
- return answer;
-}
-
-int
-main (int argc, char **argv)
-{
- int errors = 0;
- /* set -1 in one position */
- str6[5] = 0xff;
- /* set zero in same position with junk in following 3 */
- str7[0] = str8[0] = 0;
- str7[1] = 0xff;
- str7[2] = 'a';
- str7[3] = 2;
- str8[1] = 's';
- str8[2] = -2;
- str8[3] = 0;
-
- fprintf (stderr, "========== mips_strlen%s test...\n",
- argv[0] ? argv[0] : "unknown strlen");
-#define P(__x,__y) {\
- int a = my_strlen(__x + __y);\
- int b = (strlen)(__x + __y) /* library version */;\
- fprintf(stderr,"%s+%d: %s\n",#__x,__y,chk(a,b,&errors));\
- }
-
- P (str1, 0);
- P (str1, 1);
- P (str1, 2);
- P (str1, 3);
-
- P (str2, 0);
- P (str2, 1);
- P (str2, 2);
- P (str2, 3);
-
- P (str3, 0);
- P (str3, 1);
- P (str3, 2);
- P (str3, 3);
-
- P (str4, 0);
- P (str4, 1);
- P (str4, 2);
- P (str4, 3);
-
- P (str5, 0);
- P (str5, 1);
- P (str5, 2);
- P (str5, 3);
-
- P (str6, 0);
- P (str6, 1);
- P (str6, 2);
- P (str6, 3);
-
- P (str7, 0);
- P (str7, 1);
- P (str7, 2);
- P (str7, 3);
-
- P (str8, 0);
- P (str8, 1);
- P (str8, 2);
- P (str8, 3);
-
- return errors;
-}
-#endif
diff --git a/libc/arch-mips/string/strchr.c b/libc/arch-mips/string/strchr.c
new file mode 100644
index 0000000..c9397e7
--- /dev/null
+++ b/libc/arch-mips/string/strchr.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2017 Imagination Technologies.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with
+ * the distribution.
+ * * Neither the name of Imagination Technologies nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+#define op_t unsigned long int /* native word: 4 bytes on 32-bit, 8 on mips64 */
+#define op_size sizeof (op_t)
+
+#if __mips64
+typedef struct
+{
+ op_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8; /* one word viewed as 8 bytes */
+} bits_t;
+#else
+typedef struct
+{
+ op_t B0:8, B1:8, B2:8, B3:8;
+} bits_t;
+#endif
+
+typedef union
+{
+ op_t v;
+ bits_t b; /* byte view of a whole word */
+} bitfields_t;
+
+#define DO_BYTE(i) \
+ if (a.b.B##i != ch) { \
+ if(a.b.B##i == '\0') return 0; \
+ p++; \
+ } else \
+ return (char *)p;
+
+#define DO_WORD(w, cnt) { \
+ op_t val = w[cnt] ^ mask_c; \
+ if ((((w[cnt] - mask_1) & ~w[cnt]) & mask_128) || \
+ (((val - mask_1) & ~val) & mask_128)) { \
+ return do_bytes(w + cnt, ch); \
+ } \
+}
+
+static inline char * __attribute__ ((always_inline)) /* scan one word known to hold ch or NUL; return ptr to ch, or 0 on NUL */
+do_bytes (const op_t* w, unsigned char ch)
+{
+ bitfields_t a;
+ unsigned char* p = (unsigned char *) w;
+ a.v = *w; /* reload the word and inspect it byte by byte */
+#if __mips64
+ DO_BYTE(0)
+ DO_BYTE(1)
+ DO_BYTE(2)
+ DO_BYTE(3)
+ DO_BYTE(4)
+ DO_BYTE(5)
+ DO_BYTE(6)
+ DO_BYTE(7)
+#else
+ DO_BYTE(0)
+ DO_BYTE(1)
+ DO_BYTE(2)
+ DO_BYTE(3)
+#endif
+ return (char *)p; /* NOTE(review): only reached if the caller's word test misfired — effectively unreachable */
+}
+
+char* strchr(const char* s, int c) __overloadable /* word-at-a-time strchr; also matches c == '\0' at the terminator */
+{
+ const op_t *w;
+ op_t mask_1, mask_128, mask_c;
+ const unsigned char ch = c; /* per ISO C, c is converted to char (compared as unsigned char) */
+ unsigned char* p = (unsigned char *) s;
+
+ /*
+ * Check byte by byte until p reaches word alignment
+ */
+ for ( ; *p != ch && ((size_t) p % op_size) != 0; p++)
+ if (*p == '\0')
+ return 0;
+
+ if (*p != ch) { /* aligned and no match yet: switch to whole-word scanning */
+ w = (const op_t *) p;
+
+ mask_c = ch | (ch << 8); /* replicate ch into every byte lane */
+ mask_c |= mask_c << 16;
+ __asm__ volatile ( /* NOTE(review): inline asm li presumably forces a 2-instruction immediate instead of a memory load — confirm intent */
+ "li %0, 0x01010101 \n\t"
+ : "=r" (mask_1)
+ );
+#if __mips64
+ mask_1 |= mask_1 << 32;
+ mask_c |= mask_c << 32;
+#endif
+ mask_128 = mask_1 << 7; /* 0x80 in every byte lane */
+
+ /*
+ * Check word/dword wise after initial alignment till character match
+ * or end of string; aligned word reads past the NUL stay within the page
+ */
+ while (1) {
+ DO_WORD(w, 0)
+ DO_WORD(w, 1)
+ DO_WORD(w, 2)
+ DO_WORD(w, 3)
+ w += 4;
+ }
+ }
+
+ return (char *)p; /* matched (or hit ch == '\0') during the alignment loop */
+}
diff --git a/libc/arch-mips/string/strcmp.S b/libc/arch-mips/string/strcmp.S
index 2b67f5a..e1faf2d 100644
--- a/libc/arch-mips/string/strcmp.S
+++ b/libc/arch-mips/string/strcmp.S
@@ -1,30 +1,33 @@
/*
- * Copyright (c) 2014
- * Imagination Technologies Limited.
+ * Copyright (c) 2017 Imagination Technologies.
+ *
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
*
- * THIS SOFTWARE IS PROVIDED BY IMAGINATION TECHNOLOGIES LIMITED ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL IMAGINATION TECHNOLOGIES LIMITED BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with
+ * the distribution.
+ * * Neither the name of Imagination Technologies nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef __ANDROID__
@@ -41,6 +44,22 @@
# include <sys/asm.h>
#endif
+#if __mips64
+# define NSIZE 8
+# define LW ld
+# define EXT dext
+# define SRL dsrl
+# define SLL dsll
+# define SUBU dsubu
+#else
+# define NSIZE 4
+# define LW lw
+# define EXT ext
+# define SRL srl
+# define SLL sll
+# define SUBU subu
+#endif
+
/* Technically strcmp should not read past the end of the strings being
compared. We will read a full word that may contain excess bits beyond
the NULL string terminator but unless ENABLE_READAHEAD is set, we will not
@@ -77,6 +96,23 @@
# endif
#endif
+/* It might seem better to do the 'beq' instruction between the two 'lbu'
+ instructions so that the nop is not needed but testing showed that this
+ code is actually faster (based on glibc strcmp test). */
+#define BYTECMP01(OFFSET) \
+ lbu v0, OFFSET(a0); \
+ lbu v1, OFFSET(a1); \
+ beq v0, zero, L(bexit01); \
+ nop; \
+ bne v0, v1, L(bexit01)
+
+#define BYTECMP89(OFFSET) \
+ lbu t8, OFFSET(a0); \
+ lbu t9, OFFSET(a1); \
+ beq t8, zero, L(bexit89); \
+ nop; \
+ bne t8, t9, L(bexit89)
+
/* Allow the routine to be named something else if desired. */
#ifndef STRCMP_NAME
# define STRCMP_NAME strcmp
@@ -87,170 +123,236 @@
#else
LEAF(STRCMP_NAME)
#endif
- .set nomips16
- .set noreorder
+ .set nomips16
+ .set noreorder
- or t0, a0, a1
- andi t0,0x3
- bne t0, zero, L(byteloop)
+ andi t1, a1, (NSIZE - 1)
+ beqz t1, L(exitalign)
+ or t0, zero, NSIZE
+ SUBU t1, t0, t1 #process at most (NSIZE - 1) bytes
-/* Both strings are 4 byte aligned at this point. */
+L(alignloop): #do by bytes until a1 aligned
+ BYTECMP01(0)
+ SUBU t1, t1, 0x1
+ PTR_ADDIU a0, a0, 0x1
+ bnez t1, L(alignloop)
+ PTR_ADDIU a1, a1, 0x1
- lui t8, 0x0101
- ori t8, t8, 0x0101
- lui t9, 0x7f7f
- ori t9, 0x7f7f
+L(exitalign):
-#define STRCMP32(OFFSET) \
- lw v0, OFFSET(a0); \
- lw v1, OFFSET(a1); \
- subu t0, v0, t8; \
- bne v0, v1, L(worddiff); \
- nor t1, v0, t9; \
- and t0, t0, t1; \
- bne t0, zero, L(returnzero)
+/* string a1 is NSIZE byte aligned at this point. */
+
+ lui t8, 0x0101
+ ori t8, 0x0101
+ lui t9, 0x7f7f
+ ori t9, 0x7f7f
+#if __mips64
+ dsll t1, t8, 32
+ or t8, t1
+ dsll t1, t9, 32
+ or t9, t1
+#endif
+
+ andi t2, a0, (NSIZE - 1) #check if a0 aligned
+ SUBU t3, t0, t2 #t3 will be used as shifter
+ bnez t2, L(uloopenter)
+ SUBU a2, a0, t2 #bring back a0 to aligned position
+
+#define STRCMPW(OFFSET) \
+ LW v0, OFFSET(a0); \
+ LW v1, OFFSET(a1); \
+ SUBU t0, v0, t8; \
+ bne v0, v1, L(worddiff); \
+ nor t1, v0, t9; \
+ and t0, t0, t1; \
+ bne t0, zero, L(returnzero);\
L(wordloop):
- STRCMP32(0)
- DELAY_READ
- STRCMP32(4)
- DELAY_READ
- STRCMP32(8)
- DELAY_READ
- STRCMP32(12)
- DELAY_READ
- STRCMP32(16)
- DELAY_READ
- STRCMP32(20)
- DELAY_READ
- STRCMP32(24)
- DELAY_READ
- STRCMP32(28)
- PTR_ADDIU a0, a0, 32
- b L(wordloop)
- PTR_ADDIU a1, a1, 32
+ STRCMPW(0 * NSIZE)
+ DELAY_READ
+ STRCMPW(1 * NSIZE)
+ DELAY_READ
+ STRCMPW(2 * NSIZE)
+ DELAY_READ
+ STRCMPW(3 * NSIZE)
+ DELAY_READ
+ STRCMPW(4 * NSIZE)
+ DELAY_READ
+ STRCMPW(5 * NSIZE)
+ DELAY_READ
+ STRCMPW(6 * NSIZE)
+ DELAY_READ
+ STRCMPW(7 * NSIZE)
+ PTR_ADDIU a0, a0, (8 * NSIZE)
+ b L(wordloop)
+ PTR_ADDIU a1, a1, (8 * NSIZE)
+
+#define USTRCMPW(OFFSET) \
+ LW v1, OFFSET(a1); \
+ SUBU t0, v0, t8; \
+ nor t1, v0, t9; \
+ and t0, t0, t1; \
+ bne t0, zero, L(worddiff); \
+ SRL v0, t2; \
+ LW a3, (OFFSET + NSIZE)(a2); \
+ SUBU t0, v1, t8; \
+ SLL t1, a3, t3; \
+ or v0, v0, t1; \
+ bne v0, v1, L(worddiff); \
+ nor t1, v1, t9; \
+ and t0, t0, t1; \
+ bne t0, zero, L(returnzero); \
+ move v0, a3;\
+
+L(uloopenter):
+ LW v0, 0(a2)
+ SLL t2, 3 #multiply by 8
+ SLL t3, 3 #multiply by 8
+ li a3, -1 #all 1s
+ SRL a3, t3
+ or v0, a3 #replace with all 1s if zeros in unintended read
+
+L(uwordloop):
+ USTRCMPW(0 * NSIZE)
+ USTRCMPW(1 * NSIZE)
+ USTRCMPW(2 * NSIZE)
+ USTRCMPW(3 * NSIZE)
+ USTRCMPW(4 * NSIZE)
+ USTRCMPW(5 * NSIZE)
+ USTRCMPW(6 * NSIZE)
+ USTRCMPW(7 * NSIZE)
+ PTR_ADDIU a2, a2, (8 * NSIZE)
+ b L(uwordloop)
+ PTR_ADDIU a1, a1, (8 * NSIZE)
L(returnzero):
- j ra
- move v0, zero
+ j ra
+ move v0, zero
+
+#if __mips_isa_rev > 1
+#define EXT_COMPARE01(POS) \
+ EXT t0, v0, POS, 8; \
+ beq t0, zero, L(wexit01); \
+ EXT t1, v1, POS, 8; \
+ bne t0, t1, L(wexit01)
+#define EXT_COMPARE89(POS) \
+ EXT t8, v0, POS, 8; \
+ beq t8, zero, L(wexit89); \
+ EXT t9, v1, POS, 8; \
+ bne t8, t9, L(wexit89)
+#else
+#define EXT_COMPARE01(POS) \
+ SRL t0, v0, POS; \
+ SRL t1, v1, POS; \
+ andi t0, t0, 0xff; \
+ beq t0, zero, L(wexit01); \
+ andi t1, t1, 0xff; \
+ bne t0, t1, L(wexit01)
+#define EXT_COMPARE89(POS) \
+ SRL t8, v0, POS; \
+ SRL t9, v1, POS; \
+ andi t8, t8, 0xff; \
+ beq t8, zero, L(wexit89); \
+ andi t9, t9, 0xff; \
+ bne t8, t9, L(wexit89)
+#endif
L(worddiff):
#ifdef USE_CLZ
- subu t0, v0, t8
- nor t1, v0, t9
- and t1, t0, t1
- xor t0, v0, v1
- or t0, t0, t1
+ SUBU t0, v0, t8
+ nor t1, v0, t9
+ and t1, t0, t1
+ xor t0, v0, v1
+ or t0, t0, t1
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- wsbh t0, t0
- rotr t0, t0, 16
+ wsbh t0, t0
+ rotr t0, t0, 16
# endif
- clz t1, t0
- and t1, 0xf8
+ clz t1, t0
+ and t1, 0xf8
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
- neg t1
- addu t1, 24
+ neg t1
+ addu t1, 24
# endif
- rotrv v0, v0, t1
- rotrv v1, v1, t1
- and v0, v0, 0xff
- and v1, v1, 0xff
- j ra
- subu v0, v0, v1
+ rotrv v0, v0, t1
+ rotrv v1, v1, t1
+ and v0, v0, 0xff
+ and v1, v1, 0xff
+ j ra
+ SUBU v0, v0, v1
#else /* USE_CLZ */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- andi t0, v0, 0xff
- beq t0, zero, L(wexit01)
- andi t1, v1, 0xff
- bne t0, t1, L(wexit01)
+ andi t0, v0, 0xff
+ beq t0, zero, L(wexit01)
+ andi t1, v1, 0xff
+ bne t0, t1, L(wexit01)
+ EXT_COMPARE89(8)
+ EXT_COMPARE01(16)
+#ifndef __mips64
+ SRL t8, v0, 24
+ SRL t9, v1, 24
+#else
+ EXT_COMPARE89(24)
+ EXT_COMPARE01(32)
+ EXT_COMPARE89(40)
+ EXT_COMPARE01(48)
+ SRL t8, v0, 56
+ SRL t9, v1, 56
+#endif
- srl t8, v0, 8
- srl t9, v1, 8
- andi t8, t8, 0xff
- beq t8, zero, L(wexit89)
- andi t9, t9, 0xff
- bne t8, t9, L(wexit89)
-
- srl t0, v0, 16
- srl t1, v1, 16
- andi t0, t0, 0xff
- beq t0, zero, L(wexit01)
- andi t1, t1, 0xff
- bne t0, t1, L(wexit01)
-
- srl t8, v0, 24
- srl t9, v1, 24
# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
- srl t0, v0, 24
- beq t0, zero, L(wexit01)
- srl t1, v1, 24
- bne t0, t1, L(wexit01)
+#ifdef __mips64
+ SRL t0, v0, 56
+ beq t0, zero, L(wexit01)
+ SRL t1, v1, 56
+ bne t0, t1, L(wexit01)
+ EXT_COMPARE89(48)
+ EXT_COMPARE01(40)
+ EXT_COMPARE89(32)
+ EXT_COMPARE01(24)
+#else
+ SRL t0, v0, 24
+ beq t0, zero, L(wexit01)
+ SRL t1, v1, 24
+ bne t0, t1, L(wexit01)
+#endif
+ EXT_COMPARE89(16)
+ EXT_COMPARE01(8)
- srl t8, v0, 16
- srl t9, v1, 16
- andi t8, t8, 0xff
- beq t8, zero, L(wexit89)
- andi t9, t9, 0xff
- bne t8, t9, L(wexit89)
-
- srl t0, v0, 8
- srl t1, v1, 8
- andi t0, t0, 0xff
- beq t0, zero, L(wexit01)
- andi t1, t1, 0xff
- bne t0, t1, L(wexit01)
-
- andi t8, v0, 0xff
- andi t9, v1, 0xff
+ andi t8, v0, 0xff
+ andi t9, v1, 0xff
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
L(wexit89):
- j ra
- subu v0, t8, t9
+ j ra
+ SUBU v0, t8, t9
L(wexit01):
- j ra
- subu v0, t0, t1
+ j ra
+ SUBU v0, t0, t1
#endif /* USE_CLZ */
-/* It might seem better to do the 'beq' instruction between the two 'lbu'
- instructions so that the nop is not needed but testing showed that this
- code is actually faster (based on glibc strcmp test). */
-#define BYTECMP01(OFFSET) \
- lbu v0, OFFSET(a0); \
- lbu v1, OFFSET(a1); \
- beq v0, zero, L(bexit01); \
- nop; \
- bne v0, v1, L(bexit01)
-
-#define BYTECMP89(OFFSET) \
- lbu t8, OFFSET(a0); \
- lbu t9, OFFSET(a1); \
- beq t8, zero, L(bexit89); \
- nop; \
- bne t8, t9, L(bexit89)
-
L(byteloop):
- BYTECMP01(0)
- BYTECMP89(1)
- BYTECMP01(2)
- BYTECMP89(3)
- BYTECMP01(4)
- BYTECMP89(5)
- BYTECMP01(6)
- BYTECMP89(7)
- PTR_ADDIU a0, a0, 8
- b L(byteloop)
- PTR_ADDIU a1, a1, 8
+ BYTECMP01(0)
+ BYTECMP89(1)
+ BYTECMP01(2)
+ BYTECMP89(3)
+ BYTECMP01(4)
+ BYTECMP89(5)
+ BYTECMP01(6)
+ BYTECMP89(7)
+ PTR_ADDIU a0, a0, 8
+ b L(byteloop)
+ PTR_ADDIU a1, a1, 8
L(bexit01):
- j ra
- subu v0, v0, v1
+ j ra
+ SUBU v0, v0, v1
L(bexit89):
- j ra
- subu v0, t8, t9
+ j ra
+ SUBU v0, t8, t9
- .set at
- .set reorder
+ .set at
+ .set reorder
END(STRCMP_NAME)
#ifndef __ANDROID__
diff --git a/libc/arch-mips/string/strcpy.c b/libc/arch-mips/string/strcpy.c
new file mode 100644
index 0000000..7b5dee3
--- /dev/null
+++ b/libc/arch-mips/string/strcpy.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2017 Imagination Technologies.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with
+ * the distribution.
+ * * Neither the name of Imagination Technologies nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+#define op_t unsigned long int
+
+#if !defined(UNALIGNED_INSTR_SUPPORT)
+/* does target have unaligned lw/ld/ualw/uald instructions? */
+#define UNALIGNED_INSTR_SUPPORT 0
+#if __mips_isa_rev < 6 && !__mips1
+#undef UNALIGNED_INSTR_SUPPORT
+#define UNALIGNED_INSTR_SUPPORT 1
+#endif
+#endif
+
+#if !defined(HW_UNALIGNED_SUPPORT)
+/* Does target have hardware support for unaligned accesses? */
+#define HW_UNALIGNED_SUPPORT 0
+#if __mips_isa_rev >= 6
+#undef HW_UNALIGNED_SUPPORT
+#define HW_UNALIGNED_SUPPORT 1
+#endif
+#endif
+
+#if __mips64
+typedef struct
+{
+ op_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
+} bits_t;
+#else
+typedef struct
+{
+ op_t B0:8, B1:8, B2:8, B3:8;
+} bits_t;
+#endif
+
+typedef union
+{
+ op_t v;
+ bits_t b;
+} bitfields_t;
+
+#if !HW_UNALIGNED_SUPPORT && UNALIGNED_INSTR_SUPPORT
+/* for MIPS GCC, there are no unaligned builtins - so this struct forces
+ the compiler to treat the pointer access as unaligned. */
+struct ulw
+{
+ op_t uli;
+} __attribute__ ((packed));
+#endif /* !HW_UNALIGNED_SUPPORT && UNALIGNED_INSTR_SUPPORT */
+
+#define DO_BYTE(i, ptdst) { \
+ *(ptdst+i) = a.b.B##i; \
+ if(a.b.B##i == '\0') \
+ return ret; \
+}
+
+#if __mips64
+#define DO_BYTES(val, dst) { \
+ bitfields_t a; \
+ char *tdst = (char *)(dst); \
+ a.v = val; \
+ DO_BYTE(0, tdst) \
+ DO_BYTE(1, tdst) \
+ DO_BYTE(2, tdst) \
+ DO_BYTE(3, tdst) \
+ DO_BYTE(4, tdst) \
+ DO_BYTE(5, tdst) \
+ DO_BYTE(6, tdst) \
+ DO_BYTE(7, tdst) \
+}
+#else
+#define DO_BYTES(val, dst) { \
+ bitfields_t a; \
+ char *tdst = (char *)(dst); \
+ a.v = val; \
+ DO_BYTE(0, tdst) \
+ DO_BYTE(1, tdst) \
+ DO_BYTE(2, tdst) \
+ DO_BYTE(3, tdst) \
+}
+#endif
+
+#define DO_WORD_ALIGNED(dst, src) { \
+ op_t val = *(src); \
+ if ((((val - mask_1) & ~val) & mask_128) != 0) { \
+ DO_BYTES(val, dst); \
+ } else *(dst) = val; \
+}
+
+#if !HW_UNALIGNED_SUPPORT
+#if UNALIGNED_INSTR_SUPPORT
+#define DO_WORD_UNALIGNED(dst, src) { \
+ op_t val = *(src); \
+ if ((((val - mask_1) & ~val) & mask_128) != 0) { \
+ DO_BYTES(val, dst); \
+ } else { \
+ struct ulw *a = (struct ulw *)(dst); \
+ a->uli = val; \
+ } \
+}
+#else
+#define DO_WORD_UNALIGNED(dst, src) { \
+ op_t val = *(src); \
+ if ((((val - mask_1) & ~val) & mask_128) != 0) { \
+ DO_BYTES(val, dst); \
+ } else { \
+ char *pdst = (char *) dst; \
+ const char *psrc = (const char *) src; \
+ for (; (*pdst = *psrc) != '\0'; ++psrc, ++pdst); \
+ return ret; \
+ } \
+}
+#endif /* UNALIGNED_INSTR_SUPPORT */
+
+#define PROCESS_UNALIGNED_WORDS(a, b) { \
+ while (1) { \
+ DO_WORD_UNALIGNED(a, b); \
+ DO_WORD_UNALIGNED(a + 1, b + 1); \
+ DO_WORD_UNALIGNED(a + 2, b + 2); \
+ DO_WORD_UNALIGNED(a + 3, b + 3); \
+ a += 4; \
+ b += 4; \
+ } \
+}
+#endif /* HW_UNALIGNED_SUPPORT */
+
+#define PROCESS_ALIGNED_WORDS(a, b) { \
+ while (1) { \
+ DO_WORD_ALIGNED(a, b); \
+ DO_WORD_ALIGNED(a + 1, b + 1); \
+ DO_WORD_ALIGNED(a + 2, b + 2); \
+ DO_WORD_ALIGNED(a + 3, b + 3); \
+ a += 4; \
+ b += 4; \
+ } \
+}
+
+char *
+strcpy (char *to, const char *from) __overloadable
+{
+ char *ret = to;
+ op_t mask_1, mask_128;
+ const op_t *src;
+ op_t *dst;
+
+ for (; (*to = *from) != '\0' && ((size_t) from % sizeof (op_t)) != 0; ++from, ++to);
+
+ if(*to != '\0') {
+ __asm__ volatile (
+ "li %0, 0x01010101 \n\t"
+ : "=r" (mask_1)
+ );
+#if __mips64
+ mask_1 |= mask_1 << 32;
+#endif
+ mask_128 = mask_1 << 7;
+
+ src = (const op_t *) from;
+ dst = (op_t *) to;
+
+#if HW_UNALIGNED_SUPPORT
+ PROCESS_ALIGNED_WORDS(dst, src);
+#else
+ if (((unsigned long) dst) % sizeof (op_t) == 0) {
+ PROCESS_ALIGNED_WORDS(dst, src);
+ } else {
+ PROCESS_UNALIGNED_WORDS(dst, src);
+ }
+#endif
+ }
+
+ return ret;
+}
diff --git a/libc/arch-mips/string/strlen.c b/libc/arch-mips/string/strlen.c
index 488e3c8..491efae 100644
--- a/libc/arch-mips/string/strlen.c
+++ b/libc/arch-mips/string/strlen.c
@@ -1,43 +1,115 @@
-/* $OpenBSD: strlen.c,v 1.8 2014/06/10 04:17:37 deraadt Exp $ */
-
-/*-
- * Copyright (c) 1990, 1993
- * The Regents of the University of California. All rights reserved.
+/*
+ * Copyright (c) 2017 Imagination Technologies.
+ *
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
*
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with
+ * the distribution.
+ * * Neither the name of Imagination Technologies nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
-size_t
-strlen(const char *str) __overloadable
-{
- const char *s;
+#define op_t unsigned long int
+#define op_size sizeof (op_t)
- for (s = str; *s; ++s)
- ;
- return (s - str);
+#if __mips64 || __mips_isa_rev >= 2
+static inline size_t __attribute__ ((always_inline))
+do_bytes (const char *base, const char *p, op_t inval)
+{
+ op_t outval = 0;
+#if __mips64
+ __asm__ volatile (
+ "dsbh %1, %0 \n\t"
+ "dshd %0, %1 \n\t"
+ "dclz %1, %0 \n\t"
+ : "+r" (inval), "+r" (outval)
+ );
+#else
+ __asm__ volatile (
+ "wsbh %1, %0 \n\t"
+ "rotr %0, %1, 16 \n\t"
+ "clz %1, %0 \n\t"
+ : "+r" (inval), "+r" (outval)
+ );
+#endif
+ p += (outval >> 3);
+ return (size_t) (p - base);
}
+#define DO_WORD(w, cnt) { \
+ op_t val = ((w[cnt] - mask_1) & ~w[cnt]) & mask_128; \
+ if (val) \
+ return do_bytes(str, (const char *)(w + cnt), val); \
+}
+#else
+static inline size_t __attribute__ ((always_inline))
+do_bytes (const char *base, const char *p)
+{
+ for (; *p; ++p);
+ return (size_t) (p - base);
+}
+
+#define DO_WORD(w, cnt) { \
+ if (((w[cnt] - mask_1) & ~w[cnt]) & mask_128) \
+ return do_bytes(str, (const char *)(w + cnt)); \
+}
+#endif
+
+size_t
+strlen (const char *str) __overloadable
+{
+ if (*str) {
+ const char *p = (const char *) str;
+ const op_t *w;
+ op_t mask_1, mask_128;
+
+ while ((size_t) p % sizeof (op_t)) {
+ if (!(*p))
+ return (p - str);
+ p++;
+ }
+
+ __asm__ volatile (
+ "li %0, 0x01010101 \n\t"
+ : "=r" (mask_1)
+ );
+#if __mips64
+ mask_1 |= mask_1 << 32;
+#endif
+ mask_128 = mask_1 << 7;
+
+ w = (const op_t *) p;
+
+ while (1) {
+ DO_WORD(w, 0);
+ DO_WORD(w, 1);
+ DO_WORD(w, 2);
+ DO_WORD(w, 3);
+ w += 4;
+ }
+ }
+ return 0;
+}
diff --git a/libc/arch-mips/string/strncmp.S b/libc/arch-mips/string/strncmp.S
new file mode 100644
index 0000000..4867c44
--- /dev/null
+++ b/libc/arch-mips/string/strncmp.S
@@ -0,0 +1,401 @@
+/*
+ * Copyright (c) 2017 Imagination Technologies.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with
+ * the distribution.
+ * * Neither the name of Imagination Technologies nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef __ANDROID__
+# include <private/bionic_asm.h>
+#elif _LIBC
+# include <sysdep.h>
+# include <regdef.h>
+# include <sys/asm.h>
+#elif _COMPILING_NEWLIB
+# include "machine/asm.h"
+# include "machine/regdef.h"
+#else
+# include <regdef.h>
+# include <sys/asm.h>
+#endif
+
+#if __mips64
+# define NSIZE 8
+# define LW ld
+# define LWR ldr
+# define LWL ldl
+# define EXT dext
+# define SRL dsrl
+# define SUBU dsubu
+#else
+# define NSIZE 4
+# define LW lw
+# define LWR lwr
+# define LWL lwl
+# define EXT ext
+# define SRL srl
+# define SUBU subu
+#endif
+
+/* Technically strncmp should not read past the end of the strings being
+ compared. We will read a full word that may contain excess bits beyond
+ the NULL string terminator but unless ENABLE_READAHEAD is set, we will not
+ read the next word after the end of string. Setting ENABLE_READAHEAD will
+ improve performance but is technically illegal based on the definition of
+ strncmp. */
+#ifdef ENABLE_READAHEAD
+# define DELAY_READ
+#else
+# define DELAY_READ nop
+#endif
+
+/* Testing on a little endian machine showed using CLZ was a
+ performance loss, so we are not turning it on by default. */
+#if defined(ENABLE_CLZ) && (__mips_isa_rev > 1) && (!__mips64)
+# define USE_CLZ
+#endif
+
+/* Some asm.h files do not have the L macro definition. */
+#ifndef L
+# if _MIPS_SIM == _ABIO32
+# define L(label) $L ## label
+# else
+# define L(label) .L ## label
+# endif
+#endif
+
+/* Some asm.h files do not have the PTR_ADDIU macro definition. */
+#ifndef PTR_ADDIU
+# if _MIPS_SIM == _ABIO32
+# define PTR_ADDIU addiu
+# else
+# define PTR_ADDIU daddiu
+# endif
+#endif
+
+/* It might seem better to do the 'beq' instruction between the two 'lbu'
+ instructions so that the nop is not needed but testing showed that this
+ code is actually faster (based on glibc strcmp test). */
+#define BYTECMP01(OFFSET) \
+ lbu v0, OFFSET(a0); \
+ lbu v1, OFFSET(a1); \
+ beq v0, zero, L(bexit01); \
+ nop; \
+ bne v0, v1, L(bexit01)
+
+#define BYTECMP89(OFFSET) \
+ lbu t8, OFFSET(a0); \
+ lbu t9, OFFSET(a1); \
+ beq t8, zero, L(bexit89); \
+ nop; \
+ bne t8, t9, L(bexit89)
+
+/* Allow the routine to be named something else if desired. */
+#ifndef STRNCMP_NAME
+# define STRNCMP_NAME strncmp
+#endif
+
+#ifdef __ANDROID__
+LEAF(STRNCMP_NAME, 0)
+#else
+LEAF(STRNCMP_NAME)
+#endif
+ .set nomips16
+ .set noreorder
+
+ srl t0, a2, (2 + NSIZE / 4)
+ beqz t0, L(byteloop) #process by bytes if less than (2 * NSIZE)
+ andi t1, a1, (NSIZE - 1)
+ beqz t1, L(exitalign)
+ or t0, zero, NSIZE
+ SUBU t1, t0, t1 #process at most (NSIZE - 1) bytes
+ SUBU a2, a2, t1 #dec count by t1
+
+L(alignloop): #process byte by byte until a1 is aligned
+ BYTECMP01(0)
+ SUBU t1, t1, 0x1
+ PTR_ADDIU a0, a0, 0x1
+ bne t1, zero, L(alignloop)
+ PTR_ADDIU a1, a1, 0x1
+
+L(exitalign):
+
+/* string a1 is NSIZE byte aligned at this point. */
+#ifndef __mips1
+ lui t8, 0x0101
+ ori t8, 0x0101
+ lui t9, 0x7f7f
+ ori t9, 0x7f7f
+#if __mips64
+ dsll t0, t8, 32
+ or t8, t0
+ dsll t1, t9, 32
+ or t9, t1
+#endif
+#endif
+
+/* Hardware or software unaligned access is not supported for mips1;
+ rev6 archs have h/w unaligned support;
+ remaining archs are handled with unaligned load instructions. */
+
+#if __mips1
+ andi t0, a0, (NSIZE - 1)
+ bne t0, zero, L(byteloop)
+#elif __mips_isa_rev < 6
+ andi t0, a0, (NSIZE - 1)
+ bne t0, zero, L(uwordloop)
+#endif
+
+#define STRCMPW(OFFSET) \
+ LW v0, (OFFSET)(a0); \
+ LW v1, (OFFSET)(a1); \
+ SUBU t0, v0, t8; \
+ bne v0, v1, L(worddiff); \
+ nor t1, v0, t9; \
+ and t0, t0, t1; \
+ bne t0, zero, L(returnzero);\
+
+L(wordloop):
+ SUBU t1, a2, (8 * NSIZE)
+ bltz t1, L(onewords)
+ STRCMPW(0 * NSIZE)
+ DELAY_READ
+ STRCMPW(1 * NSIZE)
+ DELAY_READ
+ STRCMPW(2 * NSIZE)
+ DELAY_READ
+ STRCMPW(3 * NSIZE)
+ DELAY_READ
+ STRCMPW(4 * NSIZE)
+ DELAY_READ
+ STRCMPW(5 * NSIZE)
+ DELAY_READ
+ STRCMPW(6 * NSIZE)
+ DELAY_READ
+ STRCMPW(7 * NSIZE)
+ SUBU a2, a2, (8 * NSIZE)
+ PTR_ADDIU a0, a0, (8 * NSIZE)
+ b L(wordloop)
+ PTR_ADDIU a1, a1, (8 * NSIZE)
+
+L(onewords):
+ SUBU t1, a2, NSIZE
+ bltz t1, L(byteloop)
+ STRCMPW(0)
+ SUBU a2, a2, NSIZE
+ PTR_ADDIU a0, a0, NSIZE
+ b L(onewords)
+ PTR_ADDIU a1, a1, NSIZE
+
+#if __mips_isa_rev < 6 && !__mips1
+#define USTRCMPW(OFFSET) \
+ LWR v0, (OFFSET)(a0); \
+ LWL v0, (OFFSET + NSIZE - 1)(a0); \
+ LW v1, (OFFSET)(a1); \
+ SUBU t0, v0, t8; \
+ bne v0, v1, L(worddiff); \
+ nor t1, v0, t9; \
+ and t0, t0, t1; \
+ bne t0, zero, L(returnzero);\
+
+L(uwordloop):
+ SUBU t1, a2, (8 * NSIZE)
+ bltz t1, L(uonewords)
+ USTRCMPW(0 * NSIZE)
+ DELAY_READ
+ USTRCMPW(1 * NSIZE)
+ DELAY_READ
+ USTRCMPW(2 * NSIZE)
+ DELAY_READ
+ USTRCMPW(3 * NSIZE)
+ DELAY_READ
+ USTRCMPW(4 * NSIZE)
+ DELAY_READ
+ USTRCMPW(5 * NSIZE)
+ DELAY_READ
+ USTRCMPW(6 * NSIZE)
+ DELAY_READ
+ USTRCMPW(7 * NSIZE)
+ SUBU a2, a2, (8 * NSIZE)
+ PTR_ADDIU a0, a0, (8 * NSIZE)
+ b L(uwordloop)
+ PTR_ADDIU a1, a1, (8 * NSIZE)
+
+L(uonewords):
+ SUBU t1, a2, NSIZE
+ bltz t1, L(byteloop)
+ USTRCMPW(0)
+ SUBU a2, a2, NSIZE
+ PTR_ADDIU a0, a0, NSIZE
+ b L(uonewords)
+ PTR_ADDIU a1, a1, NSIZE
+
+#endif
+
+L(returnzero):
+ j ra
+ move v0, zero
+
+#if __mips_isa_rev > 1
+#define EXT_COMPARE01(POS) \
+ EXT t0, v0, POS, 8; \
+ beq t0, zero, L(wexit01); \
+ EXT t1, v1, POS, 8; \
+ bne t0, t1, L(wexit01)
+#define EXT_COMPARE89(POS) \
+ EXT t8, v0, POS, 8; \
+ beq t8, zero, L(wexit89); \
+ EXT t9, v1, POS, 8; \
+ bne t8, t9, L(wexit89)
+#else
+#define EXT_COMPARE01(POS) \
+ SRL t0, v0, POS; \
+ SRL t1, v1, POS; \
+ andi t0, t0, 0xff; \
+ beq t0, zero, L(wexit01); \
+ andi t1, t1, 0xff; \
+ bne t0, t1, L(wexit01)
+#define EXT_COMPARE89(POS) \
+ SRL t8, v0, POS; \
+ SRL t9, v1, POS; \
+ andi t8, t8, 0xff; \
+ beq t8, zero, L(wexit89); \
+ andi t9, t9, 0xff; \
+ bne t8, t9, L(wexit89)
+#endif
+
+L(worddiff):
+#ifdef USE_CLZ
+ SUBU t0, v0, t8
+ nor t1, v0, t9
+ and t1, t0, t1
+ xor t0, v0, v1
+ or t0, t0, t1
+# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ wsbh t0, t0
+ rotr t0, t0, 16
+# endif
+ clz t1, t0
+ and t1, 0xf8
+# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ neg t1
+ addu t1, 24
+# endif
+ rotrv v0, v0, t1
+ rotrv v1, v1, t1
+ and v0, v0, 0xff
+ and v1, v1, 0xff
+ j ra
+ SUBU v0, v0, v1
+#else /* USE_CLZ */
+# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ andi t0, v0, 0xff
+ beq t0, zero, L(wexit01)
+ andi t1, v1, 0xff
+ bne t0, t1, L(wexit01)
+ EXT_COMPARE89(8)
+ EXT_COMPARE01(16)
+#ifndef __mips64
+ SRL t8, v0, 24
+ SRL t9, v1, 24
+#else
+ EXT_COMPARE89(24)
+ EXT_COMPARE01(32)
+ EXT_COMPARE89(40)
+ EXT_COMPARE01(48)
+ SRL t8, v0, 56
+ SRL t9, v1, 56
+#endif
+
+# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
+#ifdef __mips64
+ SRL t0, v0, 56
+ beq t0, zero, L(wexit01)
+ SRL t1, v1, 56
+ bne t0, t1, L(wexit01)
+ EXT_COMPARE89(48)
+ EXT_COMPARE01(40)
+ EXT_COMPARE89(32)
+ EXT_COMPARE01(24)
+#else
+ SRL t0, v0, 24
+ beq t0, zero, L(wexit01)
+ SRL t1, v1, 24
+ bne t0, t1, L(wexit01)
+#endif
+ EXT_COMPARE89(16)
+ EXT_COMPARE01(8)
+
+ andi t8, v0, 0xff
+ andi t9, v1, 0xff
+# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
+
+L(wexit89):
+ j ra
+ SUBU v0, t8, t9
+L(wexit01):
+ j ra
+ SUBU v0, t0, t1
+#endif /* USE_CLZ */
+
+L(byteloop):
+ beq a2, zero, L(returnzero)
+ SUBU a2, a2, 1
+ BYTECMP01(0)
+ nop
+ beq a2, zero, L(returnzero)
+ SUBU a2, a2, 1
+ BYTECMP89(1)
+ nop
+ beq a2, zero, L(returnzero)
+ SUBU a2, a2, 1
+ BYTECMP01(2)
+ nop
+ beq a2, zero, L(returnzero)
+ SUBU a2, a2, 1
+ BYTECMP89(3)
+ PTR_ADDIU a0, a0, 4
+ b L(byteloop)
+ PTR_ADDIU a1, a1, 4
+
+L(bexit01):
+ j ra
+ SUBU v0, v0, v1
+L(bexit89):
+ j ra
+ SUBU v0, t8, t9
+
+ .set at
+ .set reorder
+
+END(STRNCMP_NAME)
+#ifndef __ANDROID__
+# ifdef _LIBC
+libc_hidden_builtin_def (STRNCMP_NAME)
+# endif
+#endif
diff --git a/libc/arch-mips/string/strnlen.c b/libc/arch-mips/string/strnlen.c
new file mode 100644
index 0000000..2011deb
--- /dev/null
+++ b/libc/arch-mips/string/strnlen.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2017 Imagination Technologies.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with
+ * the distribution.
+ * * Neither the name of Imagination Technologies nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+#define op_t unsigned long int
+#define op_size sizeof (op_t)
+
+#if __mips64 || __mips_isa_rev >= 2
+static inline size_t __attribute__ ((always_inline))
+do_bytes (const char *base, const char *p, op_t inval)
+{
+ op_t outval = 0;
+#if __mips64
+ __asm__ volatile (
+ "dsbh %1, %0 \n\t"
+ "dshd %0, %1 \n\t"
+ "dclz %1, %0 \n\t"
+ : "+r" (inval), "+r" (outval)
+ );
+#else
+ __asm__ volatile (
+ "wsbh %1, %0 \n\t"
+ "rotr %0, %1, 16 \n\t"
+ "clz %1, %0 \n\t"
+ : "+r" (inval), "+r" (outval)
+ );
+#endif
+ p += (outval >> 3);
+ return (size_t) (p - base);
+}
+
+#define DO_WORD(in, val) { \
+ op_t tmp = ((val - mask_1) & ~val) & mask_128; \
+ if (tmp) \
+ return do_bytes(str, (const char *)(in), tmp); \
+}
+#else
+static inline size_t __attribute__ ((always_inline))
+do_bytes (const char *base, const char *p)
+{
+ for (; *p; ++p);
+ return (size_t) (p - base);
+}
+
+#define DO_WORD(in, val) { \
+ if (((val - mask_1) & ~val) & mask_128) { \
+ return do_bytes(str, (const char *)(in)); \
+ } \
+}
+#endif
+
+size_t strnlen (const char *str, size_t n) {
+ if (n != 0) {
+ const char *p = (const char *) str;
+ const op_t *w;
+ op_t mask_1, mask_128;
+
+ for (; n > 0 && ((size_t) p % op_size) != 0; --n, ++p) {
+ if (!(*p))
+ return (p - str);
+ }
+
+ w = (const op_t *) p;
+
+ __asm__ volatile (
+ "li %0, 0x01010101 \n\t"
+ : "=r" (mask_1)
+ );
+#if __mips64
+ mask_1 |= mask_1 << 32;
+#endif
+ mask_128 = mask_1 << 7;
+
+ /*
+ * Check op_size bytes at a time after the initial alignment
+ */
+ while (n >= 4 * op_size) {
+ const op_t w0 = w[0];
+ const op_t w1 = w[1];
+ const op_t w2 = w[2];
+ const op_t w3 = w[3];
+ DO_WORD(w + 0, w0)
+ DO_WORD(w + 1, w1)
+ DO_WORD(w + 2, w2)
+ DO_WORD(w + 3, w3)
+ w += 4;
+ n -= 4 * op_size;
+ }
+
+ while (n >= op_size) {
+ DO_WORD(w, w[0]);
+ w++;
+ n -= op_size;
+ }
+
+ /*
+ * Check byte by byte for the remaining bytes
+ */
+ p = (const char *) w;
+ for (; n > 0; --n, ++p) {
+ if (!(*p))
+ return (p - str);
+ }
+
+ return (p - str);
+ }
+
+ return 0;
+}