| /* |
| * Copyright (c) 2014 |
| * Imagination Technologies Limited. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY IMAGINATION TECHNOLOGIES LIMITED ``AS IS'' AND |
| * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL IMAGINATION TECHNOLOGIES LIMITED BE LIABLE |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| */ |
| |
| #ifdef __ANDROID__ |
| # include <private/bionic_asm.h> |
| #elif _LIBC |
| # include <sysdep.h> |
| # include <regdef.h> |
| # include <sys/asm.h> |
| #elif _COMPILING_NEWLIB |
| # include "machine/asm.h" |
| # include "machine/regdef.h" |
| #else |
| # include <regdef.h> |
| # include <sys/asm.h> |
| #endif |
| |
| /* Technically strcmp should not read past the end of the strings being |
| compared. We will read a full word that may contain excess bytes beyond |
| the NUL string terminator, but unless ENABLE_READAHEAD is set we will not |
| read the next word after the end of the string. Setting ENABLE_READAHEAD |
| will improve performance but is technically illegal based on the |
| definition of strcmp. */ |
| /* DELAY_READ is a nop unless ENABLE_READAHEAD is defined, in which case |
| it expands to nothing. */ |
| #ifndef ENABLE_READAHEAD |
| # define DELAY_READ nop |
| #else |
| # define DELAY_READ |
| #endif |
| |
| /* CLZ measured slower on a little endian machine, so USE_CLZ stays off |
| unless ENABLE_CLZ is requested and the ISA revision is 2 or later. */ |
| #if (__mips_isa_rev >= 2) && defined(ENABLE_CLZ) |
| # define USE_CLZ |
| #endif |
| |
| /* Provide the L macro for asm.h files that lack it. */ |
| #if !defined(L) |
| # if _MIPS_SIM != _ABIO32 |
| # define L(label) .L ## label |
| # else |
| # define L(label) $L ## label |
| # endif |
| #endif |
| |
| /* Provide the PTR_ADDIU macro for asm.h files that lack it. */ |
| #if !defined(PTR_ADDIU) |
| # if _MIPS_SIM != _ABIO32 |
| # define PTR_ADDIU daddiu |
| # else |
| # define PTR_ADDIU addiu |
| # endif |
| #endif |
| |
| /* STRCMP_NAME selects the name of the routine; it defaults to strcmp. */ |
| #if !defined(STRCMP_NAME) |
| # define STRCMP_NAME strcmp |
| #endif |
| |
| /* int STRCMP_NAME (const char *s1, const char *s2) |
| |
| In: a0 = s1, a1 = s2. |
| Out: v0 = 0 when the strings are equal, otherwise the byte of s1 minus |
| the byte of s2 at the first position where they differ, both bytes taken |
| as unsigned values. |
| Clobbers: v1, t0, t1, t8, t9; a0 and a1 are advanced by the loops. |
| |
| The routine is assembled under .set noreorder, so the instruction written |
| after each branch or jump is its delay slot and executes whether or not |
| the branch is taken. Instruction order is therefore significant. */ |
| #ifdef __ANDROID__ |
| LEAF(STRCMP_NAME, 0) |
| #else |
| LEAF(STRCMP_NAME) |
| #endif |
| .set nomips16 |
| .set noreorder |
| |
| /* Use the word loop only when both pointers are 4 byte aligned. The lui |
| after the branch is in its delay slot and also runs when the byte loop is |
| chosen; that is harmless because the byte loop loads t8 before reading |
| it. */ |
| or t0, a0, a1 |
| andi t0,0x3 |
| bne t0, zero, L(byteloop) |
| |
| /* Both strings are 4 byte aligned at this point. */ |
| |
| /* t8 = 0x01010101 and t9 = 0x7f7f7f7f, the constants of the NUL test. */ |
| lui t8, 0x0101 |
| ori t8, t8, 0x0101 |
| lui t9, 0x7f7f |
| ori t9, 0x7f7f |
| |
| /* Compare one aligned word from each string. Different words go to |
| L(worddiff); the nor after that branch is its delay slot. For equal |
| words, (v0 - 0x01010101) AND NOT (v0 OR 0x7f7f7f7f) is nonzero exactly |
| when v0 contains a NUL byte, and then the strings are equal. The final |
| bne has no delay slot inside the macro: the instruction that follows the |
| macro invocation fills it. */ |
| #define STRCMP32(OFFSET) \ |
| lw v0, OFFSET(a0); \ |
| lw v1, OFFSET(a1); \ |
| subu t0, v0, t8; \ |
| bne v0, v1, L(worddiff); \ |
| nor t1, v0, t9; \ |
| and t0, t0, t1; \ |
| bne t0, zero, L(returnzero) |
| |
| /* Eight words (32 bytes) per iteration. DELAY_READ fills the delay slot |
| of each STRCMP32; when it expands to nothing the next lw lands in that |
| slot instead. The last STRCMP32 uses the a0 increment as its delay slot |
| and the a1 increment sits in the delay slot of the loop branch. */ |
| L(wordloop): |
| STRCMP32(0) |
| DELAY_READ |
| STRCMP32(4) |
| DELAY_READ |
| STRCMP32(8) |
| DELAY_READ |
| STRCMP32(12) |
| DELAY_READ |
| STRCMP32(16) |
| DELAY_READ |
| STRCMP32(20) |
| DELAY_READ |
| STRCMP32(24) |
| DELAY_READ |
| STRCMP32(28) |
| PTR_ADDIU a0, a0, 32 |
| b L(wordloop) |
| PTR_ADDIU a1, a1, 32 |
| |
| /* Equal words containing a NUL: the strings are equal. */ |
| L(returnzero): |
| j ra |
| move v0, zero |
| |
| /* v0 and v1 hold the first pair of words that differ. */ |
| L(worddiff): |
| #ifdef USE_CLZ |
| /* Build a mask with bit 7 set in exactly the NUL bytes of v0: |
| ((v0 AND 0x7f7f7f7f) + 0x7f7f7f7f) OR v0 has bit 7 set in every nonzero |
| byte and no carry crosses a byte boundary, so the nor leaves 0x80 in the |
| NUL bytes only. The subtract based test of STRCMP32 must not be used |
| here: its borrow can also mark a 0x01 byte that is more significant than |
| a real NUL, and on big endian that byte comes earlier in the string, so |
| clz would select the wrong byte. */ |
| and t0, v0, t9 |
| addu t0, t0, t9 |
| or t0, t0, v0 |
| nor t1, t0, t9 |
| /* t0 = differing bits OR NUL markers; the first marked byte in memory |
| order is where the comparison ends. */ |
| xor t0, v0, v1 |
| or t0, t0, t1 |
| # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ |
| /* Byte-reverse t0 so the first byte in memory becomes the top byte. */ |
| wsbh t0, t0 |
| rotr t0, t0, 16 |
| # endif |
| /* t1 = 8 * index of the first marked byte, counted from the top. */ |
| clz t1, t0 |
| and t1, 0xf8 |
| # if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ |
| /* t1 = 24 - t1, the rotation that brings that byte down to bits 7..0. */ |
| neg t1 |
| addu t1, 24 |
| # endif |
| rotrv v0, v0, t1 |
| rotrv v1, v1, t1 |
| and v0, v0, 0xff |
| and v1, v1, 0xff |
| j ra |
| subu v0, v0, v1 |
| #else /* USE_CLZ */ |
| /* Compare the two words one byte at a time in memory order, stopping at |
| the first NUL in v0 or at the first mismatch. The register pairs t0/t1 |
| and t8/t9 alternate so that the delay slot of each bne can start |
| extracting the next pair without disturbing the pair that the exit code |
| subtracts. Each beq has the extraction of the second byte in its delay |
| slot, so t1 or t9 is valid at the exit label. The fourth pair needs no |
| test: the words differ and the first three pairs matched, so control |
| falls through to the subtraction at L(wexit89). */ |
| # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ |
| andi t0, v0, 0xff |
| beq t0, zero, L(wexit01) |
| andi t1, v1, 0xff |
| bne t0, t1, L(wexit01) |
| |
| srl t8, v0, 8 |
| srl t9, v1, 8 |
| andi t8, t8, 0xff |
| beq t8, zero, L(wexit89) |
| andi t9, t9, 0xff |
| bne t8, t9, L(wexit89) |
| |
| srl t0, v0, 16 |
| srl t1, v1, 16 |
| andi t0, t0, 0xff |
| beq t0, zero, L(wexit01) |
| andi t1, t1, 0xff |
| bne t0, t1, L(wexit01) |
| |
| srl t8, v0, 24 |
| srl t9, v1, 24 |
| # else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */ |
| srl t0, v0, 24 |
| beq t0, zero, L(wexit01) |
| srl t1, v1, 24 |
| bne t0, t1, L(wexit01) |
| |
| srl t8, v0, 16 |
| srl t9, v1, 16 |
| andi t8, t8, 0xff |
| beq t8, zero, L(wexit89) |
| andi t9, t9, 0xff |
| bne t8, t9, L(wexit89) |
| |
| srl t0, v0, 8 |
| srl t1, v1, 8 |
| andi t0, t0, 0xff |
| beq t0, zero, L(wexit01) |
| andi t1, t1, 0xff |
| bne t0, t1, L(wexit01) |
| |
| andi t8, v0, 0xff |
| andi t9, v1, 0xff |
| # endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */ |
| |
| L(wexit89): |
| j ra |
| subu v0, t8, t9 |
| L(wexit01): |
| j ra |
| subu v0, t0, t1 |
| #endif /* USE_CLZ */ |
| |
| /* It might seem better to do the 'beq' instruction between the two 'lbu' |
| instructions so that the nop is not needed, but testing showed that this |
| code is actually faster (based on the glibc strcmp test). */ |
| /* BYTECMP01 and BYTECMP89 compare one byte pair and exit on a NUL in the |
| first string or on a mismatch. The final bne has no delay slot inside the |
| macro; the first instruction after the invocation fills it. The two |
| macros use different registers (v0/v1 and t8/t9), so the lbu that lands |
| in that slot does not overwrite the pair subtracted at the exit label. |
| That lbu reads the next byte of the first string only, which is still |
| inside the string because the current byte was not NUL. */ |
| #define BYTECMP01(OFFSET) \ |
| lbu v0, OFFSET(a0); \ |
| lbu v1, OFFSET(a1); \ |
| beq v0, zero, L(bexit01); \ |
| nop; \ |
| bne v0, v1, L(bexit01) |
| |
| #define BYTECMP89(OFFSET) \ |
| lbu t8, OFFSET(a0); \ |
| lbu t9, OFFSET(a1); \ |
| beq t8, zero, L(bexit89); \ |
| nop; \ |
| bne t8, t9, L(bexit89) |
| |
| /* Eight bytes per iteration. The a0 increment is the delay slot of the |
| last bne and the a1 increment is the delay slot of the loop branch. */ |
| L(byteloop): |
| BYTECMP01(0) |
| BYTECMP89(1) |
| BYTECMP01(2) |
| BYTECMP89(3) |
| BYTECMP01(4) |
| BYTECMP89(5) |
| BYTECMP01(6) |
| BYTECMP89(7) |
| PTR_ADDIU a0, a0, 8 |
| b L(byteloop) |
| PTR_ADDIU a1, a1, 8 |
| |
| /* Return the difference of the byte pair that ended the comparison. */ |
| L(bexit01): |
| j ra |
| subu v0, v0, v1 |
| L(bexit89): |
| j ra |
| subu v0, t8, t9 |
| |
| .set at |
| .set reorder |
| |
| END(STRCMP_NAME) |
| #ifndef __ANDROID__ |
| # ifdef _LIBC |
| libc_hidden_builtin_def (STRCMP_NAME) |
| # endif |
| #endif |