| /* |
| Copyright (c) 2011, Intel Corporation |
| All rights reserved. |
| |
| Redistribution and use in source and binary forms, with or without |
| modification, are permitted provided that the following conditions are met: |
| |
| * Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| |
| * Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| |
| * Neither the name of Intel Corporation nor the names of its contributors |
| * may be used to endorse or promote products derived from this software |
| * without specific prior written permission. |
| |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
| ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
| ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON |
| ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
#ifndef USE_AS_STRCAT

/* Configuration and helper macros for the SSE2 strcpy family.  The
   variant actually built (strcpy/stpcpy/strncpy) is selected by the
   USE_AS_* defines of the including file.  This whole header block is
   skipped when the file is pulled in by strcat (USE_AS_STRCAT), which
   supplies its own versions of these definitions.  */

/* L(x) forms an assembler-local label name (.Lx is kept out of the
   object file's symbol table).  */
# ifndef L
# define L(label) .L##label
# endif

/* CFI wrappers: fall back to the raw .cfi_* directives when the
   including environment (e.g. a sysdep.h) has not already provided
   these macros.  */
# ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
# endif

# ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
# endif

# ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
# endif

# ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
# endif

# ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
# endif

/* Standard function prologue/epilogue bracketing: declare the symbol
   as a global function, align its entry to 16 bytes, and open/close
   the CFI region plus the .size record.  */
# ifndef ENTRY
# define ENTRY(name) \
	.type name, @function; \
	.globl name; \
	.p2align 4; \
name: \
	cfi_startproc
# endif

# ifndef END
# define END(name) \
	cfi_endproc; \
	.size name, .-name
# endif

/* Record a 4-byte (32-bit stack slot) push/pop of REG in the unwind
   info; always paired with the matching pushl/popl.  */
# define CFI_PUSH(REG) \
	cfi_adjust_cfa_offset (4); \
	cfi_rel_offset (REG, 0)

# define CFI_POP(REG) \
	cfi_adjust_cfa_offset (-4); \
	cfi_restore (REG)

# define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG)

/* Name of the generated entry point; the including file may override
   this (e.g. to stpcpy or strncpy) before including us.  */
# ifndef STRCPY
# define STRCPY strcpy
# endif

/* strncpy keeps the length count in %ebx, so %ebx (callee-saved) is
   pushed on entry; PARMS is the resulting offset of the first stack
   argument from %esp.  The CFI_PUSH after `ret' re-establishes the
   unwind state for the code that follows a RETURN in the same
   function body.  */
# ifdef USE_AS_STRNCPY
# define PARMS 8
# define ENTRANCE PUSH (%ebx)
# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx);
# define RETURN1 POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
# else
# define PARMS 4
# define ENTRANCE
# define RETURN ret
# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi)
# endif

/* Result register setup: stpcpy returns a pointer n bytes into the
   destination (%edx); plain strcpy returns the original destination,
   held in %edi on the main path and still in %edx on the short
   tail paths.  */
# ifdef USE_AS_STPCPY
# define SAVE_RESULT(n) lea n(%edx), %eax
# define SAVE_RESULT_TAIL(n) lea n(%edx), %eax
# else
# define SAVE_RESULT(n) movl %edi, %eax
# define SAVE_RESULT_TAIL(n) movl %edx, %eax
# endif

/* %esp-relative offsets of the incoming arguments:
   dst, src, and (strncpy only) the length.  */
# define STR1 PARMS
# define STR2 STR1+4
# define LEN STR2+4

/* In this code the following instructions are used for copying:
	movb - 1 byte
	movw - 2 bytes
	movl - 4 bytes
	movlpd - 8 bytes
	movaps - 16 bytes - requires 16-byte alignment
	of source and destination addresses.
*/
| |
| .text |
| ENTRY (STRCPY) |
| ENTRANCE |
| mov STR1(%esp), %edx |
| mov STR2(%esp), %ecx |
| # ifdef USE_AS_STRNCPY |
| movl LEN(%esp), %ebx |
| cmp $8, %ebx |
| jbe L(StrncpyExit8Bytes) |
| # endif |
| cmpb $0, (%ecx) |
| jz L(ExitTail1) |
| cmpb $0, 1(%ecx) |
| jz L(ExitTail2) |
| cmpb $0, 2(%ecx) |
| jz L(ExitTail3) |
| cmpb $0, 3(%ecx) |
| jz L(ExitTail4) |
| cmpb $0, 4(%ecx) |
| jz L(ExitTail5) |
| cmpb $0, 5(%ecx) |
| jz L(ExitTail6) |
| cmpb $0, 6(%ecx) |
| jz L(ExitTail7) |
| cmpb $0, 7(%ecx) |
| jz L(ExitTail8) |
| # ifdef USE_AS_STRNCPY |
| cmp $16, %ebx |
| jb L(StrncpyExit15Bytes) |
| # endif |
| cmpb $0, 8(%ecx) |
| jz L(ExitTail9) |
| cmpb $0, 9(%ecx) |
| jz L(ExitTail10) |
| cmpb $0, 10(%ecx) |
| jz L(ExitTail11) |
| cmpb $0, 11(%ecx) |
| jz L(ExitTail12) |
| cmpb $0, 12(%ecx) |
| jz L(ExitTail13) |
| cmpb $0, 13(%ecx) |
| jz L(ExitTail14) |
| cmpb $0, 14(%ecx) |
| jz L(ExitTail15) |
| # if defined USE_AS_STRNCPY && !defined USE_AS_STRLCPY |
| cmp $16, %ebx |
| je L(ExitTail16) |
| # endif |
| cmpb $0, 15(%ecx) |
| jz L(ExitTail16) |
| |
| # if defined USE_AS_STRNCPY && defined USE_AS_STRLCPY |
| cmp $16, %ebx |
| je L(StrlcpyExitTail16) |
| # endif |
| |
| PUSH (%edi) |
| # ifndef USE_AS_STRLCPY |
| mov %edx, %edi |
| # else |
| mov %ecx, %edi |
| # endif |
| #endif |
| PUSH (%esi) |
| #ifdef USE_AS_STRNCPY |
| mov %ecx, %esi |
| sub $16, %ebx |
| and $0xf, %esi |
| |
| /* add 16 bytes ecx_offset to ebx */ |
| |
| add %esi, %ebx |
| #endif |
| lea 16(%ecx), %esi |
| and $-16, %esi |
| pxor %xmm0, %xmm0 |
| movlpd (%ecx), %xmm1 |
| movlpd %xmm1, (%edx) |
| |
| pcmpeqb (%esi), %xmm0 |
| movlpd 8(%ecx), %xmm1 |
| movlpd %xmm1, 8(%edx) |
| |
| pmovmskb %xmm0, %eax |
| sub %ecx, %esi |
| |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(CopyFrom1To16BytesCase2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(CopyFrom1To16Bytes) |
| |
| mov %edx, %eax |
| lea 16(%edx), %edx |
| and $-16, %edx |
| sub %edx, %eax |
| |
| #ifdef USE_AS_STRNCPY |
| add %eax, %esi |
| lea -1(%esi), %esi |
| and $1<<31, %esi |
| test %esi, %esi |
| jnz L(ContinueCopy) |
| lea 16(%ebx), %ebx |
| |
| L(ContinueCopy): |
| #endif |
| sub %eax, %ecx |
| mov %ecx, %eax |
| and $0xf, %eax |
| mov $0, %esi |
| |
| /* case: ecx_offset == edx_offset */ |
| |
| jz L(Align16Both) |
| |
| cmp $8, %eax |
| jae L(ShlHigh8) |
| cmp $1, %eax |
| je L(Shl1) |
| cmp $2, %eax |
| je L(Shl2) |
| cmp $3, %eax |
| je L(Shl3) |
| cmp $4, %eax |
| je L(Shl4) |
| cmp $5, %eax |
| je L(Shl5) |
| cmp $6, %eax |
| je L(Shl6) |
| jmp L(Shl7) |
| |
| L(ShlHigh8): |
| je L(Shl8) |
| cmp $9, %eax |
| je L(Shl9) |
| cmp $10, %eax |
| je L(Shl10) |
| cmp $11, %eax |
| je L(Shl11) |
| cmp $12, %eax |
| je L(Shl12) |
| cmp $13, %eax |
| je L(Shl13) |
| cmp $14, %eax |
| je L(Shl14) |
| jmp L(Shl15) |
| |
| L(Align16Both): |
| movaps (%ecx), %xmm1 |
| movaps 16(%ecx), %xmm2 |
| movaps %xmm1, (%edx) |
| pcmpeqb %xmm2, %xmm0 |
| pmovmskb %xmm0, %eax |
| lea 16(%esi), %esi |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(CopyFrom1To16BytesCase2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(CopyFrom1To16Bytes) |
| |
| movaps 16(%ecx, %esi), %xmm3 |
| movaps %xmm2, (%edx, %esi) |
| pcmpeqb %xmm3, %xmm0 |
| pmovmskb %xmm0, %eax |
| lea 16(%esi), %esi |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(CopyFrom1To16BytesCase2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(CopyFrom1To16Bytes) |
| |
| movaps 16(%ecx, %esi), %xmm4 |
| movaps %xmm3, (%edx, %esi) |
| pcmpeqb %xmm4, %xmm0 |
| pmovmskb %xmm0, %eax |
| lea 16(%esi), %esi |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(CopyFrom1To16BytesCase2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(CopyFrom1To16Bytes) |
| |
| movaps 16(%ecx, %esi), %xmm1 |
| movaps %xmm4, (%edx, %esi) |
| pcmpeqb %xmm1, %xmm0 |
| pmovmskb %xmm0, %eax |
| lea 16(%esi), %esi |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(CopyFrom1To16BytesCase2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(CopyFrom1To16Bytes) |
| |
| movaps 16(%ecx, %esi), %xmm2 |
| movaps %xmm1, (%edx, %esi) |
| pcmpeqb %xmm2, %xmm0 |
| pmovmskb %xmm0, %eax |
| lea 16(%esi), %esi |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(CopyFrom1To16BytesCase2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(CopyFrom1To16Bytes) |
| |
| movaps 16(%ecx, %esi), %xmm3 |
| movaps %xmm2, (%edx, %esi) |
| pcmpeqb %xmm3, %xmm0 |
| pmovmskb %xmm0, %eax |
| lea 16(%esi), %esi |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(CopyFrom1To16BytesCase2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(CopyFrom1To16Bytes) |
| |
| movaps %xmm3, (%edx, %esi) |
| mov %ecx, %eax |
| lea 16(%ecx, %esi), %ecx |
| and $-0x40, %ecx |
| sub %ecx, %eax |
| sub %eax, %edx |
| #ifdef USE_AS_STRNCPY |
| lea 112(%ebx, %eax), %ebx |
| #endif |
| mov $-0x40, %esi |
| |
| L(Aligned64Loop): |
| movaps (%ecx), %xmm2 |
| movaps 32(%ecx), %xmm3 |
| movaps %xmm2, %xmm4 |
| movaps 16(%ecx), %xmm5 |
| movaps %xmm3, %xmm6 |
| movaps 48(%ecx), %xmm7 |
| pminub %xmm5, %xmm2 |
| pminub %xmm7, %xmm3 |
| pminub %xmm2, %xmm3 |
| lea 64(%edx), %edx |
| pcmpeqb %xmm0, %xmm3 |
| lea 64(%ecx), %ecx |
| pmovmskb %xmm3, %eax |
| #ifdef USE_AS_STRNCPY |
| sub $64, %ebx |
| jbe L(StrncpyLeaveCase2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Aligned64Leave) |
| movaps %xmm4, -64(%edx) |
| movaps %xmm5, -48(%edx) |
| movaps %xmm6, -32(%edx) |
| movaps %xmm7, -16(%edx) |
| jmp L(Aligned64Loop) |
| |
| L(Aligned64Leave): |
| #ifdef USE_AS_STRNCPY |
| lea 48(%ebx), %ebx |
| #endif |
| pcmpeqb %xmm4, %xmm0 |
| pmovmskb %xmm0, %eax |
| test %eax, %eax |
| jnz L(CopyFrom1To16Bytes) |
| |
| pcmpeqb %xmm5, %xmm0 |
| #ifdef USE_AS_STRNCPY |
| lea -16(%ebx), %ebx |
| #endif |
| pmovmskb %xmm0, %eax |
| movaps %xmm4, -64(%edx) |
| lea 16(%esi), %esi |
| test %eax, %eax |
| jnz L(CopyFrom1To16Bytes) |
| |
| pcmpeqb %xmm6, %xmm0 |
| #ifdef USE_AS_STRNCPY |
| lea -16(%ebx), %ebx |
| #endif |
| pmovmskb %xmm0, %eax |
| movaps %xmm5, -48(%edx) |
| lea 16(%esi), %esi |
| test %eax, %eax |
| jnz L(CopyFrom1To16Bytes) |
| |
| movaps %xmm6, -32(%edx) |
| pcmpeqb %xmm7, %xmm0 |
| #ifdef USE_AS_STRNCPY |
| lea -16(%ebx), %ebx |
| #endif |
| pmovmskb %xmm0, %eax |
| lea 16(%esi), %esi |
| jmp L(CopyFrom1To16Bytes) |
| |
| .p2align 4 |
| L(Shl1): |
| movaps -1(%ecx), %xmm1 |
| movaps 15(%ecx), %xmm2 |
| L(Shl1Start): |
| pcmpeqb %xmm2, %xmm0 |
| pmovmskb %xmm0, %eax |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit1Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl1LoopExit) |
| |
| palignr $1, %xmm1, %xmm2 |
| movaps %xmm3, %xmm1 |
| movaps %xmm2, (%edx) |
| movaps 31(%ecx), %xmm2 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit1Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl1LoopExit) |
| |
| palignr $1, %xmm1, %xmm2 |
| movaps %xmm2, (%edx) |
| movaps 31(%ecx), %xmm2 |
| movaps %xmm3, %xmm1 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit1Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl1LoopExit) |
| |
| palignr $1, %xmm1, %xmm2 |
| movaps %xmm2, (%edx) |
| movaps 31(%ecx), %xmm2 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit1Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl1LoopExit) |
| |
| palignr $1, %xmm3, %xmm2 |
| movaps %xmm2, (%edx) |
| lea 31(%ecx), %ecx |
| lea 16(%edx), %edx |
| |
| mov %ecx, %eax |
| and $-0x40, %ecx |
| sub %ecx, %eax |
| lea -15(%ecx), %ecx |
| sub %eax, %edx |
| #ifdef USE_AS_STRNCPY |
| add %eax, %ebx |
| #endif |
| movaps -1(%ecx), %xmm1 |
| |
| L(Shl1LoopStart): |
| movaps 15(%ecx), %xmm2 |
| movaps 31(%ecx), %xmm3 |
| movaps %xmm3, %xmm6 |
| movaps 47(%ecx), %xmm4 |
| movaps %xmm4, %xmm7 |
| movaps 63(%ecx), %xmm5 |
| pminub %xmm2, %xmm6 |
| pminub %xmm5, %xmm7 |
| pminub %xmm6, %xmm7 |
| pcmpeqb %xmm0, %xmm7 |
| pmovmskb %xmm7, %eax |
| movaps %xmm5, %xmm7 |
| palignr $1, %xmm4, %xmm5 |
| palignr $1, %xmm3, %xmm4 |
| test %eax, %eax |
| jnz L(Shl1Start) |
| #ifdef USE_AS_STRNCPY |
| sub $64, %ebx |
| jbe L(StrncpyLeave1) |
| #endif |
| palignr $1, %xmm2, %xmm3 |
| lea 64(%ecx), %ecx |
| palignr $1, %xmm1, %xmm2 |
| movaps %xmm7, %xmm1 |
| movaps %xmm5, 48(%edx) |
| movaps %xmm4, 32(%edx) |
| movaps %xmm3, 16(%edx) |
| movaps %xmm2, (%edx) |
| lea 64(%edx), %edx |
| jmp L(Shl1LoopStart) |
| |
| L(Shl1LoopExit): |
| movlpd (%ecx), %xmm0 |
| movlpd %xmm0, (%edx) |
| movlpd 7(%ecx), %xmm0 |
| movlpd %xmm0, 7(%edx) |
| mov $15, %esi |
| jmp L(CopyFrom1To16Bytes) |
| |
| .p2align 4 |
| L(Shl2): |
| movaps -2(%ecx), %xmm1 |
| movaps 14(%ecx), %xmm2 |
| L(Shl2Start): |
| pcmpeqb %xmm2, %xmm0 |
| pmovmskb %xmm0, %eax |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit2Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl2LoopExit) |
| |
| palignr $2, %xmm1, %xmm2 |
| movaps %xmm3, %xmm1 |
| movaps %xmm2, (%edx) |
| movaps 30(%ecx), %xmm2 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit2Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl2LoopExit) |
| |
| palignr $2, %xmm1, %xmm2 |
| movaps %xmm2, (%edx) |
| movaps 30(%ecx), %xmm2 |
| movaps %xmm3, %xmm1 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit2Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl2LoopExit) |
| |
| palignr $2, %xmm1, %xmm2 |
| movaps %xmm2, (%edx) |
| movaps 30(%ecx), %xmm2 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit2Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl2LoopExit) |
| |
| palignr $2, %xmm3, %xmm2 |
| movaps %xmm2, (%edx) |
| lea 30(%ecx), %ecx |
| lea 16(%edx), %edx |
| |
| mov %ecx, %eax |
| and $-0x40, %ecx |
| sub %ecx, %eax |
| lea -14(%ecx), %ecx |
| sub %eax, %edx |
| #ifdef USE_AS_STRNCPY |
| add %eax, %ebx |
| #endif |
| movaps -2(%ecx), %xmm1 |
| |
| L(Shl2LoopStart): |
| movaps 14(%ecx), %xmm2 |
| movaps 30(%ecx), %xmm3 |
| movaps %xmm3, %xmm6 |
| movaps 46(%ecx), %xmm4 |
| movaps %xmm4, %xmm7 |
| movaps 62(%ecx), %xmm5 |
| pminub %xmm2, %xmm6 |
| pminub %xmm5, %xmm7 |
| pminub %xmm6, %xmm7 |
| pcmpeqb %xmm0, %xmm7 |
| pmovmskb %xmm7, %eax |
| movaps %xmm5, %xmm7 |
| palignr $2, %xmm4, %xmm5 |
| palignr $2, %xmm3, %xmm4 |
| test %eax, %eax |
| jnz L(Shl2Start) |
| #ifdef USE_AS_STRNCPY |
| sub $64, %ebx |
| jbe L(StrncpyLeave2) |
| #endif |
| palignr $2, %xmm2, %xmm3 |
| lea 64(%ecx), %ecx |
| palignr $2, %xmm1, %xmm2 |
| movaps %xmm7, %xmm1 |
| movaps %xmm5, 48(%edx) |
| movaps %xmm4, 32(%edx) |
| movaps %xmm3, 16(%edx) |
| movaps %xmm2, (%edx) |
| lea 64(%edx), %edx |
| jmp L(Shl2LoopStart) |
| |
| L(Shl2LoopExit): |
| movlpd (%ecx), %xmm0 |
| movlpd 6(%ecx), %xmm1 |
| movlpd %xmm0, (%edx) |
| movlpd %xmm1, 6(%edx) |
| mov $14, %esi |
| jmp L(CopyFrom1To16Bytes) |
| |
| .p2align 4 |
| L(Shl3): |
| movaps -3(%ecx), %xmm1 |
| movaps 13(%ecx), %xmm2 |
| L(Shl3Start): |
| pcmpeqb %xmm2, %xmm0 |
| pmovmskb %xmm0, %eax |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit3Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl3LoopExit) |
| |
| palignr $3, %xmm1, %xmm2 |
| movaps %xmm3, %xmm1 |
| movaps %xmm2, (%edx) |
| movaps 29(%ecx), %xmm2 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit3Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl3LoopExit) |
| |
| palignr $3, %xmm1, %xmm2 |
| movaps %xmm2, (%edx) |
| movaps 29(%ecx), %xmm2 |
| movaps %xmm3, %xmm1 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit3Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl3LoopExit) |
| |
| palignr $3, %xmm1, %xmm2 |
| movaps %xmm2, (%edx) |
| movaps 29(%ecx), %xmm2 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit3Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl3LoopExit) |
| |
| palignr $3, %xmm3, %xmm2 |
| movaps %xmm2, (%edx) |
| lea 29(%ecx), %ecx |
| lea 16(%edx), %edx |
| |
| mov %ecx, %eax |
| and $-0x40, %ecx |
| sub %ecx, %eax |
| lea -13(%ecx), %ecx |
| sub %eax, %edx |
| #ifdef USE_AS_STRNCPY |
| add %eax, %ebx |
| #endif |
| movaps -3(%ecx), %xmm1 |
| |
| L(Shl3LoopStart): |
| movaps 13(%ecx), %xmm2 |
| movaps 29(%ecx), %xmm3 |
| movaps %xmm3, %xmm6 |
| movaps 45(%ecx), %xmm4 |
| movaps %xmm4, %xmm7 |
| movaps 61(%ecx), %xmm5 |
| pminub %xmm2, %xmm6 |
| pminub %xmm5, %xmm7 |
| pminub %xmm6, %xmm7 |
| pcmpeqb %xmm0, %xmm7 |
| pmovmskb %xmm7, %eax |
| movaps %xmm5, %xmm7 |
| palignr $3, %xmm4, %xmm5 |
| palignr $3, %xmm3, %xmm4 |
| test %eax, %eax |
| jnz L(Shl3Start) |
| #ifdef USE_AS_STRNCPY |
| sub $64, %ebx |
| jbe L(StrncpyLeave3) |
| #endif |
| palignr $3, %xmm2, %xmm3 |
| lea 64(%ecx), %ecx |
| palignr $3, %xmm1, %xmm2 |
| movaps %xmm7, %xmm1 |
| movaps %xmm5, 48(%edx) |
| movaps %xmm4, 32(%edx) |
| movaps %xmm3, 16(%edx) |
| movaps %xmm2, (%edx) |
| lea 64(%edx), %edx |
| jmp L(Shl3LoopStart) |
| |
| L(Shl3LoopExit): |
| movlpd (%ecx), %xmm0 |
| movlpd 5(%ecx), %xmm1 |
| movlpd %xmm0, (%edx) |
| movlpd %xmm1, 5(%edx) |
| mov $13, %esi |
| jmp L(CopyFrom1To16Bytes) |
| |
| .p2align 4 |
| L(Shl4): |
| movaps -4(%ecx), %xmm1 |
| movaps 12(%ecx), %xmm2 |
| L(Shl4Start): |
| pcmpeqb %xmm2, %xmm0 |
| pmovmskb %xmm0, %eax |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit4Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl4LoopExit) |
| |
| palignr $4, %xmm1, %xmm2 |
| movaps %xmm3, %xmm1 |
| movaps %xmm2, (%edx) |
| movaps 28(%ecx), %xmm2 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit4Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl4LoopExit) |
| |
| palignr $4, %xmm1, %xmm2 |
| movaps %xmm2, (%edx) |
| movaps 28(%ecx), %xmm2 |
| movaps %xmm3, %xmm1 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit4Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl4LoopExit) |
| |
| palignr $4, %xmm1, %xmm2 |
| movaps %xmm2, (%edx) |
| movaps 28(%ecx), %xmm2 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit4Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl4LoopExit) |
| |
| palignr $4, %xmm3, %xmm2 |
| movaps %xmm2, (%edx) |
| lea 28(%ecx), %ecx |
| lea 16(%edx), %edx |
| |
| mov %ecx, %eax |
| and $-0x40, %ecx |
| sub %ecx, %eax |
| lea -12(%ecx), %ecx |
| sub %eax, %edx |
| #ifdef USE_AS_STRNCPY |
| add %eax, %ebx |
| #endif |
| movaps -4(%ecx), %xmm1 |
| |
| L(Shl4LoopStart): |
| movaps 12(%ecx), %xmm2 |
| movaps 28(%ecx), %xmm3 |
| movaps %xmm3, %xmm6 |
| movaps 44(%ecx), %xmm4 |
| movaps %xmm4, %xmm7 |
| movaps 60(%ecx), %xmm5 |
| pminub %xmm2, %xmm6 |
| pminub %xmm5, %xmm7 |
| pminub %xmm6, %xmm7 |
| pcmpeqb %xmm0, %xmm7 |
| pmovmskb %xmm7, %eax |
| movaps %xmm5, %xmm7 |
| palignr $4, %xmm4, %xmm5 |
| palignr $4, %xmm3, %xmm4 |
| test %eax, %eax |
| jnz L(Shl4Start) |
| #ifdef USE_AS_STRNCPY |
| sub $64, %ebx |
| jbe L(StrncpyLeave4) |
| #endif |
| palignr $4, %xmm2, %xmm3 |
| lea 64(%ecx), %ecx |
| palignr $4, %xmm1, %xmm2 |
| movaps %xmm7, %xmm1 |
| movaps %xmm5, 48(%edx) |
| movaps %xmm4, 32(%edx) |
| movaps %xmm3, 16(%edx) |
| movaps %xmm2, (%edx) |
| lea 64(%edx), %edx |
| jmp L(Shl4LoopStart) |
| |
| L(Shl4LoopExit): |
| movlpd (%ecx), %xmm0 |
| movl 8(%ecx), %esi |
| movlpd %xmm0, (%edx) |
| movl %esi, 8(%edx) |
| mov $12, %esi |
| jmp L(CopyFrom1To16Bytes) |
| |
| .p2align 4 |
| L(Shl5): |
| movaps -5(%ecx), %xmm1 |
| movaps 11(%ecx), %xmm2 |
| L(Shl5Start): |
| pcmpeqb %xmm2, %xmm0 |
| pmovmskb %xmm0, %eax |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit5Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl5LoopExit) |
| |
| palignr $5, %xmm1, %xmm2 |
| movaps %xmm3, %xmm1 |
| movaps %xmm2, (%edx) |
| movaps 27(%ecx), %xmm2 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit5Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl5LoopExit) |
| |
| palignr $5, %xmm1, %xmm2 |
| movaps %xmm2, (%edx) |
| movaps 27(%ecx), %xmm2 |
| movaps %xmm3, %xmm1 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit5Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl5LoopExit) |
| |
| palignr $5, %xmm1, %xmm2 |
| movaps %xmm2, (%edx) |
| movaps 27(%ecx), %xmm2 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit5Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl5LoopExit) |
| |
| palignr $5, %xmm3, %xmm2 |
| movaps %xmm2, (%edx) |
| lea 27(%ecx), %ecx |
| lea 16(%edx), %edx |
| |
| mov %ecx, %eax |
| and $-0x40, %ecx |
| sub %ecx, %eax |
| lea -11(%ecx), %ecx |
| sub %eax, %edx |
| #ifdef USE_AS_STRNCPY |
| add %eax, %ebx |
| #endif |
| movaps -5(%ecx), %xmm1 |
| |
| L(Shl5LoopStart): |
| movaps 11(%ecx), %xmm2 |
| movaps 27(%ecx), %xmm3 |
| movaps %xmm3, %xmm6 |
| movaps 43(%ecx), %xmm4 |
| movaps %xmm4, %xmm7 |
| movaps 59(%ecx), %xmm5 |
| pminub %xmm2, %xmm6 |
| pminub %xmm5, %xmm7 |
| pminub %xmm6, %xmm7 |
| pcmpeqb %xmm0, %xmm7 |
| pmovmskb %xmm7, %eax |
| movaps %xmm5, %xmm7 |
| palignr $5, %xmm4, %xmm5 |
| palignr $5, %xmm3, %xmm4 |
| test %eax, %eax |
| jnz L(Shl5Start) |
| #ifdef USE_AS_STRNCPY |
| sub $64, %ebx |
| jbe L(StrncpyLeave5) |
| #endif |
| palignr $5, %xmm2, %xmm3 |
| lea 64(%ecx), %ecx |
| palignr $5, %xmm1, %xmm2 |
| movaps %xmm7, %xmm1 |
| movaps %xmm5, 48(%edx) |
| movaps %xmm4, 32(%edx) |
| movaps %xmm3, 16(%edx) |
| movaps %xmm2, (%edx) |
| lea 64(%edx), %edx |
| jmp L(Shl5LoopStart) |
| |
| L(Shl5LoopExit): |
| movlpd (%ecx), %xmm0 |
| movl 7(%ecx), %esi |
| movlpd %xmm0, (%edx) |
| movl %esi, 7(%edx) |
| mov $11, %esi |
| jmp L(CopyFrom1To16Bytes) |
| |
| .p2align 4 |
| L(Shl6): |
| movaps -6(%ecx), %xmm1 |
| movaps 10(%ecx), %xmm2 |
| L(Shl6Start): |
| pcmpeqb %xmm2, %xmm0 |
| pmovmskb %xmm0, %eax |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit6Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl6LoopExit) |
| |
| palignr $6, %xmm1, %xmm2 |
| movaps %xmm3, %xmm1 |
| movaps %xmm2, (%edx) |
| movaps 26(%ecx), %xmm2 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit6Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl6LoopExit) |
| |
| palignr $6, %xmm1, %xmm2 |
| movaps %xmm2, (%edx) |
| movaps 26(%ecx), %xmm2 |
| movaps %xmm3, %xmm1 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit6Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl6LoopExit) |
| |
| palignr $6, %xmm1, %xmm2 |
| movaps %xmm2, (%edx) |
| movaps 26(%ecx), %xmm2 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit6Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl6LoopExit) |
| |
| palignr $6, %xmm3, %xmm2 |
| movaps %xmm2, (%edx) |
| lea 26(%ecx), %ecx |
| lea 16(%edx), %edx |
| |
| mov %ecx, %eax |
| and $-0x40, %ecx |
| sub %ecx, %eax |
| lea -10(%ecx), %ecx |
| sub %eax, %edx |
| #ifdef USE_AS_STRNCPY |
| add %eax, %ebx |
| #endif |
| movaps -6(%ecx), %xmm1 |
| |
| L(Shl6LoopStart): |
| movaps 10(%ecx), %xmm2 |
| movaps 26(%ecx), %xmm3 |
| movaps %xmm3, %xmm6 |
| movaps 42(%ecx), %xmm4 |
| movaps %xmm4, %xmm7 |
| movaps 58(%ecx), %xmm5 |
| pminub %xmm2, %xmm6 |
| pminub %xmm5, %xmm7 |
| pminub %xmm6, %xmm7 |
| pcmpeqb %xmm0, %xmm7 |
| pmovmskb %xmm7, %eax |
| movaps %xmm5, %xmm7 |
| palignr $6, %xmm4, %xmm5 |
| palignr $6, %xmm3, %xmm4 |
| test %eax, %eax |
| jnz L(Shl6Start) |
| #ifdef USE_AS_STRNCPY |
| sub $64, %ebx |
| jbe L(StrncpyLeave6) |
| #endif |
| palignr $6, %xmm2, %xmm3 |
| lea 64(%ecx), %ecx |
| palignr $6, %xmm1, %xmm2 |
| movaps %xmm7, %xmm1 |
| movaps %xmm5, 48(%edx) |
| movaps %xmm4, 32(%edx) |
| movaps %xmm3, 16(%edx) |
| movaps %xmm2, (%edx) |
| lea 64(%edx), %edx |
| jmp L(Shl6LoopStart) |
| |
| L(Shl6LoopExit): |
| movlpd (%ecx), %xmm0 |
| movl 6(%ecx), %esi |
| movlpd %xmm0, (%edx) |
| movl %esi, 6(%edx) |
| mov $10, %esi |
| jmp L(CopyFrom1To16Bytes) |
| |
| .p2align 4 |
| L(Shl7): |
| movaps -7(%ecx), %xmm1 |
| movaps 9(%ecx), %xmm2 |
| L(Shl7Start): |
| pcmpeqb %xmm2, %xmm0 |
| pmovmskb %xmm0, %eax |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit7Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl7LoopExit) |
| |
| palignr $7, %xmm1, %xmm2 |
| movaps %xmm3, %xmm1 |
| movaps %xmm2, (%edx) |
| movaps 25(%ecx), %xmm2 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit7Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl7LoopExit) |
| |
| palignr $7, %xmm1, %xmm2 |
| movaps %xmm2, (%edx) |
| movaps 25(%ecx), %xmm2 |
| movaps %xmm3, %xmm1 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit7Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl7LoopExit) |
| |
| palignr $7, %xmm1, %xmm2 |
| movaps %xmm2, (%edx) |
| movaps 25(%ecx), %xmm2 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit7Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl7LoopExit) |
| |
| palignr $7, %xmm3, %xmm2 |
| movaps %xmm2, (%edx) |
| lea 25(%ecx), %ecx |
| lea 16(%edx), %edx |
| |
| mov %ecx, %eax |
| and $-0x40, %ecx |
| sub %ecx, %eax |
| lea -9(%ecx), %ecx |
| sub %eax, %edx |
| #ifdef USE_AS_STRNCPY |
| add %eax, %ebx |
| #endif |
| movaps -7(%ecx), %xmm1 |
| |
| L(Shl7LoopStart): |
| movaps 9(%ecx), %xmm2 |
| movaps 25(%ecx), %xmm3 |
| movaps %xmm3, %xmm6 |
| movaps 41(%ecx), %xmm4 |
| movaps %xmm4, %xmm7 |
| movaps 57(%ecx), %xmm5 |
| pminub %xmm2, %xmm6 |
| pminub %xmm5, %xmm7 |
| pminub %xmm6, %xmm7 |
| pcmpeqb %xmm0, %xmm7 |
| pmovmskb %xmm7, %eax |
| movaps %xmm5, %xmm7 |
| palignr $7, %xmm4, %xmm5 |
| palignr $7, %xmm3, %xmm4 |
| test %eax, %eax |
| jnz L(Shl7Start) |
| #ifdef USE_AS_STRNCPY |
| sub $64, %ebx |
| jbe L(StrncpyLeave7) |
| #endif |
| palignr $7, %xmm2, %xmm3 |
| lea 64(%ecx), %ecx |
| palignr $7, %xmm1, %xmm2 |
| movaps %xmm7, %xmm1 |
| movaps %xmm5, 48(%edx) |
| movaps %xmm4, 32(%edx) |
| movaps %xmm3, 16(%edx) |
| movaps %xmm2, (%edx) |
| lea 64(%edx), %edx |
| jmp L(Shl7LoopStart) |
| |
| L(Shl7LoopExit): |
| movlpd (%ecx), %xmm0 |
| movl 5(%ecx), %esi |
| movlpd %xmm0, (%edx) |
| movl %esi, 5(%edx) |
| mov $9, %esi |
| jmp L(CopyFrom1To16Bytes) |
| |
| .p2align 4 |
| L(Shl8): |
| movaps -8(%ecx), %xmm1 |
| movaps 8(%ecx), %xmm2 |
| L(Shl8Start): |
| pcmpeqb %xmm2, %xmm0 |
| pmovmskb %xmm0, %eax |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit8Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl8LoopExit) |
| |
| palignr $8, %xmm1, %xmm2 |
| movaps %xmm3, %xmm1 |
| movaps %xmm2, (%edx) |
| movaps 24(%ecx), %xmm2 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit8Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl8LoopExit) |
| |
| palignr $8, %xmm1, %xmm2 |
| movaps %xmm2, (%edx) |
| movaps 24(%ecx), %xmm2 |
| movaps %xmm3, %xmm1 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| movaps %xmm2, %xmm3 |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit8Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl8LoopExit) |
| |
| palignr $8, %xmm1, %xmm2 |
| movaps %xmm2, (%edx) |
| movaps 24(%ecx), %xmm2 |
| |
| pcmpeqb %xmm2, %xmm0 |
| lea 16(%edx), %edx |
| pmovmskb %xmm0, %eax |
| lea 16(%ecx), %ecx |
| #ifdef USE_AS_STRNCPY |
| sub $16, %ebx |
| jbe L(StrncpyExit8Case2OrCase3) |
| #endif |
| test %eax, %eax |
| jnz L(Shl8LoopExit) |
| |
| palignr $8, %xmm3, %xmm2 |
| movaps %xmm2, (%edx) |
| lea 24(%ecx), %ecx |
| lea 16(%edx), %edx |
| |
| mov %ecx, %eax |
| and $-0x40, %ecx |
| sub %ecx, %eax |
| lea -8(%ecx), %ecx |
| sub %eax, %edx |
| #ifdef USE_AS_STRNCPY |
| add %eax, %ebx |
| #endif |
| movaps -8(%ecx), %xmm1 |
| |
| L(Shl8LoopStart): |
| movaps 8(%ecx), %xmm2 |
| movaps 24(%ecx), %xmm3 |
| movaps %xmm3, %xmm6 |
| movaps 40(%ecx), %xmm4 |
| movaps %xmm4, %xmm7 |
| movaps 56(%ecx), %xmm5 |
| pminub %xmm2, %xmm6 |
| pminub %xmm5, %xmm7 |
| pminub %xmm6, %xmm7 |
| pcmpeqb %xmm0, %xmm7 |
| pmovmskb %xmm7, %eax |
| movaps %xmm5, %xmm7 |
| palignr $8, %xmm4, %xmm5 |
| palignr $8, %xmm3, %xmm4 |
| test %eax, %eax |
| jnz L(Shl8Start) |
| #ifdef USE_AS_STRNCPY |
| sub $64, %ebx |
| jbe L(StrncpyLeave8) |
| #endif |
| palignr $8, %xmm2, %xmm3 |
| lea 64(%ecx), %ecx |
| palignr $8, %xmm1, %xmm2 |
| movaps %xmm7, %xmm1 |
| movaps %xmm5, 48(%edx) |
| movaps %xmm4, 32(%edx) |
| movaps %xmm3, 16(%edx) |
| movaps %xmm2, (%edx) |
| lea 64(%edx), %edx |
| jmp L(Shl8LoopStart) |
| |
| L(Shl8LoopExit): |
| movlpd (%ecx), %xmm0 |
| movlpd %xmm0, (%edx) |
| mov $8, %esi |
| jmp L(CopyFrom1To16Bytes) |
| |
	.p2align 4
/* Copy path for a source pointer that is 9 bytes past 16-byte
   alignment.  Aligned 16-byte loads are spliced with palignr $9 so the
   16-byte-aligned destination can be written with movaps.
   Register roles (established before this chunk — confirm against the
   function entry): %ecx = source, %edx = destination, %xmm0 = zero
   vector used as the NUL detector, %ebx = remaining byte budget
   (USE_AS_STRNCPY only), %esi = tail advance for
   L(CopyFrom1To16Bytes).  */
L(Shl9):
	movaps -9(%ecx), %xmm1		/* tail of the previous aligned chunk */
	movaps 7(%ecx), %xmm2		/* next aligned 16 source bytes */
L(Shl9Start):
	pcmpeqb %xmm2, %xmm0		/* 0xFF in bytes that are NUL */
	pmovmskb %xmm0, %eax
	movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit9Case2OrCase3)	/* budget ends inside this chunk */
#endif
	test %eax, %eax
	jnz L(Shl9LoopExit)
	/* No NUL found, so the compare result now in %xmm0 is all-zero
	   and %xmm0 keeps serving as the zero detector below.  */

	palignr $9, %xmm1, %xmm2	/* splice into 16 consecutive bytes */
	movaps %xmm3, %xmm1		/* saved chunk becomes new "previous" */
	movaps %xmm2, (%edx)
	movaps 23(%ecx), %xmm2

	/* Second unrolled 16-byte step.  */
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit9Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl9LoopExit)

	palignr $9, %xmm1, %xmm2
	movaps %xmm2, (%edx)
	movaps 23(%ecx), %xmm2
	movaps %xmm3, %xmm1

	/* Third unrolled 16-byte step.  */
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit9Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl9LoopExit)

	palignr $9, %xmm1, %xmm2
	movaps %xmm2, (%edx)
	movaps 23(%ecx), %xmm2

	/* Fourth unrolled 16-byte step.  */
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit9Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl9LoopExit)

	palignr $9, %xmm3, %xmm2
	movaps %xmm2, (%edx)
	lea 23(%ecx), %ecx
	lea 16(%edx), %edx

	/* Round the source down to a 64-byte boundary for the big loop;
	   %eax = overshoot, also subtracted from %edx (and credited back
	   to the strncpy budget) so the src/dst distance is preserved.
	   %ecx is biased by -7 so 7(%ecx) is the aligned load address.  */
	mov %ecx, %eax
	and $-0x40, %ecx
	sub %ecx, %eax
	lea -7(%ecx), %ecx
	sub %eax, %edx
#ifdef USE_AS_STRNCPY
	add %eax, %ebx
#endif
	movaps -9(%ecx), %xmm1

/* Main loop: 64 bytes per iteration.  pminub funnels the minimum byte
   of all four chunks into %xmm7 so a single pcmpeqb detects a NUL
   anywhere in the 64-byte window; if one is present, re-scan 16 bytes
   at a time via L(Shl9Start).  */
L(Shl9LoopStart):
	movaps 7(%ecx), %xmm2
	movaps 23(%ecx), %xmm3
	movaps %xmm3, %xmm6
	movaps 39(%ecx), %xmm4
	movaps %xmm4, %xmm7
	movaps 55(%ecx), %xmm5
	pminub %xmm2, %xmm6
	pminub %xmm5, %xmm7
	pminub %xmm6, %xmm7		/* min byte across the whole window */
	pcmpeqb %xmm0, %xmm7		/* zero iff some byte was NUL */
	pmovmskb %xmm7, %eax
	movaps %xmm5, %xmm7
	palignr $9, %xmm4, %xmm5
	palignr $9, %xmm3, %xmm4
	test %eax, %eax
	jnz L(Shl9Start)		/* NUL somewhere in these 64 bytes */
#ifdef USE_AS_STRNCPY
	sub $64, %ebx
	jbe L(StrncpyLeave9)		/* budget ends inside this window */
#endif
	palignr $9, %xmm2, %xmm3
	lea 64(%ecx), %ecx
	palignr $9, %xmm1, %xmm2
	movaps %xmm7, %xmm1
	movaps %xmm5, 48(%edx)
	movaps %xmm4, 32(%edx)
	movaps %xmm3, 16(%edx)
	movaps %xmm2, (%edx)
	lea 64(%edx), %edx
	jmp L(Shl9LoopStart)

L(Shl9LoopExit):
	/* NUL within the next 16 source bytes: store the 8 bytes that
	   straddle the splice boundary, then let the common tail code
	   finish; %esi = 7 is the pointer advance already covered.  */
	movlpd -1(%ecx), %xmm0
	movlpd %xmm0, -1(%edx)
	mov $7, %esi
	jmp L(CopyFrom1To16Bytes)
| |
	.p2align 4
/* Copy path for a source pointer 10 bytes past 16-byte alignment:
   identical structure to L(Shl9), spliced with palignr $10.
   %ecx = src, %edx = dst, %xmm0 = zero (NUL detector),
   %ebx = strncpy budget.  */
L(Shl10):
	movaps -10(%ecx), %xmm1		/* tail of previous aligned chunk */
	movaps 6(%ecx), %xmm2		/* next aligned 16 source bytes */
L(Shl10Start):
	pcmpeqb %xmm2, %xmm0		/* mark NUL bytes */
	pmovmskb %xmm0, %eax
	movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit10Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl10LoopExit)
	/* fall-through implies %xmm0 is all-zero again */

	palignr $10, %xmm1, %xmm2	/* 16 consecutive source bytes */
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	movaps 22(%ecx), %xmm2

	/* Second unrolled 16-byte step.  */
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit10Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl10LoopExit)

	palignr $10, %xmm1, %xmm2
	movaps %xmm2, (%edx)
	movaps 22(%ecx), %xmm2
	movaps %xmm3, %xmm1

	/* Third unrolled 16-byte step.  */
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit10Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl10LoopExit)

	palignr $10, %xmm1, %xmm2
	movaps %xmm2, (%edx)
	movaps 22(%ecx), %xmm2

	/* Fourth unrolled 16-byte step.  */
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit10Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl10LoopExit)

	palignr $10, %xmm3, %xmm2
	movaps %xmm2, (%edx)
	lea 22(%ecx), %ecx
	lea 16(%edx), %edx

	/* Round src down to 64 bytes; keep src/dst distance and budget.  */
	mov %ecx, %eax
	and $-0x40, %ecx
	sub %ecx, %eax
	lea -6(%ecx), %ecx
	sub %eax, %edx
#ifdef USE_AS_STRNCPY
	add %eax, %ebx
#endif
	movaps -10(%ecx), %xmm1

/* 64-bytes-per-iteration loop; pminub funnels the minimum byte of the
   window into %xmm7 so one pcmpeqb spots a NUL anywhere in it.  */
L(Shl10LoopStart):
	movaps 6(%ecx), %xmm2
	movaps 22(%ecx), %xmm3
	movaps %xmm3, %xmm6
	movaps 38(%ecx), %xmm4
	movaps %xmm4, %xmm7
	movaps 54(%ecx), %xmm5
	pminub %xmm2, %xmm6
	pminub %xmm5, %xmm7
	pminub %xmm6, %xmm7
	pcmpeqb %xmm0, %xmm7
	pmovmskb %xmm7, %eax
	movaps %xmm5, %xmm7
	palignr $10, %xmm4, %xmm5
	palignr $10, %xmm3, %xmm4
	test %eax, %eax
	jnz L(Shl10Start)		/* NUL in window: 16-byte re-scan */
#ifdef USE_AS_STRNCPY
	sub $64, %ebx
	jbe L(StrncpyLeave10)
#endif
	palignr $10, %xmm2, %xmm3
	lea 64(%ecx), %ecx
	palignr $10, %xmm1, %xmm2
	movaps %xmm7, %xmm1
	movaps %xmm5, 48(%edx)
	movaps %xmm4, 32(%edx)
	movaps %xmm3, 16(%edx)
	movaps %xmm2, (%edx)
	lea 64(%edx), %edx
	jmp L(Shl10LoopStart)

L(Shl10LoopExit):
	/* Copy the 8 boundary bytes; %esi = 6 advance for the tail.  */
	movlpd -2(%ecx), %xmm0
	movlpd %xmm0, -2(%edx)
	mov $6, %esi
	jmp L(CopyFrom1To16Bytes)
| |
	.p2align 4
/* Copy path for a source pointer 11 bytes past 16-byte alignment:
   same structure as L(Shl9), spliced with palignr $11.
   %ecx = src, %edx = dst, %xmm0 = zero (NUL detector),
   %ebx = strncpy budget.  */
L(Shl11):
	movaps -11(%ecx), %xmm1		/* tail of previous aligned chunk */
	movaps 5(%ecx), %xmm2		/* next aligned 16 source bytes */
L(Shl11Start):
	pcmpeqb %xmm2, %xmm0		/* mark NUL bytes */
	pmovmskb %xmm0, %eax
	movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit11Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl11LoopExit)
	/* fall-through implies %xmm0 is all-zero again */

	palignr $11, %xmm1, %xmm2	/* 16 consecutive source bytes */
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	movaps 21(%ecx), %xmm2

	/* Second unrolled 16-byte step.  */
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit11Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl11LoopExit)

	palignr $11, %xmm1, %xmm2
	movaps %xmm2, (%edx)
	movaps 21(%ecx), %xmm2
	movaps %xmm3, %xmm1

	/* Third unrolled 16-byte step.  */
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit11Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl11LoopExit)

	palignr $11, %xmm1, %xmm2
	movaps %xmm2, (%edx)
	movaps 21(%ecx), %xmm2

	/* Fourth unrolled 16-byte step.  */
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit11Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl11LoopExit)

	palignr $11, %xmm3, %xmm2
	movaps %xmm2, (%edx)
	lea 21(%ecx), %ecx
	lea 16(%edx), %edx

	/* Round src down to 64 bytes; keep src/dst distance and budget.  */
	mov %ecx, %eax
	and $-0x40, %ecx
	sub %ecx, %eax
	lea -5(%ecx), %ecx
	sub %eax, %edx
#ifdef USE_AS_STRNCPY
	add %eax, %ebx
#endif
	movaps -11(%ecx), %xmm1

/* 64-bytes-per-iteration loop; pminub funnels the minimum byte of the
   window into %xmm7 so one pcmpeqb spots a NUL anywhere in it.  */
L(Shl11LoopStart):
	movaps 5(%ecx), %xmm2
	movaps 21(%ecx), %xmm3
	movaps %xmm3, %xmm6
	movaps 37(%ecx), %xmm4
	movaps %xmm4, %xmm7
	movaps 53(%ecx), %xmm5
	pminub %xmm2, %xmm6
	pminub %xmm5, %xmm7
	pminub %xmm6, %xmm7
	pcmpeqb %xmm0, %xmm7
	pmovmskb %xmm7, %eax
	movaps %xmm5, %xmm7
	palignr $11, %xmm4, %xmm5
	palignr $11, %xmm3, %xmm4
	test %eax, %eax
	jnz L(Shl11Start)		/* NUL in window: 16-byte re-scan */
#ifdef USE_AS_STRNCPY
	sub $64, %ebx
	jbe L(StrncpyLeave11)
#endif
	palignr $11, %xmm2, %xmm3
	lea 64(%ecx), %ecx
	palignr $11, %xmm1, %xmm2
	movaps %xmm7, %xmm1
	movaps %xmm5, 48(%edx)
	movaps %xmm4, 32(%edx)
	movaps %xmm3, 16(%edx)
	movaps %xmm2, (%edx)
	lea 64(%edx), %edx
	jmp L(Shl11LoopStart)

L(Shl11LoopExit):
	/* Copy the 8 boundary bytes; %esi = 5 advance for the tail.  */
	movlpd -3(%ecx), %xmm0
	movlpd %xmm0, -3(%edx)
	mov $5, %esi
	jmp L(CopyFrom1To16Bytes)
| |
	.p2align 4
/* Copy path for a source pointer 12 bytes past 16-byte alignment:
   same structure as L(Shl9), spliced with palignr $12.
   %ecx = src, %edx = dst, %xmm0 = zero (NUL detector),
   %ebx = strncpy budget.  */
L(Shl12):
	movaps -12(%ecx), %xmm1		/* tail of previous aligned chunk */
	movaps 4(%ecx), %xmm2		/* next aligned 16 source bytes */
L(Shl12Start):
	pcmpeqb %xmm2, %xmm0		/* mark NUL bytes */
	pmovmskb %xmm0, %eax
	movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit12Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl12LoopExit)
	/* fall-through implies %xmm0 is all-zero again */

	palignr $12, %xmm1, %xmm2	/* 16 consecutive source bytes */
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	movaps 20(%ecx), %xmm2

	/* Second unrolled 16-byte step.  */
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit12Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl12LoopExit)

	palignr $12, %xmm1, %xmm2
	movaps %xmm2, (%edx)
	movaps 20(%ecx), %xmm2
	movaps %xmm3, %xmm1

	/* Third unrolled 16-byte step.  */
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit12Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl12LoopExit)

	palignr $12, %xmm1, %xmm2
	movaps %xmm2, (%edx)
	movaps 20(%ecx), %xmm2

	/* Fourth unrolled 16-byte step.  */
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit12Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl12LoopExit)

	palignr $12, %xmm3, %xmm2
	movaps %xmm2, (%edx)
	lea 20(%ecx), %ecx
	lea 16(%edx), %edx

	/* Round src down to 64 bytes; keep src/dst distance and budget.  */
	mov %ecx, %eax
	and $-0x40, %ecx
	sub %ecx, %eax
	lea -4(%ecx), %ecx
	sub %eax, %edx
#ifdef USE_AS_STRNCPY
	add %eax, %ebx
#endif
	movaps -12(%ecx), %xmm1

/* 64-bytes-per-iteration loop; pminub funnels the minimum byte of the
   window into %xmm7 so one pcmpeqb spots a NUL anywhere in it.  */
L(Shl12LoopStart):
	movaps 4(%ecx), %xmm2
	movaps 20(%ecx), %xmm3
	movaps %xmm3, %xmm6
	movaps 36(%ecx), %xmm4
	movaps %xmm4, %xmm7
	movaps 52(%ecx), %xmm5
	pminub %xmm2, %xmm6
	pminub %xmm5, %xmm7
	pminub %xmm6, %xmm7
	pcmpeqb %xmm0, %xmm7
	pmovmskb %xmm7, %eax
	movaps %xmm5, %xmm7
	palignr $12, %xmm4, %xmm5
	palignr $12, %xmm3, %xmm4
	test %eax, %eax
	jnz L(Shl12Start)		/* NUL in window: 16-byte re-scan */
#ifdef USE_AS_STRNCPY
	sub $64, %ebx
	jbe L(StrncpyLeave12)
#endif
	palignr $12, %xmm2, %xmm3
	lea 64(%ecx), %ecx
	palignr $12, %xmm1, %xmm2
	movaps %xmm7, %xmm1
	movaps %xmm5, 48(%edx)
	movaps %xmm4, 32(%edx)
	movaps %xmm3, 16(%edx)
	movaps %xmm2, (%edx)
	lea 64(%edx), %edx
	jmp L(Shl12LoopStart)

L(Shl12LoopExit):
	/* Copy the 4 boundary bytes via %esi, then set %esi = 4 as the
	   tail advance (the tail code reloads %esi's saved value later
	   via POP).  */
	movl (%ecx), %esi
	movl %esi, (%edx)
	mov $4, %esi
	jmp L(CopyFrom1To16Bytes)
| |
	.p2align 4
/* Copy path for a source pointer 13 bytes past 16-byte alignment:
   same structure as L(Shl9), spliced with palignr $13.
   %ecx = src, %edx = dst, %xmm0 = zero (NUL detector),
   %ebx = strncpy budget.  */
L(Shl13):
	movaps -13(%ecx), %xmm1		/* tail of previous aligned chunk */
	movaps 3(%ecx), %xmm2		/* next aligned 16 source bytes */
L(Shl13Start):
	pcmpeqb %xmm2, %xmm0		/* mark NUL bytes */
	pmovmskb %xmm0, %eax
	movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit13Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl13LoopExit)
	/* fall-through implies %xmm0 is all-zero again */

	palignr $13, %xmm1, %xmm2	/* 16 consecutive source bytes */
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	movaps 19(%ecx), %xmm2

	/* Second unrolled 16-byte step.  */
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit13Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl13LoopExit)

	palignr $13, %xmm1, %xmm2
	movaps %xmm2, (%edx)
	movaps 19(%ecx), %xmm2
	movaps %xmm3, %xmm1

	/* Third unrolled 16-byte step.  */
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit13Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl13LoopExit)

	palignr $13, %xmm1, %xmm2
	movaps %xmm2, (%edx)
	movaps 19(%ecx), %xmm2

	/* Fourth unrolled 16-byte step.  */
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit13Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl13LoopExit)

	palignr $13, %xmm3, %xmm2
	movaps %xmm2, (%edx)
	lea 19(%ecx), %ecx
	lea 16(%edx), %edx

	/* Round src down to 64 bytes; keep src/dst distance and budget.  */
	mov %ecx, %eax
	and $-0x40, %ecx
	sub %ecx, %eax
	lea -3(%ecx), %ecx
	sub %eax, %edx
#ifdef USE_AS_STRNCPY
	add %eax, %ebx
#endif
	movaps -13(%ecx), %xmm1

/* 64-bytes-per-iteration loop; pminub funnels the minimum byte of the
   window into %xmm7 so one pcmpeqb spots a NUL anywhere in it.  */
L(Shl13LoopStart):
	movaps 3(%ecx), %xmm2
	movaps 19(%ecx), %xmm3
	movaps %xmm3, %xmm6
	movaps 35(%ecx), %xmm4
	movaps %xmm4, %xmm7
	movaps 51(%ecx), %xmm5
	pminub %xmm2, %xmm6
	pminub %xmm5, %xmm7
	pminub %xmm6, %xmm7
	pcmpeqb %xmm0, %xmm7
	pmovmskb %xmm7, %eax
	movaps %xmm5, %xmm7
	palignr $13, %xmm4, %xmm5
	palignr $13, %xmm3, %xmm4
	test %eax, %eax
	jnz L(Shl13Start)		/* NUL in window: 16-byte re-scan */
#ifdef USE_AS_STRNCPY
	sub $64, %ebx
	jbe L(StrncpyLeave13)
#endif
	palignr $13, %xmm2, %xmm3
	lea 64(%ecx), %ecx
	palignr $13, %xmm1, %xmm2
	movaps %xmm7, %xmm1
	movaps %xmm5, 48(%edx)
	movaps %xmm4, 32(%edx)
	movaps %xmm3, 16(%edx)
	movaps %xmm2, (%edx)
	lea 64(%edx), %edx
	jmp L(Shl13LoopStart)

L(Shl13LoopExit):
	/* Copy the 4 boundary bytes; %esi = 3 advance for the tail.  */
	movl -1(%ecx), %esi
	movl %esi, -1(%edx)
	mov $3, %esi
	jmp L(CopyFrom1To16Bytes)
| |
	.p2align 4
/* Copy path for a source pointer 14 bytes past 16-byte alignment:
   same structure as L(Shl9), spliced with palignr $14.
   %ecx = src, %edx = dst, %xmm0 = zero (NUL detector),
   %ebx = strncpy budget.  */
L(Shl14):
	movaps -14(%ecx), %xmm1		/* tail of previous aligned chunk */
	movaps 2(%ecx), %xmm2		/* next aligned 16 source bytes */
L(Shl14Start):
	pcmpeqb %xmm2, %xmm0		/* mark NUL bytes */
	pmovmskb %xmm0, %eax
	movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit14Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl14LoopExit)
	/* fall-through implies %xmm0 is all-zero again */

	palignr $14, %xmm1, %xmm2	/* 16 consecutive source bytes */
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	movaps 18(%ecx), %xmm2

	/* Second unrolled 16-byte step.  */
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit14Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl14LoopExit)

	palignr $14, %xmm1, %xmm2
	movaps %xmm2, (%edx)
	movaps 18(%ecx), %xmm2
	movaps %xmm3, %xmm1

	/* Third unrolled 16-byte step.  */
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit14Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl14LoopExit)

	palignr $14, %xmm1, %xmm2
	movaps %xmm2, (%edx)
	movaps 18(%ecx), %xmm2

	/* Fourth unrolled 16-byte step.  */
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit14Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl14LoopExit)

	palignr $14, %xmm3, %xmm2
	movaps %xmm2, (%edx)
	lea 18(%ecx), %ecx
	lea 16(%edx), %edx

	/* Round src down to 64 bytes; keep src/dst distance and budget.  */
	mov %ecx, %eax
	and $-0x40, %ecx
	sub %ecx, %eax
	lea -2(%ecx), %ecx
	sub %eax, %edx
#ifdef USE_AS_STRNCPY
	add %eax, %ebx
#endif
	movaps -14(%ecx), %xmm1

/* 64-bytes-per-iteration loop; pminub funnels the minimum byte of the
   window into %xmm7 so one pcmpeqb spots a NUL anywhere in it.  */
L(Shl14LoopStart):
	movaps 2(%ecx), %xmm2
	movaps 18(%ecx), %xmm3
	movaps %xmm3, %xmm6
	movaps 34(%ecx), %xmm4
	movaps %xmm4, %xmm7
	movaps 50(%ecx), %xmm5
	pminub %xmm2, %xmm6
	pminub %xmm5, %xmm7
	pminub %xmm6, %xmm7
	pcmpeqb %xmm0, %xmm7
	pmovmskb %xmm7, %eax
	movaps %xmm5, %xmm7
	palignr $14, %xmm4, %xmm5
	palignr $14, %xmm3, %xmm4
	test %eax, %eax
	jnz L(Shl14Start)		/* NUL in window: 16-byte re-scan */
#ifdef USE_AS_STRNCPY
	sub $64, %ebx
	jbe L(StrncpyLeave14)
#endif
	palignr $14, %xmm2, %xmm3
	lea 64(%ecx), %ecx
	palignr $14, %xmm1, %xmm2
	movaps %xmm7, %xmm1
	movaps %xmm5, 48(%edx)
	movaps %xmm4, 32(%edx)
	movaps %xmm3, 16(%edx)
	movaps %xmm2, (%edx)
	lea 64(%edx), %edx
	jmp L(Shl14LoopStart)

L(Shl14LoopExit):
	/* Copy the 4 boundary bytes; %esi = 2 advance for the tail.  */
	movl -2(%ecx), %esi
	movl %esi, -2(%edx)
	mov $2, %esi
	jmp L(CopyFrom1To16Bytes)
| |
	.p2align 4
/* Copy path for a source pointer 15 bytes past 16-byte alignment:
   same structure as L(Shl9), spliced with palignr $15.
   %ecx = src, %edx = dst, %xmm0 = zero (NUL detector),
   %ebx = strncpy budget.  */
L(Shl15):
	movaps -15(%ecx), %xmm1		/* tail of previous aligned chunk */
	movaps 1(%ecx), %xmm2		/* next aligned 16 source bytes */
L(Shl15Start):
	pcmpeqb %xmm2, %xmm0		/* mark NUL bytes */
	pmovmskb %xmm0, %eax
	movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit15Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl15LoopExit)
	/* fall-through implies %xmm0 is all-zero again */

	palignr $15, %xmm1, %xmm2	/* 16 consecutive source bytes */
	movaps %xmm3, %xmm1
	movaps %xmm2, (%edx)
	movaps 17(%ecx), %xmm2

	/* Second unrolled 16-byte step.  */
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit15Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl15LoopExit)

	palignr $15, %xmm1, %xmm2
	movaps %xmm2, (%edx)
	movaps 17(%ecx), %xmm2
	movaps %xmm3, %xmm1

	/* Third unrolled 16-byte step.  */
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
	movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit15Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl15LoopExit)

	palignr $15, %xmm1, %xmm2
	movaps %xmm2, (%edx)
	movaps 17(%ecx), %xmm2

	/* Fourth unrolled 16-byte step.  */
	pcmpeqb %xmm2, %xmm0
	lea 16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
	sub $16, %ebx
	jbe L(StrncpyExit15Case2OrCase3)
#endif
	test %eax, %eax
	jnz L(Shl15LoopExit)

	palignr $15, %xmm3, %xmm2
	movaps %xmm2, (%edx)
	lea 17(%ecx), %ecx
	lea 16(%edx), %edx

	/* Round src down to 64 bytes; keep src/dst distance and budget.  */
	mov %ecx, %eax
	and $-0x40, %ecx
	sub %ecx, %eax
	lea -1(%ecx), %ecx
	sub %eax, %edx
#ifdef USE_AS_STRNCPY
	add %eax, %ebx
#endif
	movaps -15(%ecx), %xmm1

/* 64-bytes-per-iteration loop; pminub funnels the minimum byte of the
   window into %xmm7 so one pcmpeqb spots a NUL anywhere in it.  */
L(Shl15LoopStart):
	movaps 1(%ecx), %xmm2
	movaps 17(%ecx), %xmm3
	movaps %xmm3, %xmm6
	movaps 33(%ecx), %xmm4
	movaps %xmm4, %xmm7
	movaps 49(%ecx), %xmm5
	pminub %xmm2, %xmm6
	pminub %xmm5, %xmm7
	pminub %xmm6, %xmm7
	pcmpeqb %xmm0, %xmm7
	pmovmskb %xmm7, %eax
	movaps %xmm5, %xmm7
	palignr $15, %xmm4, %xmm5
	palignr $15, %xmm3, %xmm4
	test %eax, %eax
	jnz L(Shl15Start)		/* NUL in window: 16-byte re-scan */
#ifdef USE_AS_STRNCPY
	sub $64, %ebx
	jbe L(StrncpyLeave15)
#endif
	palignr $15, %xmm2, %xmm3
	lea 64(%ecx), %ecx
	palignr $15, %xmm1, %xmm2
	movaps %xmm7, %xmm1
	movaps %xmm5, 48(%edx)
	movaps %xmm4, 32(%edx)
	movaps %xmm3, 16(%edx)
	movaps %xmm2, (%edx)
	lea 64(%edx), %edx
	jmp L(Shl15LoopStart)

L(Shl15LoopExit):
	/* Copy the 4 boundary bytes; %esi = 1 advance for the tail.
	   When neither STRCAT nor STRLCPY is defined, execution simply
	   falls through into L(CopyFrom1To16Bytes), which is emitted
	   immediately after this stanza.  */
	movl -3(%ecx), %esi
	movl %esi, -3(%edx)
	mov $1, %esi
#if defined USE_AS_STRCAT || defined USE_AS_STRLCPY
	jmp L(CopyFrom1To16Bytes)
#endif
| |
| |
#if !defined USE_AS_STRCAT && !defined USE_AS_STRLCPY

	.p2align 4
/* Common tail: copy the final 1..16 bytes containing the terminating
   NUL.  On entry: %esi = advance already covered by the Shl* exit
   stub, %al/%ah = NUL bitmask of the 16-byte window (bit n set means
   source byte n is 0), %ebx = remaining count (strncpy only).
   SAVE_RESULT, RETURN1 and POP are macros defined earlier in the file
   (outside this chunk); SAVE_RESULT(n) appears to produce the return
   value for the selected flavor — confirm against its definition.  */
L(CopyFrom1To16Bytes):
# ifdef USE_AS_STRNCPY
	add $16, %ebx			/* restore the budget taken up front */
# endif
	add %esi, %edx
	add %esi, %ecx

	POP (%esi)
	test %al, %al
	jz L(ExitHigh8)			/* first NUL is in bytes 8..15 */

L(CopyFrom1To16BytesLess8):
	mov %al, %ah
	and $15, %ah
	jz L(ExitHigh4)			/* first NUL is in bytes 4..7 */

	test $0x01, %al
	jnz L(Exit1)
	test $0x02, %al
	jnz L(Exit2)
	test $0x04, %al
	jnz L(Exit3)

	.p2align 4
/* NUL at byte 3: copy 4 bytes.  */
L(Exit4):
	movl (%ecx), %eax
	movl %eax, (%edx)
	SAVE_RESULT (3)
# ifdef USE_AS_STRNCPY
	sub $4, %ebx
	lea 4(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)	/* pad remaining bytes with 0 */
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)			/* CF set iff *%eax == 0 */
	sbb $-1, %eax			/* step past last byte unless NUL */
# endif
# endif
	RETURN1

	.p2align 4
L(ExitHigh4):
	test $0x10, %al
	jnz L(Exit5)
	test $0x20, %al
	jnz L(Exit6)
	test $0x40, %al
	jnz L(Exit7)

	.p2align 4
/* NUL at byte 7: copy 8 bytes.  */
L(Exit8):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	SAVE_RESULT (7)
# ifdef USE_AS_STRNCPY
	sub $8, %ebx
	lea 8(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

	.p2align 4
L(ExitHigh8):
	mov %ah, %al
	and $15, %al
	jz L(ExitHigh12)		/* first NUL is in bytes 12..15 */

	test $0x01, %ah
	jnz L(Exit9)
	test $0x02, %ah
	jnz L(Exit10)
	test $0x04, %ah
	jnz L(Exit11)

	.p2align 4
/* NUL at byte 11: copy 12 bytes.  */
L(Exit12):
	movlpd (%ecx), %xmm0
	movl 8(%ecx), %eax
	movlpd %xmm0, (%edx)
	movl %eax, 8(%edx)
	SAVE_RESULT (11)
# ifdef USE_AS_STRNCPY
	sub $12, %ebx
	lea 12(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

	.p2align 4
L(ExitHigh12):
	test $0x10, %ah
	jnz L(Exit13)
	test $0x20, %ah
	jnz L(Exit14)
	test $0x40, %ah
	jnz L(Exit15)

	.p2align 4
/* NUL at byte 15: copy all 16 bytes.  */
L(Exit16):
	movdqu (%ecx), %xmm0
	movdqu %xmm0, (%edx)
	SAVE_RESULT (15)
# ifdef USE_AS_STRNCPY
	sub $16, %ebx
	lea 16(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1
| |
# ifdef USE_AS_STRNCPY

	CFI_PUSH(%esi)

	.p2align 4
/* Case2: a NUL byte was found in the same 16-byte window in which the
   strncpy byte budget (%ebx) expires — terminate at whichever comes
   first.  Each NUL-position test is interleaved with an equality test
   on the remaining count so the earlier of the two wins.
   Entry state matches L(CopyFrom1To16Bytes): %esi = advance,
   %al/%ah = NUL bitmask, %ebx = count before this window.  */
L(CopyFrom1To16BytesCase2):
	add $16, %ebx
	add %esi, %ecx
	add %esi, %edx

	POP (%esi)

	test %al, %al
	jz L(ExitHighCase2)		/* nothing in the low 8 bytes */

	cmp $8, %ebx
	ja L(CopyFrom1To16BytesLess8)	/* count can't cut short: normal path */

	test $0x01, %al
	jnz L(Exit1)
	cmp $1, %ebx
	je L(Exit1)
	test $0x02, %al
	jnz L(Exit2)
	cmp $2, %ebx
	je L(Exit2)
	test $0x04, %al
	jnz L(Exit3)
	cmp $3, %ebx
	je L(Exit3)
	test $0x08, %al
	jnz L(Exit4)
	cmp $4, %ebx
	je L(Exit4)
	test $0x10, %al
	jnz L(Exit5)
	cmp $5, %ebx
	je L(Exit5)
	test $0x20, %al
	jnz L(Exit6)
	cmp $6, %ebx
	je L(Exit6)
	test $0x40, %al
	jnz L(Exit7)
	cmp $7, %ebx
	je L(Exit7)
	jmp L(Exit8)

	.p2align 4
/* Case2 with the first NUL in bytes 8..15.  */
L(ExitHighCase2):
	cmp $8, %ebx
	jbe L(CopyFrom1To16BytesLess8Case3)	/* count expires first */

	test $0x01, %ah
	jnz L(Exit9)
	cmp $9, %ebx
	je L(Exit9)
	test $0x02, %ah
	jnz L(Exit10)
	cmp $10, %ebx
	je L(Exit10)
	test $0x04, %ah
	jnz L(Exit11)
	cmp $11, %ebx
	je L(Exit11)
	test $0x8, %ah
	jnz L(Exit12)
	cmp $12, %ebx
	je L(Exit12)
	test $0x10, %ah
	jnz L(Exit13)
	cmp $13, %ebx
	je L(Exit13)
	test $0x20, %ah
	jnz L(Exit14)
	cmp $14, %ebx
	je L(Exit14)
	test $0x40, %ah
	jnz L(Exit15)
	cmp $15, %ebx
	je L(Exit15)
	jmp L(Exit16)

	CFI_PUSH(%esi)

	.p2align 4
/* Dispatcher used by the Shl* stubs: Case2 if a NUL was seen
   (%eax != 0), otherwise Case3 (budget exhausted, no NUL).  */
L(CopyFrom1To16BytesCase2OrCase3):
	test %eax, %eax
	jnz L(CopyFrom1To16BytesCase2)

	.p2align 4
/* Case3: the byte budget expired with no NUL in sight — copy exactly
   %ebx (1..16) bytes and return without a terminating NUL.  */
L(CopyFrom1To16BytesCase3):
	add $16, %ebx
	add %esi, %edx
	add %esi, %ecx

	POP (%esi)

	cmp $8, %ebx
	ja L(ExitHigh8Case3)

L(CopyFrom1To16BytesLess8Case3):
	cmp $4, %ebx
	ja L(ExitHigh4Case3)

	cmp $1, %ebx
	je L(Exit1)
	cmp $2, %ebx
	je L(Exit2)
	cmp $3, %ebx
	je L(Exit3)
	movl (%ecx), %eax		/* exactly 4 bytes */
	movl %eax, (%edx)
	SAVE_RESULT (4)
	RETURN1

	.p2align 4
L(ExitHigh4Case3):
	cmp $5, %ebx
	je L(Exit5)
	cmp $6, %ebx
	je L(Exit6)
	cmp $7, %ebx
	je L(Exit7)
	movlpd (%ecx), %xmm0		/* exactly 8 bytes */
	movlpd %xmm0, (%edx)
	SAVE_RESULT (8)
	RETURN1

	.p2align 4
L(ExitHigh8Case3):
	cmp $12, %ebx
	ja L(ExitHigh12Case3)

	cmp $9, %ebx
	je L(Exit9)
	cmp $10, %ebx
	je L(Exit10)
	cmp $11, %ebx
	je L(Exit11)
	movlpd (%ecx), %xmm0		/* exactly 12 bytes */
	movl 8(%ecx), %eax
	movlpd %xmm0, (%edx)
	movl %eax, 8(%edx)
	SAVE_RESULT (12)
	RETURN1

	.p2align 4
L(ExitHigh12Case3):
	cmp $13, %ebx
	je L(Exit13)
	cmp $14, %ebx
	je L(Exit14)
	cmp $15, %ebx
	je L(Exit15)
	movlpd (%ecx), %xmm0		/* exactly 16 bytes */
	movlpd 8(%ecx), %xmm1
	movlpd %xmm0, (%edx)
	movlpd %xmm1, 8(%edx)
	SAVE_RESULT (16)
	RETURN1

# endif
| |
	.p2align 4
/* L(Exit<n>) stanzas: copy exactly n bytes from %ecx to %edx using the
   widest loads available, with overlapping loads for the odd sizes
   (e.g. Exit7 = 4 + overlapping 4).  Common strncpy epilogue in each:
   pad the remaining budget with zeros via
   L(StrncpyFillTailWithZero1); for stpcpy, `cmpb $1,(%eax)` sets CF
   iff the last stored byte is NUL, and `sbb $-1,%eax` advances the
   result pointer past it only when it is not NUL.  */
L(Exit1):
	movb (%ecx), %al
	movb %al, (%edx)
	SAVE_RESULT (0)
# ifdef USE_AS_STRNCPY
	sub $1, %ebx
	lea 1(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

	.p2align 4
L(Exit2):
	movw (%ecx), %ax
	movw %ax, (%edx)
	SAVE_RESULT (1)
# ifdef USE_AS_STRNCPY
	sub $2, %ebx
	lea 2(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

	.p2align 4
L(Exit3):
	movw (%ecx), %ax
	movw %ax, (%edx)
	movb 2(%ecx), %al
	movb %al, 2(%edx)
	SAVE_RESULT (2)
# ifdef USE_AS_STRNCPY
	sub $3, %ebx
	lea 3(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

	.p2align 4
L(Exit5):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movb 4(%ecx), %al
	movb %al, 4(%edx)
	SAVE_RESULT (4)
# ifdef USE_AS_STRNCPY
	sub $5, %ebx
	lea 5(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

	.p2align 4
L(Exit6):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movw 4(%ecx), %ax
	movw %ax, 4(%edx)
	SAVE_RESULT (5)
# ifdef USE_AS_STRNCPY
	sub $6, %ebx
	lea 6(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

	.p2align 4
L(Exit7):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movl 3(%ecx), %eax		/* overlapping 4-byte copy */
	movl %eax, 3(%edx)
	SAVE_RESULT (6)
# ifdef USE_AS_STRNCPY
	sub $7, %ebx
	lea 7(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

	.p2align 4
L(Exit9):
	movlpd (%ecx), %xmm0
	movb 8(%ecx), %al
	movlpd %xmm0, (%edx)
	movb %al, 8(%edx)
	SAVE_RESULT (8)
# ifdef USE_AS_STRNCPY
	sub $9, %ebx
	lea 9(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

	.p2align 4
L(Exit10):
	movlpd (%ecx), %xmm0
	movw 8(%ecx), %ax
	movlpd %xmm0, (%edx)
	movw %ax, 8(%edx)
	SAVE_RESULT (9)
# ifdef USE_AS_STRNCPY
	sub $10, %ebx
	lea 10(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

	.p2align 4
L(Exit11):
	movlpd (%ecx), %xmm0
	movl 7(%ecx), %eax		/* overlapping 4-byte copy */
	movlpd %xmm0, (%edx)
	movl %eax, 7(%edx)
	SAVE_RESULT (10)
# ifdef USE_AS_STRNCPY
	sub $11, %ebx
	lea 11(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

	.p2align 4
L(Exit13):
	movlpd (%ecx), %xmm0
	movlpd 5(%ecx), %xmm1		/* overlapping 8-byte copy */
	movlpd %xmm0, (%edx)
	movlpd %xmm1, 5(%edx)
	SAVE_RESULT (12)
# ifdef USE_AS_STRNCPY
	sub $13, %ebx
	lea 13(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

	.p2align 4
L(Exit14):
	movlpd (%ecx), %xmm0
	movlpd 6(%ecx), %xmm1		/* overlapping 8-byte copy */
	movlpd %xmm0, (%edx)
	movlpd %xmm1, 6(%edx)
	SAVE_RESULT (13)
# ifdef USE_AS_STRNCPY
	sub $14, %ebx
	lea 14(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1

	.p2align 4
L(Exit15):
	movlpd (%ecx), %xmm0
	movlpd 7(%ecx), %xmm1		/* overlapping 8-byte copy */
	movlpd %xmm0, (%edx)
	movlpd %xmm1, 7(%edx)
	SAVE_RESULT (14)
# ifdef USE_AS_STRNCPY
	sub $15, %ebx
	lea 15(%edx), %ecx
	jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN1
| |
	CFI_POP (%edi)

# ifdef USE_AS_STRNCPY
	.p2align 4
/* L(Fill<n>) stanzas: store exactly n zero bytes at %ecx.  On entry
   %edx and %xmm0 are both zero (set in L(StrncpyFillTailWithZero)),
   so stores of %edx/%xmm0 write zeros; odd sizes use overlapping
   stores.  */
L(Fill0):
	RETURN

	.p2align 4
L(Fill1):
	movb %dl, (%ecx)
	RETURN

	.p2align 4
L(Fill2):
	movw %dx, (%ecx)
	RETURN

	.p2align 4
L(Fill3):
	movw %dx, (%ecx)
	movb %dl, 2(%ecx)
	RETURN

	.p2align 4
L(Fill4):
	movl %edx, (%ecx)
	RETURN

	.p2align 4
L(Fill5):
	movl %edx, (%ecx)
	movb %dl, 4(%ecx)
	RETURN

	.p2align 4
L(Fill6):
	movl %edx, (%ecx)
	movw %dx, 4(%ecx)
	RETURN

	.p2align 4
L(Fill7):
	movl %edx, (%ecx)
	movl %edx, 3(%ecx)		/* overlapping zero store */
	RETURN

	.p2align 4
L(Fill8):
	movlpd %xmm0, (%ecx)
	RETURN

	.p2align 4
L(Fill9):
	movlpd %xmm0, (%ecx)
	movb %dl, 8(%ecx)
	RETURN

	.p2align 4
L(Fill10):
	movlpd %xmm0, (%ecx)
	movw %dx, 8(%ecx)
	RETURN

	.p2align 4
L(Fill11):
	movlpd %xmm0, (%ecx)
	movl %edx, 7(%ecx)
	RETURN

	.p2align 4
L(Fill12):
	movlpd %xmm0, (%ecx)
	movl %edx, 8(%ecx)
	RETURN

	.p2align 4
L(Fill13):
	movlpd %xmm0, (%ecx)
	movlpd %xmm0, 5(%ecx)		/* overlapping zero store */
	RETURN

	.p2align 4
L(Fill14):
	movlpd %xmm0, (%ecx)
	movlpd %xmm0, 6(%ecx)
	RETURN

	.p2align 4
L(Fill15):
	movlpd %xmm0, (%ecx)
	movlpd %xmm0, 7(%ecx)
	RETURN

	.p2align 4
L(Fill16):
	movlpd %xmm0, (%ecx)
	movlpd %xmm0, 8(%ecx)
	RETURN

	.p2align 4
/* Dispatch the last 0..16 bytes of zero fill to the matching
   L(Fill<n>) stanza.  %ebx = bytes still to fill (callers below may
   arrive with it biased by -16, hence StrncpyFillExit1 first).  */
L(StrncpyFillExit1):
	lea 16(%ebx), %ebx		/* undo the -16 bias */
L(FillFrom1To16Bytes):
	test %ebx, %ebx
	jz L(Fill0)
	cmp $16, %ebx
	je L(Fill16)
	cmp $8, %ebx
	je L(Fill8)
	jg L(FillMore8)
	cmp $4, %ebx
	je L(Fill4)
	jg L(FillMore4)
	cmp $2, %ebx
	jl L(Fill1)
	je L(Fill2)
	jg L(Fill3)
L(FillMore8): /* but less than 16 */
	cmp $12, %ebx
	je L(Fill12)
	jl L(FillLess12)
	cmp $14, %ebx
	jl L(Fill13)
	je L(Fill14)
	jg L(Fill15)
L(FillMore4): /* but less than 8 */
	cmp $6, %ebx
	jl L(Fill5)
	je L(Fill6)
	jg L(Fill7)
L(FillLess12): /* but more than 8 */
	cmp $10, %ebx
	jl L(Fill9)
	je L(Fill10)
	jmp L(Fill11)

	CFI_PUSH(%edi)

	.p2align 4
/* Zero the remaining %ebx destination bytes at %ecx (strncpy pads the
   buffer to its full length).  The *1 variant first pops %edi, which
   is still live on the non-tail path.  */
L(StrncpyFillTailWithZero1):
	POP (%edi)
L(StrncpyFillTailWithZero):
	pxor %xmm0, %xmm0		/* zero sources for all Fill stores */
	xor %edx, %edx
	sub $16, %ebx
	jbe L(StrncpyFillExit1)		/* <= 16 bytes total */

	movlpd %xmm0, (%ecx)		/* unaligned head: 16 zero bytes */
	movlpd %xmm0, 8(%ecx)

	lea 16(%ecx), %ecx

	/* Round %ecx down to 16 bytes; the bytes skipped over were just
	   zeroed above, so their count is added back to %ebx.  */
	mov %ecx, %edx
	and $0xf, %edx
	sub %edx, %ecx
	add %edx, %ebx
	xor %edx, %edx
	sub $64, %ebx
	jb L(StrncpyFillLess64)

/* Aligned bulk fill, 64 bytes per iteration.  */
L(StrncpyFillLoopMovdqa):
	movdqa %xmm0, (%ecx)
	movdqa %xmm0, 16(%ecx)
	movdqa %xmm0, 32(%ecx)
	movdqa %xmm0, 48(%ecx)
	lea 64(%ecx), %ecx
	sub $64, %ebx
	jae L(StrncpyFillLoopMovdqa)

L(StrncpyFillLess64):
	add $32, %ebx
	jl L(StrncpyFillLess32)
	movdqa %xmm0, (%ecx)
	movdqa %xmm0, 16(%ecx)
	lea 32(%ecx), %ecx
	sub $16, %ebx
	jl L(StrncpyFillExit1)
	movdqa %xmm0, (%ecx)
	lea 16(%ecx), %ecx
	jmp L(FillFrom1To16Bytes)

L(StrncpyFillLess32):
	add $16, %ebx
	jl L(StrncpyFillExit1)
	movdqa %xmm0, (%ecx)
	lea 16(%ecx), %ecx
	jmp L(FillFrom1To16Bytes)
# endif
| |
	.p2align 4
/* L(ExitTail<n>) stanzas: like L(Exit<n>) but for the short-string
   fast path (registers not yet saved) — copy exactly n bytes, using
   SAVE_RESULT_TAIL / RETURN macros (defined earlier in the file).
   strncpy pads the rest via L(StrncpyFillTailWithZero); for stpcpy,
   `cmpb $1,(%eax)` / `sbb $-1,%eax` advances the result past the last
   byte unless it is the terminating NUL.  */
L(ExitTail1):
	movb (%ecx), %al
	movb %al, (%edx)
	SAVE_RESULT_TAIL (0)
# ifdef USE_AS_STRNCPY
	sub $1, %ebx
	lea 1(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN

	.p2align 4
L(ExitTail2):
	movw (%ecx), %ax
	movw %ax, (%edx)
	SAVE_RESULT_TAIL (1)
# ifdef USE_AS_STRNCPY
	sub $2, %ebx
	lea 2(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN

	.p2align 4
L(ExitTail3):
	movw (%ecx), %ax
	movw %ax, (%edx)
	movb 2(%ecx), %al
	movb %al, 2(%edx)
	SAVE_RESULT_TAIL (2)
# ifdef USE_AS_STRNCPY
	sub $3, %ebx
	lea 3(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN

	.p2align 4
L(ExitTail4):
	movl (%ecx), %eax
	movl %eax, (%edx)
	SAVE_RESULT_TAIL (3)
# ifdef USE_AS_STRNCPY
	sub $4, %ebx
	lea 4(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN

	.p2align 4
L(ExitTail5):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movb 4(%ecx), %al
	movb %al, 4(%edx)
	SAVE_RESULT_TAIL (4)
# ifdef USE_AS_STRNCPY
	sub $5, %ebx
	lea 5(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN

	.p2align 4
L(ExitTail6):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movw 4(%ecx), %ax
	movw %ax, 4(%edx)
	SAVE_RESULT_TAIL (5)
# ifdef USE_AS_STRNCPY
	sub $6, %ebx
	lea 6(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN

	.p2align 4
L(ExitTail7):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movl 3(%ecx), %eax		/* overlapping 4-byte copy */
	movl %eax, 3(%edx)
	SAVE_RESULT_TAIL (6)
# ifdef USE_AS_STRNCPY
	sub $7, %ebx
	lea 7(%edx), %ecx
	jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	cmpb $1, (%eax)
	sbb $-1, %eax
# endif
# endif
	RETURN
| |
| .p2align 4 |
| L(ExitTail8): |
| movlpd (%ecx), %xmm0 |
| movlpd %xmm0, (%edx) |
| SAVE_RESULT_TAIL (7) |
| # ifdef USE_AS_STRNCPY |
| sub $8, %ebx |
| lea 8(%edx), %ecx |
| jnz L(StrncpyFillTailWithZero) |
| # endif |
| RETURN |
| |
	.p2align 4
/* End after exactly 9 bytes: 8-byte + 1-byte copy.  */
L(ExitTail9):
	movlpd	(%ecx), %xmm0
	movb	8(%ecx), %al
	movlpd	%xmm0, (%edx)
	movb	%al, 8(%edx)
	SAVE_RESULT_TAIL (8)
# ifdef USE_AS_STRNCPY
	sub	$9, %ebx
	lea	9(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	/* n == 9: bump eax past the last byte unless it is the NUL.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
# endif
# endif
	RETURN
| |
	.p2align 4
/* End after exactly 10 bytes: 8-byte + 2-byte copy.  */
L(ExitTail10):
	movlpd	(%ecx), %xmm0
	movw	8(%ecx), %ax
	movlpd	%xmm0, (%edx)
	movw	%ax, 8(%edx)
	SAVE_RESULT_TAIL (9)
# ifdef USE_AS_STRNCPY
	sub	$10, %ebx
	lea	10(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	/* n == 10: bump eax past the last byte unless it is the NUL.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
# endif
# endif
	RETURN
| |
	.p2align 4
/* End after exactly 11 bytes: 8-byte + overlapping 4-byte copy (7..10).  */
L(ExitTail11):
	movlpd	(%ecx), %xmm0
	movl	7(%ecx), %eax
	movlpd	%xmm0, (%edx)
	movl	%eax, 7(%edx)
	SAVE_RESULT_TAIL (10)
# ifdef USE_AS_STRNCPY
	sub	$11, %ebx
	lea	11(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	/* n == 11: bump eax past the last byte unless it is the NUL.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
# endif
# endif
	RETURN
| |
	.p2align 4
/* End after exactly 12 bytes: 8-byte + 4-byte copy.  */
L(ExitTail12):
	movlpd	(%ecx), %xmm0
	movl	8(%ecx), %eax
	movlpd	%xmm0, (%edx)
	movl	%eax, 8(%edx)
	SAVE_RESULT_TAIL (11)
# ifdef USE_AS_STRNCPY
	sub	$12, %ebx
	lea	12(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	/* n == 12: bump eax past the last byte unless it is the NUL.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
# endif
# endif
	RETURN
| |
	.p2align 4
/* End after exactly 13 bytes: two overlapping 8-byte copies (0..7, 5..12).  */
L(ExitTail13):
	movlpd	(%ecx), %xmm0
	movlpd	5(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 5(%edx)
	SAVE_RESULT_TAIL (12)
# ifdef USE_AS_STRNCPY
	sub	$13, %ebx
	lea	13(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	/* n == 13: bump eax past the last byte unless it is the NUL.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
# endif
# endif
	RETURN
| |
	.p2align 4
/* End after exactly 14 bytes: two overlapping 8-byte copies (0..7, 6..13).  */
L(ExitTail14):
	movlpd	(%ecx), %xmm0
	movlpd	6(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 6(%edx)
	SAVE_RESULT_TAIL (13)
# ifdef USE_AS_STRNCPY
	sub	$14, %ebx
	lea	14(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	/* n == 14: bump eax past the last byte unless it is the NUL.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
# endif
# endif
	RETURN
| |
	.p2align 4
/* End after exactly 15 bytes: two overlapping 8-byte copies (0..7, 7..14).
   NOTE(review): like ExitTail8, no USE_AS_STPCPY adjustment here while
   the surrounding tails have one — presumably intentional (NUL position
   already known on every path in), but confirm against the callers.  */
L(ExitTail15):
	movlpd	(%ecx), %xmm0
	movlpd	7(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 7(%edx)
	SAVE_RESULT_TAIL (14)
# ifdef USE_AS_STRNCPY
	sub	$15, %ebx
	lea	15(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
# endif
	RETURN
| |
	.p2align 4
/* End after exactly 16 bytes: one unaligned 16-byte SSE copy.  */
L(ExitTail16):
	movdqu	(%ecx), %xmm0
	movdqu	%xmm0, (%edx)
	SAVE_RESULT_TAIL (15)
# ifdef USE_AS_STRNCPY
	sub	$16, %ebx
	lea	16(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
	/* n == 16: bump eax past the last byte unless it is the NUL.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
# endif
# endif
	RETURN
#endif
| |
#ifdef USE_AS_STRNCPY
# ifndef USE_AS_STRCAT
	CFI_PUSH (%esi)
	CFI_PUSH (%edi)
# endif
	.p2align 4
/* Left the aligned 64-byte copy loop: xmm4..xmm7 hold four 16-byte
   source chunks already loaded, esi is the byte offset within the
   current 64-byte block, ebx has been pre-decremented by 64.
   Case2 = a NUL was detected (eax mask nonzero); Case3 = only the
   strncpy count n ran out.  */
L(StrncpyLeaveCase2OrCase3):
	test	%eax, %eax		/* eax = pcmpeqb/pmovmskb NUL mask of a chunk  */
	jnz	L(Aligned64LeaveCase2)

/* No NUL in flight: flush as many whole 16-byte chunks as n allows,
   then let CopyFrom1To16BytesCase3 finish the partial chunk.
   add $48 converts the 64-byte pre-decrement into per-16-byte steps.  */
L(Aligned64LeaveCase3):
	add	$48, %ebx
	jle	L(CopyFrom1To16BytesCase3)
	movaps	%xmm4, -64(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase3)
	movaps	%xmm5, -48(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase3)
	movaps	%xmm6, -32(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
	jmp	L(CopyFrom1To16BytesCase3)

/* A NUL lies somewhere in xmm4..xmm7: rescan chunk by chunk with
   pcmpeqb (xmm0 holds zero here), flushing chunks that precede the
   NUL while n permits.  Whichever limit hits first dispatches to the
   matching CopyFrom1To16Bytes* tail.  */
L(Aligned64LeaveCase2):
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm0, %eax
	add	$48, %ebx
	jle	L(CopyFrom1To16BytesCase2OrCase3)
	test	%eax, %eax
	jnz	L(CopyFrom1To16Bytes)	/* NUL is in xmm4 itself  */

	pcmpeqb	%xmm5, %xmm0
	pmovmskb %xmm0, %eax
	movaps	%xmm4, -64(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%eax, %eax
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqb	%xmm6, %xmm0
	pmovmskb %xmm0, %eax
	movaps	%xmm5, -48(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%eax, %eax
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqb	%xmm7, %xmm0
	pmovmskb %xmm0, %eax
	movaps	%xmm6, -32(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
	jmp	L(CopyFrom1To16BytesCase2)
| |
| /*--------------------------------------------------*/ |
| .p2align 4 |
| L(StrncpyExit1Case2OrCase3): |
| movlpd (%ecx), %xmm0 |
| movlpd 7(%ecx), %xmm1 |
| movlpd %xmm0, (%edx) |
| movlpd %xmm1, 7(%edx) |
| mov $15, %esi |
| test %eax, %eax |
| jnz L(CopyFrom1To16BytesCase2) |
| jmp L(CopyFrom1To16BytesCase3) |
| |
| .p2align 4 |
| L(StrncpyExit2Case2OrCase3): |
| movlpd (%ecx), %xmm0 |
| movlpd 6(%ecx), %xmm1 |
| movlpd %xmm0, (%edx) |
| movlpd %xmm1, 6(%edx) |
| mov $14, %esi |
| test %eax, %eax |
| jnz L(CopyFrom1To16BytesCase2) |
| jmp L(CopyFrom1To16BytesCase3) |
| |
| .p2align 4 |
| L(StrncpyExit3Case2OrCase3): |
| movlpd (%ecx), %xmm0 |
| movlpd 5(%ecx), %xmm1 |
| movlpd %xmm0, (%edx) |
| movlpd %xmm1, 5(%edx) |
| mov $13, %esi |
| test %eax, %eax |
| jnz L(CopyFrom1To16BytesCase2) |
| jmp L(CopyFrom1To16BytesCase3) |
| |
| .p2align 4 |
| L(StrncpyExit4Case2OrCase3): |
| movlpd (%ecx), %xmm0 |
| movl 8(%ecx), %esi |
| movlpd %xmm0, (%edx) |
| movl %esi, 8(%edx) |
| mov $12, %esi |
| test %eax, %eax |
| jnz L(CopyFrom1To16BytesCase2) |
| jmp L(CopyFrom1To16BytesCase3) |
| |
| .p2align 4 |
| L(StrncpyExit5Case2OrCase3): |
| movlpd (%ecx), %xmm0 |
| movl 7(%ecx), %esi |
| movlpd %xmm0, (%edx) |
| movl %esi, 7(%edx) |
| mov $11, %esi |
| test %eax, %eax |
| jnz L(CopyFrom1To16BytesCase2) |
| jmp L(CopyFrom1To16BytesCase3) |
| |
| .p2align 4 |
| L(StrncpyExit6Case2OrCase3): |
| movlpd (%ecx), %xmm0 |
| movl 6(%ecx), %esi |
| movlpd %xmm0, (%edx) |
| movl %esi, 6(%edx) |
| mov $10, %esi |
| test %eax, %eax |
| jnz L(CopyFrom1To16BytesCase2) |
| jmp L(CopyFrom1To16BytesCase3) |
| |
| .p2align 4 |
| L(StrncpyExit7Case2OrCase3): |
| movlpd (%ecx), %xmm0 |
| movl 5(%ecx), %esi |
| movlpd %xmm0, (%edx) |
| movl %esi, 5(%edx) |
| mov $9, %esi |
| test %eax, %eax |
| jnz L(CopyFrom1To16BytesCase2) |
| jmp L(CopyFrom1To16BytesCase3) |
| |
| .p2align 4 |
| L(StrncpyExit8Case2OrCase3): |
| movlpd (%ecx), %xmm0 |
| movlpd %xmm0, (%edx) |
| mov $8, %esi |
| test %eax, %eax |
| jnz L(CopyFrom1To16BytesCase2) |
| jmp L(CopyFrom1To16BytesCase3) |
| |
| .p2align 4 |
| L(StrncpyExit9Case2OrCase3): |
| movlpd (%ecx), %xmm0 |
| movlpd %xmm0, (%edx) |
| mov $7, %esi |
| test %eax, %eax |
| jnz L(CopyFrom1To16BytesCase2) |
| jmp L(CopyFrom1To16BytesCase3) |
| |
| .p2align 4 |
| L(StrncpyExit10Case2OrCase3): |
| movlpd -1(%ecx), %xmm0 |
| movlpd %xmm0, -1(%edx) |
| mov $6, %esi |
| test %eax, %eax |
| jnz L(CopyFrom1To16BytesCase2) |
| jmp L(CopyFrom1To16BytesCase3) |
| |
| .p2align 4 |
| L(StrncpyExit11Case2OrCase3): |
| movlpd -2(%ecx), %xmm0 |
| movlpd %xmm0, -2(%edx) |
| mov $5, %esi |
| test %eax, %eax |
| jnz L(CopyFrom1To16BytesCase2) |
| jmp L(CopyFrom1To16BytesCase3) |
| |
| .p2align 4 |
| L(StrncpyExit12Case2OrCase3): |
| movl (%ecx), %esi |
| movl %esi, (%edx) |
| mov $4, %esi |
| test %eax, %eax |
| jnz L(CopyFrom1To16BytesCase2) |
| jmp L(CopyFrom1To16BytesCase3) |
| |
| .p2align 4 |
| L(StrncpyExit13Case2OrCase3): |
| movl -1(%ecx), %esi |
| movl %esi, -1(%edx) |
| mov $3, %esi |
| test %eax, %eax |
| jnz L(CopyFrom1To16BytesCase2) |
| jmp L(CopyFrom1To16BytesCase3) |
| |
| .p2align 4 |
| L(StrncpyExit14Case2OrCase3): |
| movl -2(%ecx), %esi |
| movl %esi, -2(%edx) |
| mov $2, %esi |
| test %eax, %eax |
| jnz L(CopyFrom1To16BytesCase2) |
| jmp L(CopyFrom1To16BytesCase3) |
| |
| .p2align 4 |
| L(StrncpyExit15Case2OrCase3): |
| movl -3(%ecx), %esi |
| movl %esi, -3(%edx) |
| mov $1, %esi |
| test %eax, %eax |
| jnz L(CopyFrom1To16BytesCase2) |
| jmp L(CopyFrom1To16BytesCase3) |
| |
/* StrncpyLeaveN / StrncpyExitN, N = source-vs-dest alignment skew:
   leaving the misaligned 64-byte palignr loop because the strncpy
   count n (ebx, pre-decremented by 64) is nearly exhausted and no NUL
   was found.  xmm1 = previous 16 source bytes, xmm2 = current, and
   xmm4/xmm5 hold later chunks already shift-merged; xmm3 saves xmm2
   for the second palignr.  Each `jbe ExitN` fires when n no longer
   covers a whole 16-byte store; esi counts the bytes flushed.
   ExitN then copies the final (16-N) bytes ending at edx+esi+(16-N)
   with overlapping unaligned loads, clears esi, and jumps to the
   Case3 tail (which zero-fills/returns).  */
L(StrncpyLeave1):
	movaps	%xmm2, %xmm3
	add	$48, %ebx		/* undo 64-byte pre-decrement, per-16 accounting  */
	jle	L(StrncpyExit1)
	palignr	$1, %xmm1, %xmm2	/* merge across the 1-byte skew  */
	movaps	%xmm2, (%edx)
	movaps	31(%ecx), %xmm2		/* next source chunk (31 = 2*16 - 1)  */
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit1)
	palignr	$1, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit1)
	movaps	%xmm4, 32(%edx)		/* chunks 3 and 4 were pre-merged  */
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit1)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit1):
	lea	15(%edx, %esi), %edx	/* advance cursors past the last 15 bytes  */
	lea	15(%ecx, %esi), %ecx
	movdqu	-16(%ecx), %xmm0	/* overlapping 16-byte tail copy  */
	xor	%esi, %esi
	movdqu	%xmm0, -16(%edx)
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave2):			/* skew 2; same pattern as Leave1  */
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit2)
	palignr	$2, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	30(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit2)
	palignr	$2, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit2)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit2)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit2):
	lea	14(%edx, %esi), %edx
	lea	14(%ecx, %esi), %ecx
	movdqu	-16(%ecx), %xmm0
	xor	%esi, %esi
	movdqu	%xmm0, -16(%edx)
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave3):			/* skew 3  */
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit3)
	palignr	$3, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	29(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit3)
	palignr	$3, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit3)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit3)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit3):
	lea	13(%edx, %esi), %edx
	lea	13(%ecx, %esi), %ecx
	movdqu	-16(%ecx), %xmm0
	xor	%esi, %esi
	movdqu	%xmm0, -16(%edx)
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave4):			/* skew 4; 12-byte tail = 8 + 4  */
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit4)
	palignr	$4, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	28(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit4)
	palignr	$4, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit4)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit4)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit4):
	lea	12(%edx, %esi), %edx
	lea	12(%ecx, %esi), %ecx
	movlpd	-12(%ecx), %xmm0
	movl	-4(%ecx), %eax
	movlpd	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave5):			/* skew 5; 11-byte tail = 8 + overlapping 4  */
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit5)
	palignr	$5, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	27(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit5)
	palignr	$5, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit5)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit5)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit5):
	lea	11(%edx, %esi), %edx
	lea	11(%ecx, %esi), %ecx
	movlpd	-11(%ecx), %xmm0
	movl	-4(%ecx), %eax
	movlpd	%xmm0, -11(%edx)
	movl	%eax, -4(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
| |
/* StrncpyLeave6..10: same structure as Leave1..5 (see comment there),
   for alignment skews 6..10.  Only the palignr shift count, the next
   chunk offset (32-N)(%ecx), and the ExitN tail-copy width differ.  */
L(StrncpyLeave6):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit6)
	palignr	$6, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	26(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit6)
	palignr	$6, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit6)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit6)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit6):
	lea	10(%edx, %esi), %edx
	lea	10(%ecx, %esi), %ecx

	movlpd	-10(%ecx), %xmm0	/* 10-byte tail = 8 + 2  */
	movw	-2(%ecx), %ax
	movlpd	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave7):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit7)
	palignr	$7, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	25(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit7)
	palignr	$7, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit7)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit7)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit7):
	lea	9(%edx, %esi), %edx
	lea	9(%ecx, %esi), %ecx

	movlpd	-9(%ecx), %xmm0		/* 9-byte tail = 8 + 1 (via %ah)  */
	movb	-1(%ecx), %ah
	movlpd	%xmm0, -9(%edx)
	movb	%ah, -1(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave8):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit8)
	palignr	$8, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	24(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit8)
	palignr	$8, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit8)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit8)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit8):
	lea	8(%edx, %esi), %edx
	lea	8(%ecx, %esi), %ecx
	movlpd	-8(%ecx), %xmm0		/* exact 8-byte tail  */
	movlpd	%xmm0, -8(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave9):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit9)
	palignr	$9, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	23(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit9)
	palignr	$9, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit9)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit9)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit9):
	lea	7(%edx, %esi), %edx
	lea	7(%ecx, %esi), %ecx

	movlpd	-8(%ecx), %xmm0		/* 8-byte load reaches 1 byte back, covers the 7-byte tail  */
	movlpd	%xmm0, -8(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave10):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit10)
	palignr	$10, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	22(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit10)
	palignr	$10, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit10)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit10)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit10):
	lea	6(%edx, %esi), %edx
	lea	6(%ecx, %esi), %ecx

	movlpd	-8(%ecx), %xmm0		/* reaches 2 bytes back, covers the 6-byte tail  */
	movlpd	%xmm0, -8(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
| |
/* StrncpyLeave11..15: same structure as Leave1..5 (see comment there),
   for alignment skews 11..15; tails shrink to 5..1 bytes.  */
L(StrncpyLeave11):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit11)
	palignr	$11, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	21(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit11)
	palignr	$11, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit11)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit11)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit11):
	lea	5(%edx, %esi), %edx
	lea	5(%ecx, %esi), %ecx
	movl	-5(%ecx), %esi		/* 5-byte tail = 4 + 1; esi is scratch here  */
	movb	-1(%ecx), %ah
	movl	%esi, -5(%edx)
	movb	%ah, -1(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave12):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit12)
	palignr	$12, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	20(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit12)
	palignr	$12, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit12)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit12)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit12):
	lea	4(%edx, %esi), %edx
	lea	4(%ecx, %esi), %ecx
	movl	-4(%ecx), %eax		/* exact 4-byte tail  */
	movl	%eax, -4(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave13):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit13)
	palignr	$13, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	19(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit13)
	palignr	$13, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit13)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit13)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit13):
	lea	3(%edx, %esi), %edx
	lea	3(%ecx, %esi), %ecx

	movl	-4(%ecx), %eax		/* 4-byte load reaches 1 byte back, covers the 3-byte tail  */
	movl	%eax, -4(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave14):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit14)
	palignr	$14, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	18(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit14)
	palignr	$14, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit14)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit14)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit14):
	lea	2(%edx, %esi), %edx
	lea	2(%ecx, %esi), %ecx
	movw	-2(%ecx), %ax		/* exact 2-byte tail  */
	movw	%ax, -2(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave15):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit15)
	palignr	$15, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	17(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit15)
	palignr	$15, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit15)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit15)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit15):
	lea	1(%edx, %esi), %edx
	lea	1(%ecx, %esi), %ecx
	movb	-1(%ecx), %ah		/* single-byte tail  */
	movb	%ah, -1(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
#endif
| |
#if !defined USE_AS_STRCAT && ! defined USE_AS_STRLCPY
# ifdef USE_AS_STRNCPY
	CFI_POP (%esi)
	CFI_POP (%edi)

	.p2align 4
/* strncpy with n == 0: nothing is copied, return the destination.  */
L(ExitTail0):
	movl	%edx, %eax		/* return value = dst (edx)  */
	RETURN
| |
	.p2align 4
/* Short strncpy, 13 <= n <= 15, with no NUL in the first 8 source
   bytes (callers established that).  Probe bytes 8..13 for the NUL,
   but always compare n first so we never read a source byte at or
   beyond the n limit.  Fall-through: n == 15 and no NUL in 0..13.  */
L(StrncpyExit15Bytes):
	cmp	$12, %ebx
	jbe	L(StrncpyExit12Bytes)	/* n <= 12 handled separately  */
	cmpb	$0, 8(%ecx)
	jz	L(ExitTail9)
	cmpb	$0, 9(%ecx)
	jz	L(ExitTail10)
	cmpb	$0, 10(%ecx)
	jz	L(ExitTail11)
	cmpb	$0, 11(%ecx)
	jz	L(ExitTail12)
	cmp	$13, %ebx
	je	L(ExitTail13)		/* limit check before touching byte 12  */
	cmpb	$0, 12(%ecx)
	jz	L(ExitTail13)
	cmp	$14, %ebx
	je	L(ExitTail14)		/* limit check before touching byte 13  */
	cmpb	$0, 13(%ecx)
	jz	L(ExitTail14)
	/* Copy 15 bytes with two overlapping 8-byte moves (0..7, 7..14).  */
	movlpd	(%ecx), %xmm0
	movlpd	7(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 7(%edx)
# ifdef USE_AS_STPCPY
	lea	14(%edx), %eax		/* eax -> last byte written  */
	cmpb	$1, (%eax)		/* if it is not NUL, return one past it  */
	sbb	$-1, %eax
# else
	movl	%edx, %eax		/* strcpy/strncpy return dst  */
# endif
	RETURN
| |
	.p2align 4
/* Short strncpy, 9 <= n <= 12, no NUL in the first 8 bytes.  Probe
   bytes 8..10, checking n before each read past the limit.
   Fall-through: n == 12 and no NUL in 0..10 -> copy 12 bytes.  */
L(StrncpyExit12Bytes):
	cmp	$9, %ebx
	je	L(ExitTail9)
	cmpb	$0, 8(%ecx)
	jz	L(ExitTail9)
	cmp	$10, %ebx
	je	L(ExitTail10)
	cmpb	$0, 9(%ecx)
	jz	L(ExitTail10)
	cmp	$11, %ebx
	je	L(ExitTail11)
	cmpb	$0, 10(%ecx)
	jz	L(ExitTail11)
	movlpd	(%ecx), %xmm0		/* 12 bytes = 8 + 4  */
	movl	8(%ecx), %eax
	movlpd	%xmm0, (%edx)
	movl	%eax, 8(%edx)
	SAVE_RESULT_TAIL (11)
# ifdef USE_AS_STPCPY
	/* If byte 11 is not the NUL, stpcpy returns one past it.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
# endif
	RETURN
| |
	.p2align 4
/* Short strncpy, 5 <= n <= 8.  Bytes 0..3 may be probed freely (n > 4
   here); from byte 4 on, compare n first to avoid reading at or past
   the limit.  Fall-through: n == 8 and no NUL in 0..6 -> copy 8 bytes.  */
L(StrncpyExit8Bytes):
	cmp	$4, %ebx
	jbe	L(StrncpyExit4Bytes)	/* n <= 4 handled separately  */
	cmpb	$0, (%ecx)
	jz	L(ExitTail1)
	cmpb	$0, 1(%ecx)
	jz	L(ExitTail2)
	cmpb	$0, 2(%ecx)
	jz	L(ExitTail3)
	cmpb	$0, 3(%ecx)
	jz	L(ExitTail4)

	cmp	$5, %ebx
	je	L(ExitTail5)
	cmpb	$0, 4(%ecx)
	jz	L(ExitTail5)
	cmp	$6, %ebx
	je	L(ExitTail6)
	cmpb	$0, 5(%ecx)
	jz	L(ExitTail6)
	cmp	$7, %ebx
	je	L(ExitTail7)
	cmpb	$0, 6(%ecx)
	jz	L(ExitTail7)
	movlpd	(%ecx), %xmm0		/* copy all 8 bytes  */
	movlpd	%xmm0, (%edx)
# ifdef USE_AS_STPCPY
	lea	7(%edx), %eax		/* eax -> last byte written  */
	cmpb	$1, (%eax)		/* return one past it unless it is NUL  */
	sbb	$-1, %eax
# else
	movl	%edx, %eax
# endif
	RETURN
| |
	.p2align 4
/* Short strncpy, 0 <= n <= 4.  Check n before every source-byte read
   so no byte at or past the limit is touched.
   Fall-through: n == 4 and no NUL in 0..2 -> copy 4 bytes.  */
L(StrncpyExit4Bytes):
	test	%ebx, %ebx
	jz	L(ExitTail0)		/* n == 0: copy nothing  */
	cmp	$1, %ebx
	je	L(ExitTail1)
	cmpb	$0, (%ecx)
	jz	L(ExitTail1)
	cmp	$2, %ebx
	je	L(ExitTail2)
	cmpb	$0, 1(%ecx)
	jz	L(ExitTail2)
	cmp	$3, %ebx
	je	L(ExitTail3)
	cmpb	$0, 2(%ecx)
	jz	L(ExitTail3)
	movl	(%ecx), %eax		/* copy bytes 0..3  */
	movl	%eax, (%edx)
	SAVE_RESULT_TAIL (3)
# ifdef USE_AS_STPCPY
	/* If byte 3 is not the NUL, stpcpy returns one past it.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
# endif
	RETURN
# endif

END (STRCPY)
#endif