| /* $OpenBSD: memset.S,v 1.1.1.1 2006/10/10 22:07:10 miod Exp $ */ |
| /* $NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $ */ |
| |
| /*- |
| * Copyright (c) 2002 SHIMIZU Ryo. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * 3. The name of the author may not be used to endorse or promote products |
| * derived from this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include <machine/asm.h> |
| |
| #define REG_PTR r0 /* large path: end pointer, starts at dst+len and moves down */ |
| #define REG_TMP1 r1 /* scratch: size limits, alignment masks, second store base */ |
| |
| #ifdef BZERO |
| # define REG_C r2 /* fill value; the bzero build loads 0 into it */ |
| # define REG_DST r4 /* destination pointer */ |
| # define REG_LEN r5 /* byte count */ |
| #else |
| # define REG_DST0 r3 /* copy of the original dst, moved to r0 on return */ |
| # define REG_DST r4 /* destination pointer */ |
| # define REG_C r5 /* fill value */ |
| # define REG_LEN r6 /* byte count */ |
| #endif |
| /* |
| * Entry point: memset (dst in REG_DST, fill byte in REG_C, length in |
| * REG_LEN), or bzero (dst, length) when BZERO is defined. |
| * Dispatches on len: >= 28 goes to large, 12..27 goes to small, and 0..11 |
| * is filled one byte at a time by the unrolled loop below. The memset |
| * build returns the original dst in r0. |
| */ |
| #ifdef BZERO |
| ENTRY(bzero) |
| #else |
| ENTRY(memset) |
| mov REG_DST,REG_DST0 /* keep the original dst for the return value */ |
| #endif |
| /* pick a fill strategy from len */ |
| mov #28,REG_TMP1 |
| cmp/hs REG_TMP1,REG_LEN /* if (len >= 28) goto large; */ |
| bt/s large |
| mov #12,REG_TMP1 /* delay slot; if (len >= 12) goto small; */ |
| cmp/hs REG_TMP1,REG_LEN |
| bt/s small |
| #ifdef BZERO |
| mov #0,REG_C /* delay slot: bzero always fills with 0 */ |
| #endif |
| /* tiny fill (0 to 11 bytes), written backward from dst+len */ |
| tst REG_LEN,REG_LEN /* T = (len == 0); in the memset build this is the delay slot of bt/s small */ |
| add REG_DST,REG_LEN /* REG_LEN = dst + len, the end pointer from here on */ |
| bt/s done /* len == 0: nothing to store */ |
| add #1,REG_DST /* delay slot: REG_DST = dst + 1 */ |
| |
| /* |
| * loop unrolled 4 times |
| * Each cmp/eq runs before the store it guards: T = (end == dst + 1), |
| * i.e. the next store writes dst[0] and is the last one. |
| */ |
| cmp/eq REG_DST,REG_LEN |
| 1: mov.b REG_C,@-REG_LEN /* *--end = c */ |
| bt/s done /* T was set before the store above */ |
| cmp/eq REG_DST,REG_LEN /* delay slot: test for the next store */ |
| mov.b REG_C,@-REG_LEN |
| bt/s done |
| cmp/eq REG_DST,REG_LEN |
| mov.b REG_C,@-REG_LEN |
| bt/s done |
| cmp/eq REG_DST,REG_LEN |
| mov.b REG_C,@-REG_LEN |
| bf/s 1b /* not the last byte yet: go around again */ |
| cmp/eq REG_DST,REG_LEN /* delay slot: test for the store at 1b */ |
| done: |
| #ifdef BZERO |
| rts |
| nop |
| #else |
| rts |
| mov REG_DST0,r0 /* delay slot: return the original dst */ |
| #endif |
| |
| /* |
| * small: fill of 12 to 27 bytes (reached from the entry dispatch). |
| * An even dst is passed on to small_aligned. For an odd dst, jump into the |
| * run of byte stores below at 1f - 2*len (each instruction is 2 bytes long) |
| * so that exactly len stores execute, covering dst[len-1] down to dst[0]. |
| */ |
| small: |
| mov REG_DST,r0 |
| tst #1,r0 /* T = (dst is even) */ |
| bt/s small_aligned |
| mov REG_DST,REG_TMP1 /* delay slot; turned into dst+16 below */ |
| shll REG_LEN /* len * 2 = bytes of code for len stores */ |
| mova 1f,r0 /* r0 = address of 1f; 1f must be 4-byte aligned! */ |
| add #16,REG_TMP1 /* REG_TMP1 = dst+16; */ |
| sub REG_LEN,r0 /* r0 = 1f - 2*len */ |
| jmp @r0 |
| mov REG_C,r0 /* delay slot: the stores below take the fill byte from r0 */ |
| |
| .align 2 |
| mov.b r0,@(15,REG_TMP1) /* dst[31] */ |
| mov.b r0,@(14,REG_TMP1) |
| mov.b r0,@(13,REG_TMP1) |
| mov.b r0,@(12,REG_TMP1) |
| mov.b r0,@(11,REG_TMP1) |
| mov.b r0,@(10,REG_TMP1) |
| mov.b r0,@(9,REG_TMP1) |
| mov.b r0,@(8,REG_TMP1) |
| mov.b r0,@(7,REG_TMP1) |
| mov.b r0,@(6,REG_TMP1) |
| mov.b r0,@(5,REG_TMP1) |
| mov.b r0,@(4,REG_TMP1) |
| mov.b r0,@(3,REG_TMP1) |
| mov.b r0,@(2,REG_TMP1) |
| mov.b r0,@(1,REG_TMP1) |
| mov.b r0,@REG_TMP1 /* dst[16] */ |
| mov.b r0,@(15,REG_DST) /* dst[15] */ |
| mov.b r0,@(14,REG_DST) |
| mov.b r0,@(13,REG_DST) |
| mov.b r0,@(12,REG_DST) |
| mov.b r0,@(11,REG_DST) |
| mov.b r0,@(10,REG_DST) |
| mov.b r0,@(9,REG_DST) |
| mov.b r0,@(8,REG_DST) |
| mov.b r0,@(7,REG_DST) |
| mov.b r0,@(6,REG_DST) |
| mov.b r0,@(5,REG_DST) |
| mov.b r0,@(4,REG_DST) |
| mov.b r0,@(3,REG_DST) |
| mov.b r0,@(2,REG_DST) |
| mov.b r0,@(1,REG_DST) /* dst[1] */ |
| #ifdef BZERO |
| rts /* bzero: label 1 is on the delay slot, so rts is one of the len instructions before it */ |
| 1: mov.b r0,@REG_DST /* dst[0], stored in the delay slot of rts */ |
| #else |
| mov.b r0,@REG_DST /* dst[0] */ |
| 1: rts |
| mov REG_DST0,r0 /* delay slot: return the original dst */ |
| #endif |
| |
| |
| /* |
| * small_aligned: 2-byte aligned small fill (12 to 27 bytes, entered from |
| * small when dst is even). |
| * Store an odd trailing byte first so the remaining length is even, then |
| * jump into the run of 16-bit stores below at 1f - len: each store is a |
| * 2-byte instruction that writes 2 bytes, ending with the one at dst[0]. |
| */ |
| small_aligned: |
| #ifndef BZERO |
| extu.b REG_C,REG_TMP1 /* REG_C = ??????xx, REG_TMP1 = ????00xx */ |
| shll8 REG_C /* REG_C = ????xx00, REG_TMP1 = ????00xx */ |
| or REG_TMP1,REG_C /* REG_C = ????xxxx */ |
| #endif |
| |
| mov REG_LEN,r0 |
| tst #1,r0 /* T = (len is even) */ |
| bt/s 1f |
| add #-1,r0 /* delay slot: r0 = len - 1, used only when len is odd */ |
| mov.b REG_C,@(r0,REG_DST) /* dst[len-1] = c: fill the odd last byte */ |
| mov r0,REG_LEN /* len -= 1, now even */ |
| 1: |
| |
| mova 1f,r0 /* r0 = address of 1f; 1f must be 4-byte aligned! */ |
| sub REG_LEN,r0 /* r0 = 1f - len */ |
| jmp @r0 |
| mov REG_C,r0 /* delay slot: the stores below write the 16-bit pattern from r0 */ |
| |
| .align 2 |
| mov.w r0,@(30,REG_DST) |
| mov.w r0,@(28,REG_DST) |
| mov.w r0,@(26,REG_DST) |
| mov.w r0,@(24,REG_DST) |
| mov.w r0,@(22,REG_DST) |
| mov.w r0,@(20,REG_DST) |
| mov.w r0,@(18,REG_DST) |
| mov.w r0,@(16,REG_DST) |
| mov.w r0,@(14,REG_DST) |
| mov.w r0,@(12,REG_DST) |
| mov.w r0,@(10,REG_DST) |
| mov.w r0,@(8,REG_DST) |
| mov.w r0,@(6,REG_DST) |
| mov.w r0,@(4,REG_DST) |
| mov.w r0,@(2,REG_DST) |
| #ifdef BZERO |
| rts /* bzero: label 1 is on the delay slot, so rts occupies one slot of the run */ |
| 1: mov.w r0,@REG_DST /* first halfword, stored in the delay slot of rts */ |
| #else |
| mov.w r0,@REG_DST /* first halfword */ |
| 1: rts |
| mov REG_DST0,r0 /* delay slot: return the original dst */ |
| #endif |
| |
| |
| /* |
| * large: fill of 28 bytes or more (reached from the entry dispatch). |
| * Widen the fill byte to a 32-bit pattern and set REG_PTR = dst + len. |
| * If dst or len is not a multiple of 4, go to unaligned_dst / |
| * unaligned_len, which trim the ragged ends and branch back to aligned. |
| * aligned then fills from REG_PTR downward with 32-bit stores. |
| */ |
| .align 2 |
| large: |
| #ifdef BZERO |
| mov #0,REG_C |
| #else |
| extu.b REG_C,REG_TMP1 /* REG_C = ??????xx, REG_TMP1 = ????00xx */ |
| shll8 REG_C /* REG_C = ????xx00, REG_TMP1 = ????00xx */ |
| or REG_C,REG_TMP1 /* REG_C = ????xx00, REG_TMP1 = ????xxxx */ |
| swap.w REG_TMP1,REG_C /* REG_C = xxxx????, REG_TMP1 = ????xxxx */ |
| xtrct REG_TMP1,REG_C /* REG_C = xxxxxxxx */ |
| #endif |
| |
| mov #3,REG_TMP1 |
| tst REG_TMP1,REG_DST /* T = ((dst & 3) == 0) */ |
| mov REG_DST,REG_PTR |
| bf/s unaligned_dst |
| add REG_LEN,REG_PTR /* delay slot: REG_PTR = dst + len; */ |
| tst REG_TMP1,REG_LEN /* T = ((len & 3) == 0) */ |
| bf/s unaligned_len /* delay slot is the mov #32 at aligned; REG_TMP1 is reloaded when that path returns to aligned */ |
| |
| aligned: |
| /* fill 32 bytes per iteration while len >= 32, moving REG_PTR down */ |
| mov #32,REG_TMP1 |
| cmp/hi REG_LEN,REG_TMP1 /* T = (32 > len) */ |
| bt 9f |
| .align 2 |
| 1: sub REG_TMP1,REG_PTR /* ptr -= 32 */ |
| mov.l REG_C,@REG_PTR |
| sub REG_TMP1,REG_LEN /* len -= 32 */ |
| mov.l REG_C,@(4,REG_PTR) |
| cmp/hi REG_LEN,REG_TMP1 /* T = (32 > len), consumed by the bf/s below */ |
| mov.l REG_C,@(8,REG_PTR) |
| mov.l REG_C,@(12,REG_PTR) |
| mov.l REG_C,@(16,REG_PTR) |
| mov.l REG_C,@(20,REG_PTR) |
| mov.l REG_C,@(24,REG_PTR) |
| bf/s 1b |
| mov.l REG_C,@(28,REG_PTR) /* delay slot: last store of this 32-byte group */ |
| 9: |
| |
| /* |
| * fill the remaining 4*n bytes between dst and REG_PTR |
| * Same shape as the byte loop at the entry: each cmp/eq runs before the |
| * store it guards, T = (ptr == dst + 4) meaning the next store is the last. |
| */ |
| cmp/eq REG_DST,REG_PTR |
| bt 9f /* ptr == dst: nothing left */ |
| add #4,REG_DST /* REG_DST = dst + 4 */ |
| cmp/eq REG_DST,REG_PTR |
| 1: mov.l REG_C,@-REG_PTR /* *--(longword)ptr = c */ |
| bt/s 9f |
| cmp/eq REG_DST,REG_PTR /* delay slot: test for the next store */ |
| mov.l REG_C,@-REG_PTR |
| bt/s 9f |
| cmp/eq REG_DST,REG_PTR |
| mov.l REG_C,@-REG_PTR |
| bt/s 9f |
| cmp/eq REG_DST,REG_PTR |
| mov.l REG_C,@-REG_PTR |
| bf/s 1b |
| cmp/eq REG_DST,REG_PTR /* delay slot: test for the store at 1b */ |
| 9: |
| #ifdef BZERO |
| rts |
| nop |
| #else |
| rts |
| mov REG_DST0,r0 /* delay slot: return the original dst */ |
| #endif |
| |
| /* |
| * unaligned_dst: entered from large when dst is not 4-byte aligned. |
| * Store a byte and/or a halfword at dst to round it up to a 4-byte |
| * boundary, then check the end pointer. |
| * unaligned_len: also entered directly from large when dst is aligned but |
| * len is not a multiple of 4. Store a byte and/or a halfword just below |
| * REG_PTR to round it down to a 4-byte boundary. |
| * Finally recompute len = REG_PTR - dst and continue at aligned. |
| */ |
| unaligned_dst: |
| mov #1,REG_TMP1 |
| tst REG_TMP1,REG_DST /* if (dst & 1) { */ |
| add #1,REG_TMP1 /* REG_TMP1 = 2, the mask for the halfword test */ |
| bt/s 2f |
| tst REG_TMP1,REG_DST /* delay slot: T = ((dst & 2) == 0) */ |
| mov.b REG_C,@REG_DST /* *dst++ = c; */ |
| add #1,REG_DST |
| tst REG_TMP1,REG_DST /* repeat the (dst & 2) test on the advanced dst */ |
| 2: /* } */ |
| /* if (dst & 2) { */ |
| bt 4f |
| mov.w REG_C,@REG_DST /* *(u_int16_t *)dst = c; dst += 2; */ |
| add #2,REG_DST |
| 4: /* } */ |
| |
| |
| tst #3,REG_PTR /* if (ptr & 3) { */ |
| bt/s 4f /* delay slot is the tst #1 below */ |
| unaligned_len: |
| tst #1,REG_PTR /* if (ptr & 1) { */ |
| bt/s 2f |
| tst #2,REG_PTR /* delay slot: T = ((ptr & 2) == 0), taken before the byte store; bit 1 is the same after an odd ptr is decremented */ |
| mov.b REG_C,@-REG_PTR /* *--ptr = c; */ |
| 2: /* } */ |
| /* if (ptr & 2) { */ |
| bt 4f |
| mov.w REG_C,@-REG_PTR /* *--(u_int16_t*)ptr = c; */ |
| 4: /* } */ |
| /* } */ |
| |
| mov REG_PTR,REG_LEN |
| bra aligned |
| sub REG_DST,REG_LEN /* delay slot: len = ptr - dst */ |
| |