| /* |
| * Copyright © 2008 Mozilla Corporation |
| * Copyright © 2010 Nokia Corporation |
| * |
| * Permission to use, copy, modify, distribute, and sell this software and its |
| * documentation for any purpose is hereby granted without fee, provided that |
| * the above copyright notice appear in all copies and that both that |
| * copyright notice and this permission notice appear in supporting |
| * documentation, and that the name of Mozilla Corporation not be used in |
| * advertising or publicity pertaining to distribution of the software without |
| * specific, written prior permission. Mozilla Corporation makes no |
| * representations about the suitability of this software for any purpose. It |
| * is provided "as is" without express or implied warranty. |
| * |
| * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS |
| * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND |
| * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY |
| * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN |
| * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING |
| * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS |
| * SOFTWARE. |
| * |
| * Author: Jeff Muizelaar (jeff@infidigm.net) |
| * |
| */ |
| |
| /* |
| * Prevent the stack from becoming executable: on Linux/ELF builds emit an |
| * empty .note.GNU-stack section. The section has no flags (in particular it |
| * is not marked executable) and no contents; only its presence matters. |
| */ |
| #if defined(__linux__) && defined(__ELF__) |
| .section .note.GNU-stack,"",%progbits |
| #endif |
| |
| .text /* everything below is code */ |
| .arch armv6 /* instruction set the assembler will accept */ |
| .object_arch armv4 /* architecture recorded in the object file, overriding .arch */ |
| .arm /* ARM (not Thumb) instruction encoding */ |
| .altmacro /* NOTE(review): the macros in this file name their parameters without a leading backslash and join them with '&'; presumably that depends on this mode - confirm */ |
| .p2align 2 /* align to 1 << 2 = 4 bytes */ |
| |
| /* |
| * pixman_asm_function fname |
| * |
| * Opens a function: exports the symbol, starts a .func region (the user of |
| * this macro is responsible for the matching .endfunc) and finally places |
| * the entry label. On ELF targets the symbol is additionally typed as a |
| * function and given hidden visibility. |
| */ |
| .macro pixman_asm_function fname |
| .global fname |
| .func fname |
| #ifdef __ELF__ |
| .type fname, %function |
| .hidden fname |
| #endif |
| fname: |
| .endm |
| |
| /* |
| * Note: This code is only using armv5te instructions (not even armv6), |
| * but is scheduled for ARM Cortex-A8 pipeline. So it might need to |
| * be split into a few variants, tuned for each microarchitecture. |
| * |
| * TODO: In order to get good performance on ARM9/ARM11 cores (which don't |
| * have efficient write combining), it needs to be changed to use 16-byte |
| * aligned writes using STM instruction. |
| * |
| * Nearest scanline scaler macro template uses the following arguments: |
| * fname - name of the function to generate |
| * bpp_shift - (1 << bpp_shift) is the size of pixel in bytes |
| * t - type suffix for LDR/STR instructions |
| * prefetch_distance - prefetch in the source image by that many |
| * pixels ahead |
| * prefetch_braking_distance - stop prefetching when that many pixels are |
| * remaining before the end of scanline |
| * |
| * Operands of the generated function, as the code reads them: |
| * r0 (W) - number of pixels still to be written |
| * r1 (DST) - destination pointer, advanced by one pixel per store |
| * r2 (SRC) - base register for the source loads |
| * r3 (VX) - source position; it is shifted right by |
| * (16 - bpp_shift) to form a byte offset, i.e. it is |
| * treated as having 16 fractional bits |
| * [sp] at entry - UNIT_X, added to VX after every pixel |
| * [sp + 4] at entry - SRC_WIDTH_FIXED, subtracted from VX for as long as |
| * VX is non-negative after a step |
| * |
| * Because of that subtraction VX is negative after every step, so the byte |
| * offsets added to SRC are negative. NOTE(review): presumably the caller |
| * passes SRC and VX already biased for this - confirm against the caller. |
| * |
| * r4-r8 are used as scratch and restored before returning. r10 is saved |
| * and restored as well although the code below never touches it. |
| * NOTE(review): probably there only to make the pushed size (24 bytes) a |
| * multiple of 8 - confirm. |
| */ |
| |
| .macro generate_nearest_scanline_func fname, bpp_shift, t, \ |
| prefetch_distance, \ |
| prefetch_braking_distance |
| |
| pixman_asm_function fname |
| W .req r0 /* pixels left to process */ |
| DST .req r1 /* destination pointer */ |
| SRC .req r2 /* source base address */ |
| VX .req r3 /* current source position */ |
| UNIT_X .req ip /* per-pixel step of VX */ |
| TMP1 .req r4 /* byte offset, then pixel data (even pixel of a pair) */ |
| TMP2 .req r5 /* byte offset, then pixel data (odd pixel of a pair) */ |
| VXMASK .req r6 /* mask that aligns a byte offset to the pixel size */ |
| PF_OFFS .req r7 /* position used for the prefetch address */ |
| SRC_WIDTH_FIXED .req r8 /* wrap amount for VX */ |
| |
| ldr UNIT_X, [sp] /* first stack word; read before the push below moves sp */ |
| push {r4, r5, r6, r7, r8, r10} /* 6 registers = 24 bytes */ |
| mvn VXMASK, #((1 << bpp_shift) - 1) /* VXMASK = ~(pixel size - 1) */ |
| ldr SRC_WIDTH_FIXED, [sp, #28] /* second stack word: 24 bytes just pushed + 4 */ |
| |
| /* |
| * define helper macro |
| * |
| * scale_2_pixels copies two pixels. On entry TMP1 must already hold the |
| * byte offset of the next source pixel; on exit it holds the offset of |
| * the pixel following the pair, so invocations can simply be chained. |
| * The offset of pixel N+1 and the VX update are issued between the load |
| * and the store of pixel N. |
| */ |
| .macro scale_2_pixels |
| ldr&t TMP1, [SRC, TMP1] /* fetch pixel at the precomputed offset; TMP1 now holds the data */ |
| and TMP2, VXMASK, VX, asr #(16 - bpp_shift) /* TMP2 = byte offset of the following pixel */ |
| adds VX, VX, UNIT_X /* step VX; the flags feed the wrap loop below */ |
| str&t TMP1, [DST], #(1 << bpp_shift) /* store, then advance DST by one pixel (flags untouched) */ |
| 9: subpls VX, VX, SRC_WIDTH_FIXED /* while VX is non-negative ... */ |
| bpl 9b /* ... keep subtracting SRC_WIDTH_FIXED */ |
| |
| ldr&t TMP2, [SRC, TMP2] /* second pixel: same steps with TMP1/TMP2 swapped */ |
| and TMP1, VXMASK, VX, asr #(16 - bpp_shift) /* TMP1 = offset for the next invocation */ |
| adds VX, VX, UNIT_X |
| str&t TMP2, [DST], #(1 << bpp_shift) |
| 9: subpls VX, VX, SRC_WIDTH_FIXED |
| bpl 9b |
| .endm |
| |
| /* |
| * now do the scaling |
| * |
| * First establish the invariant scale_2_pixels expects: TMP1 = byte offset |
| * of the first pixel, VX already advanced (and wrapped) to the second one. |
| */ |
| and TMP1, VXMASK, VX, asr #(16 - bpp_shift) |
| adds VX, VX, UNIT_X |
| 9: subpls VX, VX, SRC_WIDTH_FIXED |
| bpl 9b |
| subs W, W, #(8 + prefetch_braking_distance) /* bias W: non-negative only while >= 8 + braking distance pixels remain */ |
| blt 2f /* too short for the prefetching loop */ |
| /* calculate prefetch offset: PF_OFFS = VX + UNIT_X * prefetch_distance */ |
| mov PF_OFFS, #prefetch_distance |
| mla PF_OFFS, UNIT_X, PF_OFFS, VX |
| 1: /* main loop, process 8 pixels per iteration with prefetch */ |
| pld [SRC, PF_OFFS, asr #(16 - bpp_shift)] /* address formed like a pixel offset, but without the VXMASK alignment */ |
| add PF_OFFS, UNIT_X, lsl #3 /* two-operand form: PF_OFFS += UNIT_X * 8, one iteration's worth */ |
| scale_2_pixels |
| scale_2_pixels |
| scale_2_pixels |
| scale_2_pixels |
| subs W, W, #8 |
| bge 1b |
| 2: |
| subs W, W, #(4 - 8 - prefetch_braking_distance) /* undo the bias and subtract 4: W = remaining - 4 */ |
| blt 2f /* fewer than 4 pixels left */ |
| 1: /* process the remaining pixels, 4 per iteration, without prefetch */ |
| scale_2_pixels |
| scale_2_pixels |
| subs W, W, #4 |
| bge 1b |
| 2: /* W = remaining - 4 here; only multiples of 4 were removed, so bits 1 and 0 still match the remaining count */ |
| tst W, #2 /* two more pixels? */ |
| beq 2f |
| scale_2_pixels |
| 2: |
| tst W, #1 /* one last pixel? TMP1 already holds its offset */ |
| ldrne&t TMP1, [SRC, TMP1] |
| strne&t TMP1, [DST] |
| /* |
| * cleanup helper macro and the register aliases, so that this template |
| * can be expanded again for the next function |
| */ |
| .purgem scale_2_pixels |
| .unreq DST |
| .unreq SRC |
| .unreq W |
| .unreq VX |
| .unreq UNIT_X |
| .unreq TMP1 |
| .unreq TMP2 |
| .unreq VXMASK |
| .unreq PF_OFFS |
| .unreq SRC_WIDTH_FIXED |
| /* return: restore exactly the registers pushed on entry */ |
| pop {r4, r5, r6, r7, r8, r10} |
| bx lr |
| .endfunc /* closes the .func opened by pixman_asm_function */ |
| .endm |
| |
| /* |
| * 2-byte pixels: bpp_shift = 1, "h" suffix so that the loads and stores |
| * become ldrh/strh; prefetch_distance = 80, prefetch_braking_distance = 32. |
| */ |
| generate_nearest_scanline_func pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32 |
| |
| /* |
| * 4-byte pixels: bpp_shift = 2, empty suffix (plain ldr/str); |
| * prefetch_distance = 48, prefetch_braking_distance = 32. |
| */ |
| generate_nearest_scanline_func pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2, , 48, 32 |