| @/****************************************************************************** |
| @ * |
| @ * Copyright (C) 2015 The Android Open Source Project |
| @ * |
| @ * Licensed under the Apache License, Version 2.0 (the "License"); |
| @ * you may not use this file except in compliance with the License. |
| @ * You may obtain a copy of the License at: |
| @ * |
| @ * http://www.apache.org/licenses/LICENSE-2.0 |
| @ * |
| @ * Unless required by applicable law or agreed to in writing, software |
| @ * distributed under the License is distributed on an "AS IS" BASIS, |
| @ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| @ * See the License for the specific language governing permissions and |
| @ * limitations under the License. |
| @ * |
| @ ***************************************************************************** |
| @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
| @*/ |
| @** |
| @ ******************************************************************************* |
| @ * @file |
| @ * ih264_mem_fns_neon.s |
| @ * |
| @ * @brief |
| @ * Contains function definitions for memory manipulation |
| @ * |
| @ * @author |
| @ * Naveen SR |
| @ * |
| @ * @par List of Functions: |
| @ * - ih264_memcpy_mul_8_a9q() |
| @ * - ih264_memcpy_a9q() |
| @ * - ih264_memset_mul_8_a9q() |
| @ * - ih264_memset_a9q() |
| @ * - ih264_memset_16bit_mul_8_a9q() |
| @ * - ih264_memset_16bit_a9q() |
| @ * |
| @ * @remarks |
| @ * None |
| @ * |
| @ ******************************************************************************* |
| @* |
| |
| @** |
| @******************************************************************************* |
| @* |
| @* @brief |
| @* memcpy of a 1d array |
| @* |
| @* @par Description: |
| @* Does memcpy of 8bit data from source to destination for 8,16 or 32 number of bytes |
| @* |
| @* @param[out] pu1_dst |
| @* UWORD8 pointer to the destination |
| @* |
| @* @param[in] pu1_src |
| @* UWORD8 pointer to the source |
| @* |
| @* @param[in] num_bytes |
| @* number of bytes to copy |
| @* @returns |
| @* |
| @* @remarks |
| @* None |
| @* |
| @******************************************************************************* |
| @* |
| @void ih264_memcpy_mul_8(UWORD8 *pu1_dst, |
| @ UWORD8 *pu1_src, |
| @ UWORD8 num_bytes) |
| @**************Variables Vs Registers************************* |
| @ r0 => *pu1_dst |
| @ r1 => *pu1_src |
| @ r2 => num_bytes |
| |
| .text |
| .p2align 2 |
| |
| |
| .global ih264_memcpy_mul_8_a9q |
| |
| ih264_memcpy_mul_8_a9q: |
| |
| loop_neon_memcpy_mul_8: |
| @ Memcpy 8 bytes |
| vld1.8 d0, [r1]! |
| vst1.8 d0, [r0]! |
| |
| subs r2, r2, #8 |
| bne loop_neon_memcpy_mul_8 |
| bx lr |
| |
| |
| |
@*******************************************************************************
@*
@* void ih264_memcpy(UWORD8 *pu1_dst,
@*                   UWORD8 *pu1_src,
@*                   UWORD8 num_bytes)
@*
@* Copies num_bytes bytes from pu1_src to pu1_dst: whole 8-byte groups go
@* through a NEON load/store pair, the remaining 0..7 bytes are copied one
@* at a time. A num_bytes of 0 returns without touching memory.
@*
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => *pu1_src
@   r2 => num_bytes
@
@   Clobbers: r0, r1, r2, r3, d0, flags
@   The counter is tested with signed conditions (blt/bge), so num_bytes is
@   assumed to be below 2^31.



        .global ih264_memcpy_a9q

ih264_memcpy_a9q:
        subs    r2, r2, #8              @ r2 = num_bytes - 8
        blt     .Lmemcpy_tail           @ fewer than 8 bytes: no NEON pass

.Lmemcpy_neon_loop:
        @ Copy 8 bytes
        vld1.8  {d0}, [r1]!
        vst1.8  {d0}, [r0]!

        subs    r2, r2, #8              @ r2 = bytes left after this group, minus 8
        bge     .Lmemcpy_neon_loop      @ at least one more full group remains

.Lmemcpy_tail:
        @ Here r2 = (bytes still to copy) - 8. Undo the bias and leave when
        @ nothing is left; this also covers num_bytes == 0, which would
        @ otherwise enter the copy-then-decrement loop below with a zero count.
        adds    r2, r2, #8
        bxeq    lr

.Lmemcpy_byte_loop:
        ldrb    r3, [r1], #1
        strb    r3, [r0], #1
        subs    r2, r2, #1
        bne     .Lmemcpy_byte_loop
        bx      lr
| |
| |
| |
| |
@ void ih264_memset_mul_8(UWORD8 *pu1_dst,
@                         UWORD8 value,
@                         UWORD8 num_bytes)
@
@ Fills num_bytes bytes at pu1_dst with value, 8 bytes per iteration.
@
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => value      (low byte is replicated into all 8 lanes of d0)
@   r2 => num_bytes
@
@   Clobbers: r0, r2, d0, flags
@   Precondition: num_bytes is 8, 16 or 32. The loop exits only when the
@   counter reaches exactly zero, so 0 or a non-multiple of 8 keeps writing
@   past the requested length.





        .global ih264_memset_mul_8_a9q

ih264_memset_mul_8_a9q:
        vdup.8  d0, r1                  @ d0 = 8 copies of the fill byte

.Lmemset_mul_8_next:
        vst1.8  {d0}, [r0]!             @ store 8 fill bytes, dst += 8
        subs    r2, r2, #8              @ 8 fewer bytes remaining
        bne     .Lmemset_mul_8_next     @ repeat until the counter is exactly 0

        bx      lr
| |
| |
| |
| |
@ void ih264_memset(UWORD8 *pu1_dst,
@                   UWORD8 value,
@                   UWORD8 num_bytes)
@
@ Fills num_bytes bytes at pu1_dst with value: whole 8-byte groups are
@ written with a NEON store, the remaining 0..7 bytes one at a time.
@ A num_bytes of 0 returns without touching memory.
@
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => value      (low byte is what vdup.8 / strb write)
@   r2 => num_bytes
@
@   Clobbers: r0, r2, d0 (only when num_bytes >= 8), flags
@   The counter is tested with signed conditions (blt/bge), so num_bytes is
@   assumed to be below 2^31.



        .global ih264_memset_a9q

ih264_memset_a9q:
        subs    r2, r2, #8              @ r2 = num_bytes - 8
        blt     .Lmemset_tail           @ fewer than 8 bytes: no NEON pass
        vdup.8  d0, r1                  @ d0 = 8 copies of the fill byte

.Lmemset_neon_loop:
        @ Memset 8 bytes
        vst1.8  {d0}, [r0]!

        subs    r2, r2, #8              @ r2 = bytes left after this group, minus 8
        bge     .Lmemset_neon_loop      @ at least one more full group remains

.Lmemset_tail:
        @ Here r2 = (bytes still to write) - 8. Undo the bias and leave when
        @ nothing is left; this also covers num_bytes == 0, which would
        @ otherwise enter the store-then-decrement loop below with a zero count.
        adds    r2, r2, #8
        bxeq    lr

.Lmemset_byte_loop:
        strb    r1, [r0], #1
        subs    r2, r2, #1
        bne     .Lmemset_byte_loop
        bx      lr
| |
| |
| |
| |
@ void ih264_memset_16bit_mul_8(UWORD16 *pu2_dst,
@                               UWORD16 value,
@                               UWORD8 num_words)
@
@ Fills num_words 16-bit words at pu2_dst with value, 8 words (16 bytes)
@ per iteration.
@
@**************Variables Vs Registers*************************
@   r0 => *pu2_dst
@   r1 => value      (low halfword is replicated into all 4 lanes of d0)
@   r2 => num_words
@
@   Clobbers: r0, r2, d0, flags
@   Precondition: num_words is 8, 16 or 32. The loop exits only when the
@   counter reaches exactly zero, so 0 or a non-multiple of 8 keeps writing
@   past the requested length.





        .global ih264_memset_16bit_mul_8_a9q

ih264_memset_16bit_mul_8_a9q:
        vdup.16 d0, r1                  @ d0 = 4 copies of the fill halfword

.Lmemset_16bit_mul_8_next:
        @ Two 8-byte stores = 8 halfwords per pass
        vst1.16 {d0}, [r0]!
        vst1.16 {d0}, [r0]!
        subs    r2, r2, #8              @ 8 fewer words remaining
        bne     .Lmemset_16bit_mul_8_next

        bx      lr
| |
| |
| |
| |
@ void ih264_memset_16bit(UWORD16 *pu2_dst,
@                         UWORD16 value,
@                         UWORD8 num_words)
@
@ Fills num_words 16-bit words at pu2_dst with value: whole groups of 8
@ words are written with two NEON stores, the remaining 0..7 words one
@ halfword at a time. A num_words of 0 returns without touching memory.
@
@**************Variables Vs Registers*************************
@   r0 => *pu2_dst
@   r1 => value      (low halfword is what vdup.16 / strh write)
@   r2 => num_words
@
@   Clobbers: r0, r2, d0 (only when num_words >= 8), flags
@   The counter is tested with signed conditions (blt/bge), so num_words is
@   assumed to be below 2^31.



        .global ih264_memset_16bit_a9q

ih264_memset_16bit_a9q:
        subs    r2, r2, #8              @ r2 = num_words - 8
        blt     .Lmemset_16bit_tail     @ fewer than 8 words: no NEON pass
        vdup.16 d0, r1                  @ d0 = 4 copies of the fill halfword

.Lmemset_16bit_neon_loop:
        @ Memset 8 words (two 8-byte stores)
        vst1.16 {d0}, [r0]!
        vst1.16 {d0}, [r0]!

        subs    r2, r2, #8              @ r2 = words left after this group, minus 8
        bge     .Lmemset_16bit_neon_loop

.Lmemset_16bit_tail:
        @ Here r2 = (words still to write) - 8. Undo the bias and leave when
        @ nothing is left; this also covers num_words == 0, which would
        @ otherwise enter the store-then-decrement loop below with a zero count.
        adds    r2, r2, #8
        bxeq    lr

.Lmemset_16bit_word_loop:
        strh    r1, [r0], #2
        subs    r2, r2, #1
        bne     .Lmemset_16bit_word_loop
        bx      lr
| |
| |
| |
| |