| @/***************************************************************************** |
| @* |
| @* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore |
| @* |
| @* Licensed under the Apache License, Version 2.0 (the "License"); |
| @* you may not use this file except in compliance with the License. |
| @* You may obtain a copy of the License at: |
| @* |
| @* http://www.apache.org/licenses/LICENSE-2.0 |
| @* |
| @* Unless required by applicable law or agreed to in writing, software |
| @* distributed under the License is distributed on an "AS IS" BASIS, |
| @* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| @* See the License for the specific language governing permissions and |
| @* limitations under the License. |
| @* |
| @*****************************************************************************/ |
| @/** |
| @ ******************************************************************************* |
| @ * @file |
| @ * ihevc_padding_neon.s |
| @ * |
| @ * @brief |
| @ * contains function definitions padding |
| @ * |
| @ * @author |
| @ * naveen sr |
| @ * |
| @ * @par list of functions: |
| @ * - ihevc_pad_left_luma() |
| @ * - ihevc_pad_left_chroma() |
| @ * |
| @ * @remarks |
| @ * none |
| @ * |
| @ ******************************************************************************* |
| @*/ |
| |
| @/** |
| @******************************************************************************* |
| @* |
| @* @brief |
| @* padding (luma block) at the left of a 2d array |
| @* |
| @* @par description: |
| @* the left column of a 2d array is replicated for pad_size times at the left |
| @* |
| @* |
| @* @param[in] pu1_src |
| @* uword8 pointer to the source |
| @* |
| @* @param[in] src_strd |
| @* integer source stride |
| @* |
| @* @param[in] ht |
| @* integer height of the array |
| @* |
| @* @param[in] wd |
| @* integer width of the array |
| @* |
| @* @param[in] pad_size |
| @* integer -padding size of the array |
| @* |
| @* @param[in] ht |
| @* integer height of the array |
| @* |
| @* @param[in] wd |
| @* integer width of the array |
| @* |
| @* @returns |
| @* |
| @* @remarks |
| @* none |
| @* |
| @******************************************************************************* |
| @*/ |
| @.if pad_left_luma == c |
| @void ihevc_pad_left_luma(uword8 *pu1_src, |
| @ word32 src_strd, |
| @ word32 ht, |
| @ word32 pad_size) |
| @**************variables vs registers************************* |
| @ r0 => *pu1_src |
| @ r1 => src_strd |
| @ r2 => ht |
| @ r3 => pad_size |
| |
| .text |
| .align 4 |
| |
| |
| |
| |
| .globl ihevc_pad_left_luma_a9q |
| |
| .type ihevc_pad_left_luma_a9q, %function |
| |
| ihevc_pad_left_luma_a9q: |
| |
| stmfd sp!, {r4-r11,lr} @stack stores the values of the arguments |
| |
| loop_start_luma_left: |
| @ pad size is assumed to be pad_left = 80 |
| sub r4,r0,r3 |
| |
| ldrb r8,[r0] |
| add r0,r1 |
| ldrb r9,[r0] |
| add r0,r1 |
| ldrb r10,[r0] |
| add r0,r1 |
| ldrb r11,[r0] |
| add r0,r1 |
| |
| vdup.u8 q0,r8 |
| vdup.u8 q1,r9 |
| vdup.u8 q2,r10 |
| vdup.u8 q3,r11 |
| |
| add r5,r4,r1 |
| |
| vst1.8 {d0,d1},[r4]! @128/8 = 16 bytes store |
| vst1.8 {d0,d1},[r4]! @ 16 bytes store |
| vst1.8 {d0,d1},[r4]! @ 16 bytes store |
| vst1.8 {d0,d1},[r4]! @ 16 bytes store |
| vst1.8 {d0,d1},[r4] @ 16 bytes store |
| |
| add r6,r5,r1 |
| |
| vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store |
| vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store |
| vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store |
| vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store |
| vst1.8 {d2,d3},[r5] @128/8 = 16 bytes store |
| |
| add r7,r6,r1 |
| |
| vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store |
| vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store |
| vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store |
| vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store |
| vst1.8 {d4,d5},[r6] @128/8 = 16 bytes store |
| |
| subs r2,#4 |
| |
| vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store |
| vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store |
| vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store |
| vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store |
| vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store |
| |
| @ total of 4rows*(16*5) = 4 * 80 = 4 * pad_left store |
| |
| bne loop_start_luma_left |
| |
| ldmfd sp!,{r4-r11,pc} @reload the registers from sp |
| |
| |
| |
| |
| |
| @/** |
| @******************************************************************************* |
| @* |
| @* @brief |
| @* padding (chroma block) at the left of a 2d array |
| @* |
| @* @par description: |
| @* the left column of a 2d array is replicated for pad_size times at the left |
| @* |
| @* |
| @* @param[in] pu1_src |
| @* uword8 pointer to the source |
| @* |
| @* @param[in] src_strd |
| @* integer source stride |
| @* |
| @* @param[in] ht |
| @* integer height of the array |
| @* |
| @* @param[in] wd |
| @* integer width of the array (each colour component) |
| @* |
| @* @param[in] pad_size |
| @* integer -padding size of the array |
| @* |
| @* @param[in] ht |
| @* integer height of the array |
| @* |
| @* @param[in] wd |
| @* integer width of the array |
| @* |
| @* @returns |
| @* |
| @* @remarks |
| @* none |
| @* |
| @******************************************************************************* |
| @*/ |
| @.if pad_left_chroma == c |
| @void ihevc_pad_left_chroma(uword8 *pu1_src, |
| @ word32 src_strd, |
| @ word32 ht, |
| @ word32 pad_size) |
| @{ |
| @ r0 => *pu1_src |
| @ r1 => src_strd |
| @ r2 => ht |
| @ r3 => pad_size |
| |
| |
| |
| .globl ihevc_pad_left_chroma_a9q |
| |
| .type ihevc_pad_left_chroma_a9q, %function |
| |
| ihevc_pad_left_chroma_a9q: |
| |
| stmfd sp!, {r4-r11, lr} @stack stores the values of the arguments |
| |
| loop_start_chroma_left: |
| @ pad size is assumed to be pad_left = 80 |
| sub r4,r0,r3 |
| |
| ldrh r8,[r0] |
| add r0,r1 |
| ldrh r9,[r0] |
| add r0,r1 |
| ldrh r10,[r0] |
| add r0,r1 |
| ldrh r11,[r0] |
| add r0,r1 |
| |
| vdup.u16 q0,r8 |
| vdup.u16 q1,r9 |
| vdup.u16 q2,r10 |
| vdup.u16 q3,r11 |
| |
| add r5,r4,r1 |
| |
| vst1.8 {d0,d1},[r4]! @128/8 = 16 bytes store |
| vst1.8 {d0,d1},[r4]! @ 16 bytes store |
| vst1.8 {d0,d1},[r4]! @ 16 bytes store |
| vst1.8 {d0,d1},[r4]! @ 16 bytes store |
| vst1.8 {d0,d1},[r4] @ 16 bytes store |
| |
| add r6,r5,r1 |
| |
| vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store |
| vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store |
| vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store |
| vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store |
| vst1.8 {d2,d3},[r5] @128/8 = 16 bytes store |
| |
| add r7,r6,r1 |
| |
| vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store |
| vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store |
| vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store |
| vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store |
| vst1.8 {d4,d5},[r6] @128/8 = 16 bytes store |
| |
| subs r2,#4 |
| |
| vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store |
| vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store |
| vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store |
| vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store |
| vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store |
| |
| @ total of 4rows*(16*5) = 4 * 80 = 4 * pad_left store |
| |
| bne loop_start_chroma_left |
| |
| ldmfd sp!,{r4-r11,pc} @reload the registers from sp |
| |
| |
| |
| |
| |
| @/** |
| @******************************************************************************* |
| @* |
| @* @brief |
| @* padding (luma block) at the right of a 2d array |
| @* |
| @* @par description: |
| @* the right column of a 2d array is replicated for pad_size times at the right |
| @* |
| @* |
| @* @param[in] pu1_src |
| @* uword8 pointer to the source |
| @* |
| @* @param[in] src_strd |
| @* integer source stride |
| @* |
| @* @param[in] ht |
| @* integer height of the array |
| @* |
| @* @param[in] wd |
| @* integer width of the array |
| @* |
| @* @param[in] pad_size |
| @* integer -padding size of the array |
| @* |
| @* @param[in] ht |
| @* integer height of the array |
| @* |
| @* @param[in] wd |
| @* integer width of the array |
| @* |
| @* @returns |
| @* |
| @* @remarks |
| @* none |
| @* |
| @******************************************************************************* |
| @*/ |
| @.if pad_right_luma == c |
| @void ihevc_pad_right_luma(uword8 *pu1_src, |
| @ word32 src_strd, |
| @ word32 ht, |
| @ word32 pad_size) |
| @{ |
| @ word32 row@ |
| @ |
| @ for(row = 0@ row < ht@ row++) |
| @ { |
| @ memset(pu1_src, *(pu1_src -1), pad_size)@ |
| @ |
| @ pu1_src += src_strd@ |
| @ } |
| @} |
| @ |
| @ r0 => *pu1_src |
| @ r1 => src_strd |
| @ r2 => ht |
| @ r3 => pad_size |
| |
| |
| |
| .globl ihevc_pad_right_luma_a9q |
| |
| .type ihevc_pad_right_luma_a9q, %function |
| |
| ihevc_pad_right_luma_a9q: |
| |
| stmfd sp!, {r4-r11, lr} @stack stores the values of the arguments |
| |
| loop_start_luma_right: |
| @ pad size is assumed to be pad_left = 80 |
| mov r4,r0 |
| |
| ldrb r8,[r0, #-1] |
| add r0,r1 |
| ldrb r9,[r0, #-1] |
| add r0,r1 |
| ldrb r10,[r0, #-1] |
| add r0,r1 |
| ldrb r11,[r0, #-1] |
| add r0,r1 |
| |
| add r5,r4,r1 |
| add r6,r5,r1 |
| add r7,r6,r1 |
| |
| vdup.u8 q0,r8 |
| vdup.u8 q1,r9 |
| vdup.u8 q2,r10 |
| vdup.u8 q3,r11 |
| |
| vst1.8 {d0,d1},[r4]! @128/8 = 16 bytes store |
| vst1.8 {d0,d1},[r4]! @ 16 bytes store |
| vst1.8 {d0,d1},[r4]! @ 16 bytes store |
| vst1.8 {d0,d1},[r4]! @ 16 bytes store |
| vst1.8 {d0,d1},[r4] @ 16 bytes store |
| |
| |
| vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store |
| vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store |
| vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store |
| vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store |
| vst1.8 {d2,d3},[r5] @128/8 = 16 bytes store |
| |
| subs r2,#4 |
| |
| vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store |
| vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store |
| vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store |
| vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store |
| vst1.8 {d4,d5},[r6] @128/8 = 16 bytes store |
| |
| vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store |
| vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store |
| vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store |
| vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store |
| vst1.8 {d6,d7},[r7] @128/8 = 16 bytes store |
| |
| |
| @ total of 4rows*(16*5) = 4 * 80 = 4 * pad_left store |
| |
| |
| bne loop_start_luma_right |
| |
| ldmfd sp!,{r4-r11,pc} @reload the registers from sp |
| |
| |
| |
| |
| |
| @/** |
| @******************************************************************************* |
| @* |
| @* @brief |
| @@* padding (chroma block) at the right of a 2d array |
| @* |
| @* @par description: |
| @* the right column of a 2d array is replicated for pad_size times at the right |
| @* |
| @* |
| @* @param[in] pu1_src |
| @@* uword8 pointer to the source |
| @* |
| @* @param[in] src_strd |
| @* integer source stride |
| @* |
| @* @param[in] ht |
| @@* integer height of the array |
| @* |
| @* @param[in] wd |
| @* integer width of the array (each colour component) |
| @* |
| @* @param[in] pad_size |
| @* integer -padding size of the array |
| @* |
| @* @param[in] ht |
| @@* integer height of the array |
| @* |
| @* @param[in] wd |
| @* integer width of the array |
| @* |
| @* @returns |
| @* |
| @* @remarks |
| @* none |
| @* |
| @******************************************************************************* |
| @*/ |
| @.if pad_right_chroma == c |
| @void ihevc_pad_right_chroma(uword8 *pu1_src, |
| @ word32 src_strd, |
| @ word32 ht, |
| @ word32 pad_size) |
| @ r0 => *pu1_src |
| @ r1 => src_strd |
| @ r2 => ht |
| @ r3 => pad_size |
| |
| |
| |
| .globl ihevc_pad_right_chroma_a9q |
| |
| .type ihevc_pad_right_chroma_a9q, %function |
| |
| ihevc_pad_right_chroma_a9q: |
| |
| stmfd sp!, {r4-r11, lr} @stack stores the values of the arguments |
| |
| loop_start_chroma_right: |
| @ pad size is assumed to be pad_left = 80 |
| mov r4,r0 |
| |
| ldrh r8,[r0, #-2] |
| add r0,r1 |
| ldrh r9,[r0, #-2] |
| add r0,r1 |
| ldrh r10,[r0, #-2] |
| add r0,r1 |
| ldrh r11,[r0, #-2] |
| add r0,r1 |
| |
| vdup.u16 q0,r8 |
| vdup.u16 q1,r9 |
| vdup.u16 q2,r10 |
| vdup.u16 q3,r11 |
| |
| add r5,r4,r1 |
| |
| vst1.8 {d0,d1},[r4]! @128/8 = 16 bytes store |
| vst1.8 {d0,d1},[r4]! @ 16 bytes store |
| vst1.8 {d0,d1},[r4]! @ 16 bytes store |
| vst1.8 {d0,d1},[r4]! @ 16 bytes store |
| vst1.8 {d0,d1},[r4] @ 16 bytes store |
| |
| add r6,r5,r1 |
| |
| vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store |
| vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store |
| vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store |
| vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store |
| vst1.8 {d2,d3},[r5] @128/8 = 16 bytes store |
| |
| add r7,r6,r1 |
| |
| vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store |
| vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store |
| vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store |
| vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store |
| vst1.8 {d4,d5},[r6] @128/8 = 16 bytes store |
| |
| subs r2,#4 |
| |
| vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store |
| vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store |
| vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store |
| vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store |
| vst1.8 {d6,d7},[r7] @128/8 = 16 bytes store |
| |
| @ total of 4rows*(16*5) = 4 * 80 = 4 * pad_left store |
| |
| bne loop_start_chroma_right |
| |
| ldmfd sp!,{r4-r11,pc} @reload the registers from sp |
| |
| |
| |
| |
| |
| |
| |
| |