@ blob: 08d1f3618231d166aa21fe93ef9878bc035789a6 [file] [log] [blame]
@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@ *******************************************************************************
@ * @file
@ * ihevc_padding_neon.s
@ *
@ * @brief
@* contains function definitions for padding
@ *
@ * @author
@ * naveen sr
@ *
@* @par list of functions:
@* - ihevc_pad_left_luma()
@* - ihevc_pad_left_chroma()
@* - ihevc_pad_right_luma()
@* - ihevc_pad_right_chroma()
@ *
@ * @remarks
@ * none
@ *
@ *******************************************************************************
@*/
@/**
@*******************************************************************************
@*
@* @brief
@*  padding (luma block) at the left of a 2d array
@*
@* @par description:
@*  for every row, the byte at pu1_src (the left-most pixel of that row) is
@*  replicated into the 80 bytes that start at (pu1_src - pad_size)
@*
@* @param[in] pu1_src
@*  uword8 pointer to the left-most pixel of the first row
@*
@* @param[in] src_strd
@*  integer source stride (byte distance between two consecutive rows)
@*
@* @param[in] ht
@*  integer number of rows to pad - nothing is written when ht <= 0
@*
@* @param[in] pad_size
@*  integer padding size in bytes - it is only used to locate the start of
@*  the padding. exactly 80 bytes (5 stores of 16 bytes) are written per row
@*  whatever its value, so the padding ends at pu1_src only when it is 80
@*
@* @returns
@*  none
@*
@* @remarks
@*  rows are padded four at a time while at least four rows remain, the 1-3
@*  left-over rows are padded one at a time, so ht need not be a multiple of 4
@*  modifies r0, r2, q0-q3 and the flags - r4-r11 are saved and restored
@*
@*******************************************************************************
@*/
@void ihevc_pad_left_luma(uword8 *pu1_src,
@                         word32 src_strd,
@                         word32 ht,
@                         word32 pad_size)
@**************variables vs registers*************************
@   r0      => *pu1_src (advanced by src_strd for every row consumed)
@   r1      => src_strd
@   r2      => ht (rows still to be padded)
@   r3      => pad_size
@   r4-r7   => store pointers for the (up to) four rows in flight
@   r8-r11  => pixel value fetched from each of those rows
@   q0-q3   => the pixel values replicated over 16 bytes

.text
.align 4

.globl ihevc_pad_left_luma_a9q
.type ihevc_pad_left_luma_a9q, %function

ihevc_pad_left_luma_a9q:
    stmfd       sp!, {r4-r11,lr}            @save the callee-saved registers and the return address

    cmp         r2,#4
    blt         tail_luma_left              @fewer than 4 rows (or ht <= 0) - skip the 4-row loop

loop_start_luma_left:
    @ pad size is assumed to be pad_left = 80
    sub         r4,r0,r3                    @r4 = start of the padding of row 0
    ldrb        r8,[r0]                     @left-most pixel of row 0
    add         r0,r1
    ldrb        r9,[r0]                     @left-most pixel of row 1
    add         r0,r1
    ldrb        r10,[r0]                    @left-most pixel of row 2
    add         r0,r1
    ldrb        r11,[r0]                    @left-most pixel of row 3
    add         r0,r1                       @r0 = first row of the next group

    vdup.u8     q0,r8
    vdup.u8     q1,r9
    vdup.u8     q2,r10
    vdup.u8     q3,r11

    add         r5,r4,r1                    @r5 = start of the padding of row 1
    vst1.8      {d0,d1},[r4]!               @row 0 - 16 bytes store
    vst1.8      {d0,d1},[r4]!               @row 0 - 16 bytes store
    vst1.8      {d0,d1},[r4]!               @row 0 - 16 bytes store
    vst1.8      {d0,d1},[r4]!               @row 0 - 16 bytes store
    vst1.8      {d0,d1},[r4]                @row 0 - 16 bytes store

    add         r6,r5,r1                    @r6 = start of the padding of row 2
    vst1.8      {d2,d3},[r5]!               @row 1 - 16 bytes store
    vst1.8      {d2,d3},[r5]!               @row 1 - 16 bytes store
    vst1.8      {d2,d3},[r5]!               @row 1 - 16 bytes store
    vst1.8      {d2,d3},[r5]!               @row 1 - 16 bytes store
    vst1.8      {d2,d3},[r5]                @row 1 - 16 bytes store

    add         r7,r6,r1                    @r7 = start of the padding of row 3
    vst1.8      {d4,d5},[r6]!               @row 2 - 16 bytes store
    vst1.8      {d4,d5},[r6]!               @row 2 - 16 bytes store
    vst1.8      {d4,d5},[r6]!               @row 2 - 16 bytes store
    vst1.8      {d4,d5},[r6]!               @row 2 - 16 bytes store
    vst1.8      {d4,d5},[r6]                @row 2 - 16 bytes store

    sub         r2,r2,#4                    @four rows done
    vst1.8      {d6,d7},[r7]!               @row 3 - 16 bytes store
    vst1.8      {d6,d7},[r7]!               @row 3 - 16 bytes store
    vst1.8      {d6,d7},[r7]!               @row 3 - 16 bytes store
    vst1.8      {d6,d7},[r7]!               @row 3 - 16 bytes store
    vst1.8      {d6,d7},[r7]                @row 3 - 16 bytes store
    @ total of 4rows*(16*5) = 4 * 80 = 4 * pad_left store

    cmp         r2,#4
    bge         loop_start_luma_left        @another full group of four rows is left

tail_luma_left:
    cmp         r2,#0
    ble         end_luma_left               @no row left (also covers ht <= 0)

tail_loop_luma_left:
    sub         r4,r0,r3                    @r4 = start of the padding of this row
    ldrb        r8,[r0]                     @left-most pixel of this row
    add         r0,r1
    vdup.u8     q0,r8
    vst1.8      {d0,d1},[r4]!               @16 bytes store
    vst1.8      {d0,d1},[r4]!               @16 bytes store
    vst1.8      {d0,d1},[r4]!               @16 bytes store
    vst1.8      {d0,d1},[r4]!               @16 bytes store
    vst1.8      {d0,d1},[r4]                @16 bytes store
    subs        r2,#1
    bne         tail_loop_luma_left         @r2 was 1..3 on entry, so it reaches 0 exactly

end_luma_left:
    ldmfd       sp!,{r4-r11,pc}             @restore the registers and return
@/**
@*******************************************************************************
@*
@* @brief
@*  padding (chroma block) at the left of a 2d array
@*
@* @par description:
@*  for every row, the two bytes at pu1_src (the left-most pixel pair of that
@*  row, presumably one byte per colour component) are replicated, in the same
@*  byte order, into the 80 bytes that start at (pu1_src - pad_size)
@*
@* @param[in] pu1_src
@*  uword8 pointer to the left-most pixel pair of the first row
@*
@* @param[in] src_strd
@*  integer source stride (byte distance between two consecutive rows)
@*
@* @param[in] ht
@*  integer number of rows to pad - nothing is written when ht <= 0
@*
@* @param[in] pad_size
@*  integer padding size in bytes - it is only used to locate the start of
@*  the padding. exactly 80 bytes (5 stores of 16 bytes) are written per row
@*  whatever its value, so the padding ends at pu1_src only when it is 80
@*
@* @returns
@*  none
@*
@* @remarks
@*  rows are padded four at a time while at least four rows remain, the 1-3
@*  left-over rows are padded one at a time, so ht need not be a multiple of 4
@*  modifies r0, r2, q0-q3 and the flags - r4-r11 are saved and restored
@*
@*******************************************************************************
@*/
@void ihevc_pad_left_chroma(uword8 *pu1_src,
@                           word32 src_strd,
@                           word32 ht,
@                           word32 pad_size)
@**************variables vs registers*************************
@   r0      => *pu1_src (advanced by src_strd for every row consumed)
@   r1      => src_strd
@   r2      => ht (rows still to be padded)
@   r3      => pad_size
@   r4-r7   => store pointers for the (up to) four rows in flight
@   r8-r11  => 16-bit pixel pair fetched from each of those rows
@   q0-q3   => the pixel pairs replicated over 16 bytes

.globl ihevc_pad_left_chroma_a9q
.type ihevc_pad_left_chroma_a9q, %function

ihevc_pad_left_chroma_a9q:
    stmfd       sp!, {r4-r11, lr}           @save the callee-saved registers and the return address

    cmp         r2,#4
    blt         tail_chroma_left            @fewer than 4 rows (or ht <= 0) - skip the 4-row loop

loop_start_chroma_left:
    @ pad size is assumed to be pad_left = 80
    sub         r4,r0,r3                    @r4 = start of the padding of row 0
    ldrh        r8,[r0]                     @left-most pixel pair of row 0
    add         r0,r1
    ldrh        r9,[r0]                     @left-most pixel pair of row 1
    add         r0,r1
    ldrh        r10,[r0]                    @left-most pixel pair of row 2
    add         r0,r1
    ldrh        r11,[r0]                    @left-most pixel pair of row 3
    add         r0,r1                       @r0 = first row of the next group

    vdup.u16    q0,r8
    vdup.u16    q1,r9
    vdup.u16    q2,r10
    vdup.u16    q3,r11

    add         r5,r4,r1                    @r5 = start of the padding of row 1
    vst1.8      {d0,d1},[r4]!               @row 0 - 16 bytes store
    vst1.8      {d0,d1},[r4]!               @row 0 - 16 bytes store
    vst1.8      {d0,d1},[r4]!               @row 0 - 16 bytes store
    vst1.8      {d0,d1},[r4]!               @row 0 - 16 bytes store
    vst1.8      {d0,d1},[r4]                @row 0 - 16 bytes store

    add         r6,r5,r1                    @r6 = start of the padding of row 2
    vst1.8      {d2,d3},[r5]!               @row 1 - 16 bytes store
    vst1.8      {d2,d3},[r5]!               @row 1 - 16 bytes store
    vst1.8      {d2,d3},[r5]!               @row 1 - 16 bytes store
    vst1.8      {d2,d3},[r5]!               @row 1 - 16 bytes store
    vst1.8      {d2,d3},[r5]                @row 1 - 16 bytes store

    add         r7,r6,r1                    @r7 = start of the padding of row 3
    vst1.8      {d4,d5},[r6]!               @row 2 - 16 bytes store
    vst1.8      {d4,d5},[r6]!               @row 2 - 16 bytes store
    vst1.8      {d4,d5},[r6]!               @row 2 - 16 bytes store
    vst1.8      {d4,d5},[r6]!               @row 2 - 16 bytes store
    vst1.8      {d4,d5},[r6]                @row 2 - 16 bytes store

    sub         r2,r2,#4                    @four rows done
    vst1.8      {d6,d7},[r7]!               @row 3 - 16 bytes store
    vst1.8      {d6,d7},[r7]!               @row 3 - 16 bytes store
    vst1.8      {d6,d7},[r7]!               @row 3 - 16 bytes store
    vst1.8      {d6,d7},[r7]!               @row 3 - 16 bytes store
    vst1.8      {d6,d7},[r7]                @row 3 - 16 bytes store
    @ total of 4rows*(16*5) = 4 * 80 = 4 * pad_left store

    cmp         r2,#4
    bge         loop_start_chroma_left      @another full group of four rows is left

tail_chroma_left:
    cmp         r2,#0
    ble         end_chroma_left             @no row left (also covers ht <= 0)

tail_loop_chroma_left:
    sub         r4,r0,r3                    @r4 = start of the padding of this row
    ldrh        r8,[r0]                     @left-most pixel pair of this row
    add         r0,r1
    vdup.u16    q0,r8
    vst1.8      {d0,d1},[r4]!               @16 bytes store
    vst1.8      {d0,d1},[r4]!               @16 bytes store
    vst1.8      {d0,d1},[r4]!               @16 bytes store
    vst1.8      {d0,d1},[r4]!               @16 bytes store
    vst1.8      {d0,d1},[r4]                @16 bytes store
    subs        r2,#1
    bne         tail_loop_chroma_left       @r2 was 1..3 on entry, so it reaches 0 exactly

end_chroma_left:
    ldmfd       sp!,{r4-r11,pc}             @restore the registers and return
@/**
@*******************************************************************************
@*
@* @brief
@*  padding (luma block) at the right of a 2d array
@*
@* @par description:
@*  for every row, the byte at (pu1_src - 1) (the right-most pixel of that
@*  row) is replicated into the 80 bytes that start at pu1_src
@*
@* @param[in] pu1_src
@*  uword8 pointer to the first byte to be padded in the first row, i.e. one
@*  byte past the right-most pixel of that row
@*
@* @param[in] src_strd
@*  integer source stride (byte distance between two consecutive rows)
@*
@* @param[in] ht
@*  integer number of rows to pad - nothing is written when ht <= 0
@*
@* @param[in] pad_size
@*  integer padding size in bytes - not read by this implementation, which
@*  always writes exactly 80 bytes (5 stores of 16 bytes) per row
@*
@* @returns
@*  none
@*
@* @remarks
@*  rows are padded four at a time while at least four rows remain, the 1-3
@*  left-over rows are padded one at a time, so ht need not be a multiple of 4
@*  modifies r0, r2, q0-q3 and the flags - r4-r11 are saved and restored
@*
@*******************************************************************************
@*/
@void ihevc_pad_right_luma(uword8 *pu1_src,
@                          word32 src_strd,
@                          word32 ht,
@                          word32 pad_size)
@
@ reference behaviour, with the pad size fixed at 80 -
@   for each row in 0 .. ht-1
@       memset(pu1_src, *(pu1_src - 1), 80)
@       pu1_src += src_strd
@
@**************variables vs registers*************************
@   r0      => *pu1_src (advanced by src_strd for every row consumed)
@   r1      => src_strd
@   r2      => ht (rows still to be padded)
@   r3      => pad_size (unused)
@   r4-r7   => store pointers for the (up to) four rows in flight
@   r8-r11  => pixel value fetched from each of those rows
@   q0-q3   => the pixel values replicated over 16 bytes

.globl ihevc_pad_right_luma_a9q
.type ihevc_pad_right_luma_a9q, %function

ihevc_pad_right_luma_a9q:
    stmfd       sp!, {r4-r11, lr}           @save the callee-saved registers and the return address

    cmp         r2,#4
    blt         tail_luma_right             @fewer than 4 rows (or ht <= 0) - skip the 4-row loop

loop_start_luma_right:
    @ pad size is assumed to be pad_right = 80
    mov         r4,r0                       @r4 = start of the padding of row 0
    ldrb        r8,[r0, #-1]                @right-most pixel of row 0
    add         r0,r1
    ldrb        r9,[r0, #-1]                @right-most pixel of row 1
    add         r0,r1
    ldrb        r10,[r0, #-1]               @right-most pixel of row 2
    add         r0,r1
    ldrb        r11,[r0, #-1]               @right-most pixel of row 3
    add         r0,r1                       @r0 = first row of the next group

    add         r5,r4,r1                    @r5 = start of the padding of row 1
    add         r6,r5,r1                    @r6 = start of the padding of row 2
    add         r7,r6,r1                    @r7 = start of the padding of row 3

    vdup.u8     q0,r8
    vdup.u8     q1,r9
    vdup.u8     q2,r10
    vdup.u8     q3,r11

    vst1.8      {d0,d1},[r4]!               @row 0 - 16 bytes store
    vst1.8      {d0,d1},[r4]!               @row 0 - 16 bytes store
    vst1.8      {d0,d1},[r4]!               @row 0 - 16 bytes store
    vst1.8      {d0,d1},[r4]!               @row 0 - 16 bytes store
    vst1.8      {d0,d1},[r4]                @row 0 - 16 bytes store

    vst1.8      {d2,d3},[r5]!               @row 1 - 16 bytes store
    vst1.8      {d2,d3},[r5]!               @row 1 - 16 bytes store
    vst1.8      {d2,d3},[r5]!               @row 1 - 16 bytes store
    vst1.8      {d2,d3},[r5]!               @row 1 - 16 bytes store
    vst1.8      {d2,d3},[r5]                @row 1 - 16 bytes store

    sub         r2,r2,#4                    @four rows done
    vst1.8      {d4,d5},[r6]!               @row 2 - 16 bytes store
    vst1.8      {d4,d5},[r6]!               @row 2 - 16 bytes store
    vst1.8      {d4,d5},[r6]!               @row 2 - 16 bytes store
    vst1.8      {d4,d5},[r6]!               @row 2 - 16 bytes store
    vst1.8      {d4,d5},[r6]                @row 2 - 16 bytes store

    vst1.8      {d6,d7},[r7]!               @row 3 - 16 bytes store
    vst1.8      {d6,d7},[r7]!               @row 3 - 16 bytes store
    vst1.8      {d6,d7},[r7]!               @row 3 - 16 bytes store
    vst1.8      {d6,d7},[r7]!               @row 3 - 16 bytes store
    vst1.8      {d6,d7},[r7]                @row 3 - 16 bytes store
    @ total of 4rows*(16*5) = 4 * 80 = 4 * pad_right store

    cmp         r2,#4
    bge         loop_start_luma_right       @another full group of four rows is left

tail_luma_right:
    cmp         r2,#0
    ble         end_luma_right              @no row left (also covers ht <= 0)

tail_loop_luma_right:
    mov         r4,r0                       @r4 = start of the padding of this row
    ldrb        r8,[r0, #-1]                @right-most pixel of this row
    add         r0,r1
    vdup.u8     q0,r8
    vst1.8      {d0,d1},[r4]!               @16 bytes store
    vst1.8      {d0,d1},[r4]!               @16 bytes store
    vst1.8      {d0,d1},[r4]!               @16 bytes store
    vst1.8      {d0,d1},[r4]!               @16 bytes store
    vst1.8      {d0,d1},[r4]                @16 bytes store
    subs        r2,#1
    bne         tail_loop_luma_right        @r2 was 1..3 on entry, so it reaches 0 exactly

end_luma_right:
    ldmfd       sp!,{r4-r11,pc}             @restore the registers and return
@/**
@*******************************************************************************
@*
@* @brief
@*  padding (chroma block) at the right of a 2d array
@*
@* @par description:
@*  for every row, the two bytes at (pu1_src - 2) (the right-most pixel pair
@*  of that row, presumably one byte per colour component) are replicated, in
@*  the same byte order, into the 80 bytes that start at pu1_src
@*
@* @param[in] pu1_src
@*  uword8 pointer to the first byte to be padded in the first row, i.e. one
@*  byte past the right-most pixel pair of that row
@*
@* @param[in] src_strd
@*  integer source stride (byte distance between two consecutive rows)
@*
@* @param[in] ht
@*  integer number of rows to pad - nothing is written when ht <= 0
@*
@* @param[in] pad_size
@*  integer padding size in bytes - not read by this implementation, which
@*  always writes exactly 80 bytes (5 stores of 16 bytes) per row
@*
@* @returns
@*  none
@*
@* @remarks
@*  rows are padded four at a time while at least four rows remain, the 1-3
@*  left-over rows are padded one at a time, so ht need not be a multiple of 4
@*  modifies r0, r2, q0-q3 and the flags - r4-r11 are saved and restored
@*
@*******************************************************************************
@*/
@void ihevc_pad_right_chroma(uword8 *pu1_src,
@                            word32 src_strd,
@                            word32 ht,
@                            word32 pad_size)
@**************variables vs registers*************************
@   r0      => *pu1_src (advanced by src_strd for every row consumed)
@   r1      => src_strd
@   r2      => ht (rows still to be padded)
@   r3      => pad_size (unused)
@   r4-r7   => store pointers for the (up to) four rows in flight
@   r8-r11  => 16-bit pixel pair fetched from each of those rows
@   q0-q3   => the pixel pairs replicated over 16 bytes

.globl ihevc_pad_right_chroma_a9q
.type ihevc_pad_right_chroma_a9q, %function

ihevc_pad_right_chroma_a9q:
    stmfd       sp!, {r4-r11, lr}           @save the callee-saved registers and the return address

    cmp         r2,#4
    blt         tail_chroma_right           @fewer than 4 rows (or ht <= 0) - skip the 4-row loop

loop_start_chroma_right:
    @ pad size is assumed to be pad_right = 80
    mov         r4,r0                       @r4 = start of the padding of row 0
    ldrh        r8,[r0, #-2]                @right-most pixel pair of row 0
    add         r0,r1
    ldrh        r9,[r0, #-2]                @right-most pixel pair of row 1
    add         r0,r1
    ldrh        r10,[r0, #-2]               @right-most pixel pair of row 2
    add         r0,r1
    ldrh        r11,[r0, #-2]               @right-most pixel pair of row 3
    add         r0,r1                       @r0 = first row of the next group

    vdup.u16    q0,r8
    vdup.u16    q1,r9
    vdup.u16    q2,r10
    vdup.u16    q3,r11

    add         r5,r4,r1                    @r5 = start of the padding of row 1
    vst1.8      {d0,d1},[r4]!               @row 0 - 16 bytes store
    vst1.8      {d0,d1},[r4]!               @row 0 - 16 bytes store
    vst1.8      {d0,d1},[r4]!               @row 0 - 16 bytes store
    vst1.8      {d0,d1},[r4]!               @row 0 - 16 bytes store
    vst1.8      {d0,d1},[r4]                @row 0 - 16 bytes store

    add         r6,r5,r1                    @r6 = start of the padding of row 2
    vst1.8      {d2,d3},[r5]!               @row 1 - 16 bytes store
    vst1.8      {d2,d3},[r5]!               @row 1 - 16 bytes store
    vst1.8      {d2,d3},[r5]!               @row 1 - 16 bytes store
    vst1.8      {d2,d3},[r5]!               @row 1 - 16 bytes store
    vst1.8      {d2,d3},[r5]                @row 1 - 16 bytes store

    add         r7,r6,r1                    @r7 = start of the padding of row 3
    vst1.8      {d4,d5},[r6]!               @row 2 - 16 bytes store
    vst1.8      {d4,d5},[r6]!               @row 2 - 16 bytes store
    vst1.8      {d4,d5},[r6]!               @row 2 - 16 bytes store
    vst1.8      {d4,d5},[r6]!               @row 2 - 16 bytes store
    vst1.8      {d4,d5},[r6]                @row 2 - 16 bytes store

    sub         r2,r2,#4                    @four rows done
    vst1.8      {d6,d7},[r7]!               @row 3 - 16 bytes store
    vst1.8      {d6,d7},[r7]!               @row 3 - 16 bytes store
    vst1.8      {d6,d7},[r7]!               @row 3 - 16 bytes store
    vst1.8      {d6,d7},[r7]!               @row 3 - 16 bytes store
    vst1.8      {d6,d7},[r7]                @row 3 - 16 bytes store
    @ total of 4rows*(16*5) = 4 * 80 = 4 * pad_right store

    cmp         r2,#4
    bge         loop_start_chroma_right     @another full group of four rows is left

tail_chroma_right:
    cmp         r2,#0
    ble         end_chroma_right            @no row left (also covers ht <= 0)

tail_loop_chroma_right:
    mov         r4,r0                       @r4 = start of the padding of this row
    ldrh        r8,[r0, #-2]                @right-most pixel pair of this row
    add         r0,r1
    vdup.u16    q0,r8
    vst1.8      {d0,d1},[r4]!               @16 bytes store
    vst1.8      {d0,d1},[r4]!               @16 bytes store
    vst1.8      {d0,d1},[r4]!               @16 bytes store
    vst1.8      {d0,d1},[r4]!               @16 bytes store
    vst1.8      {d0,d1},[r4]                @16 bytes store
    subs        r2,#1
    bne         tail_loop_chroma_right      @r2 was 1..3 on entry, so it reaches 0 exactly

end_chroma_right:
    ldmfd       sp!,{r4-r11,pc}             @restore the registers and return