@ blob: 0dd82f34e9c3bd6a81f5a244a2d670bcf33e332d [file] [log] [blame]
@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
@**
@******************************************************************************
@* @file
@* ih264_intra_pred_luma_16x16_a9q.s
@*
@* @brief
@* Contains function definitions for intra 16x16 luma prediction.
@*
@* @author
@* Ittiam
@*
@* @par List of Functions:
@*
@* - ih264_intra_pred_luma_16x16_mode_vert_a9q()
@* - ih264_intra_pred_luma_16x16_mode_horz_a9q()
@* - ih264_intra_pred_luma_16x16_mode_dc_a9q()
@* - ih264_intra_pred_luma_16x16_mode_plane_a9q()
@*
@* @remarks
@* None
@*
@*******************************************************************************
@*
@* All the functions here are replicated from ih264_intra_pred_filters.c
@
@**
@**
@**
@
@ Code section; the literal below is kept word aligned (2^2 bytes).
.text
.p2align 2
@ Coefficient table defined in another object; hidden visibility.
.extern ih264_gai1_intrapred_luma_plane_coeffs
.hidden ih264_gai1_intrapred_luma_plane_coeffs
@ Offset from (scrlbl1 + 8) to the coefficient table. The plane routine
@ loads this word and adds pc at label scrlbl1 to obtain the table address
@ without an absolute relocation.
@ NOTE(review): the "- 8" assumes the add at scrlbl1 executes in ARM (A32)
@ state, where pc reads as the instruction address + 8 - confirm the file is
@ never assembled as Thumb.
scratch_intrapred_addr1:
.long ih264_gai1_intrapred_luma_plane_coeffs - scrlbl1 - 8
@**
@*******************************************************************************
@*
@* ih264_intra_pred_luma_16x16_mode_vert_a9q
@*
@* @brief
@*  Perform intra prediction for luma 16x16, mode: vertical
@*
@* @par Description:
@*  Perform intra prediction for luma 16x16 vertical mode, described in
@*  sec 8.3.3.1. The 16 bytes at pu1_src + 17 are copied unchanged into each
@*  of the 16 destination rows.
@*  NOTE(review): pu1_src + 17 is presumably the top neighbour row of a
@*  "left[15..0], top-left, top[0..15]" neighbour buffer - confirm against
@*  the caller.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source (neighbour buffer)
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride (not read by this function)
@*
@* @param[in] dst_strd
@*  integer destination stride, in bytes
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not read by this function)
@*
@* @returns
@*  None
@*
@* @remarks
@*  Leaf routine. Touches only r0, r1 and q0, so nothing is spilled to the
@*  stack and the function returns with bx lr.
@*
@*******************************************************************************
@void ih264_intra_pred_luma_16x16_mode_vert(UWORD8 *pu1_src,
@                                           UWORD8 *pu1_dst,
@                                           WORD32 src_strd,
@                                           WORD32 dst_strd,
@                                           WORD32 ui_neighboravailability)
@**************Variables Vs Registers*****************************************
@ r0 => *pu1_src (advanced by 17 before the load)
@ r1 => *pu1_dst (post-incremented by dst_strd after every row)
@ r2 => src_strd (unused)
@ r3 => dst_strd
@ q0 => the 16 predictor bytes
    .global ih264_intra_pred_luma_16x16_mode_vert_a9q
ih264_intra_pred_luma_16x16_mode_vert_a9q:
    add         r0, r0, #17             @ r0 = pu1_src + 17
    vld1.8      {q0}, [r0]              @ q0 = pu1_src[17..32]

    @ Same 16 bytes go to all 16 rows.
    vst1.8      {q0}, [r1], r3          @ row 0
    vst1.8      {q0}, [r1], r3          @ row 1
    vst1.8      {q0}, [r1], r3          @ row 2
    vst1.8      {q0}, [r1], r3          @ row 3
    vst1.8      {q0}, [r1], r3          @ row 4
    vst1.8      {q0}, [r1], r3          @ row 5
    vst1.8      {q0}, [r1], r3          @ row 6
    vst1.8      {q0}, [r1], r3          @ row 7
    vst1.8      {q0}, [r1], r3          @ row 8
    vst1.8      {q0}, [r1], r3          @ row 9
    vst1.8      {q0}, [r1], r3          @ row 10
    vst1.8      {q0}, [r1], r3          @ row 11
    vst1.8      {q0}, [r1], r3          @ row 12
    vst1.8      {q0}, [r1], r3          @ row 13
    vst1.8      {q0}, [r1], r3          @ row 14
    vst1.8      {q0}, [r1], r3          @ row 15

    bx          lr                      @ no callee-saved state was touched
@******************************************************************************
@**
@*******************************************************************************
@*
@* ih264_intra_pred_luma_16x16_mode_horz_a9q
@*
@* @brief
@*  Perform intra prediction for luma 16x16, mode: horizontal
@*
@* @par Description:
@*  Perform intra prediction for luma 16x16 horizontal mode, described in
@*  sec 8.3.3.2. Destination row y (y = 0..15) is filled with 16 copies of
@*  the byte pu1_src[15 - y].
@*  NOTE(review): pu1_src[0..15] presumably holds the left neighbour column
@*  stored bottom-to-top - confirm against the caller.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source (neighbour buffer)
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride (not read; r2 is reused as the loop counter)
@*
@* @param[in] dst_strd
@*  integer destination stride, in bytes
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not read by this function)
@*
@* @returns
@*  None
@*
@* @remarks
@*  Leaf routine. Touches only r1, r2 and q0-q2, so nothing is spilled to
@*  the stack and the function returns with bx lr.
@*
@*******************************************************************************
@*
@void ih264_intra_pred_luma_16x16_mode_horz(UWORD8 *pu1_src,
@                                           UWORD8 *pu1_dst,
@                                           WORD32 src_strd,
@                                           WORD32 dst_strd,
@                                           WORD32 ui_neighboravailability)
@**************Variables Vs Registers*****************************************
@ r0 => *pu1_src
@ r1 => *pu1_dst (post-incremented by dst_strd after every row)
@ r2 => rows still to be produced by the loop (src_strd is not needed)
@ r3 => dst_strd
@ q0 => the 16 source bytes, rotated by two bytes per loop pass
@ q1 => splat for the next even row, q2 => splat for the next odd row
    .global ih264_intra_pred_luma_16x16_mode_horz_a9q
ih264_intra_pred_luma_16x16_mode_horz_a9q:
    vld1.u8     {q0}, [r0]              @ q0 = pu1_src[0..15]
    mov         r2, #14                 @ the loop emits rows 1..14, two per pass
    vdup.u8     q1, d1[7]               @ row 0 splat  <- pu1_src[15]
    vdup.u8     q2, d1[6]               @ row 1 splat  <- pu1_src[14]
    vst1.8      {q1}, [r1], r3          @ store row 0

    @ Software-pipelined: each pass stores the odd row prepared by the
    @ previous pass, then prepares and stores the next even row.
loop_16x16_horz:
    vext.8      q0, q0, q0, #14         @ rotate: next two source bytes land in d1[7], d1[6]
    vst1.8      {q2}, [r1], r3          @ store odd row from the previous pass
    vdup.u8     q1, d1[7]               @ next even row splat
    subs        r2, r2, #2
    vdup.u8     q2, d1[6]               @ next odd row splat
    vst1.8      {q1}, [r1], r3          @ store even row
    bne         loop_16x16_horz

    vst1.8      {q2}, [r1], r3          @ row 15 <- pu1_src[0]; no further rotate needed
    bx          lr                      @ no callee-saved state was touched
@******************************************************************************
@**
@*******************************************************************************
@*
@* ih264_intra_pred_luma_16x16_mode_dc_a9q
@*
@* @brief
@*  Perform intra prediction for luma 16x16, mode: DC
@*
@* @par Description:
@*  Perform intra prediction for luma 16x16 DC mode, described in
@*  sec 8.3.3.3. Every destination byte is set to one value, chosen by
@*  ui_neighboravailability:
@*    bit 0x01 and bit 0x04 set : (sum(pu1_src[0..15]) + sum(pu1_src[17..32]) + 16) >> 5
@*    only bit 0x04 set         : (sum(pu1_src[17..32]) + 8) >> 4
@*    only bit 0x01 set         : (sum(pu1_src[0..15]) + 8) >> 4
@*    neither bit set           : 128
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source (neighbour buffer)
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride (not read by this function)
@*
@* @param[in] dst_strd
@*  integer destination stride, in bytes
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels; bit 0x01 = left, bit 0x04 = top.
@*  Fifth argument, read from the first stack word at entry.
@*
@* @returns
@*  None
@*
@* @remarks
@*  Leaf routine. The flags live in r12 (caller-saved), so no register is
@*  spilled and the function returns with bx lr.
@*
@*******************************************************************************
@void ih264_intra_pred_luma_16x16_mode_dc(UWORD8 *pu1_src,
@                                         UWORD8 *pu1_dst,
@                                         WORD32 src_strd,
@                                         WORD32 dst_strd,
@                                         WORD32 ui_neighboravailability)
@**************Variables Vs Registers*****************************************
@ r0  => *pu1_src
@ r1  => *pu1_dst (post-incremented by dst_strd after every row)
@ r2  => src_strd (unused)
@ r3  => dst_strd
@ r12 => ui_neighboravailability
@ q0  => running sum, then the splatted DC value
    .global ih264_intra_pred_luma_16x16_mode_dc_a9q
ih264_intra_pred_luma_16x16_mode_dc_a9q:
    ldr         r12, [sp]               @ r12 = ui_neighboravailability (nothing pushed, so offset 0)
    tst         r12, #0x01              @ left available?
    beq         top_available           @ no: only top can contribute
    tst         r12, #0x04              @ top available?
    beq         left_available          @ no: left only

    @ Both available: average all 32 neighbours.
    vld1.u8     {q0}, [r0]              @ q0 = pu1_src[0..15]
    add         r0, r0, #17
    vpaddl.u8   q0, q0                  @ 16 x u8 -> 8 x u16 pair sums
    vld1.u8     {q1}, [r0]              @ q1 = pu1_src[17..32]
    vpaddl.u8   q1, q1
    vadd.u16    q0, q0, q1
    vadd.u16    d0, d0, d1              @ 4 x u16
    vpaddl.u16  d0, d0                  @ 2 x u32
    vpaddl.u32  d0, d0                  @ total in the low lane of d0
    vqrshrun.s16 d0, q0, #5             @ d0[0] = (total + 16) >> 5, saturated to u8
    vdup.u8     q0, d0[0]
    b           str_pred

top_available:                          @ reached when left is NOT available
    tst         r12, #0x04              @ top available?
    beq         none_available
    add         r0, r0, #17             @ sum pu1_src[17..32] instead of pu1_src[0..15]
    @ fall through: the single-edge reduction is identical for both edges

left_available:                         @ average the 16 bytes at r0
    vld1.u8     {q0}, [r0]
    vpaddl.u8   q0, q0                  @ 8 x u16
    vadd.u16    d0, d0, d1              @ 4 x u16
    vpaddl.u16  d0, d0                  @ 2 x u32
    vpaddl.u32  d0, d0                  @ total in the low lane of d0
    vqrshrun.s16 d0, q0, #4             @ d0[0] = (total + 8) >> 4, saturated to u8
    vdup.u8     q0, d0[0]
    b           str_pred

none_available:                         @ no neighbours: fixed value 128
    vmov.u8     q0, #128

str_pred:                               @ write the splatted value to all 16 rows
    vst1.8      {q0}, [r1], r3          @ row 0
    vst1.8      {q0}, [r1], r3          @ row 1
    vst1.8      {q0}, [r1], r3          @ row 2
    vst1.8      {q0}, [r1], r3          @ row 3
    vst1.8      {q0}, [r1], r3          @ row 4
    vst1.8      {q0}, [r1], r3          @ row 5
    vst1.8      {q0}, [r1], r3          @ row 6
    vst1.8      {q0}, [r1], r3          @ row 7
    vst1.8      {q0}, [r1], r3          @ row 8
    vst1.8      {q0}, [r1], r3          @ row 9
    vst1.8      {q0}, [r1], r3          @ row 10
    vst1.8      {q0}, [r1], r3          @ row 11
    vst1.8      {q0}, [r1], r3          @ row 12
    vst1.8      {q0}, [r1], r3          @ row 13
    vst1.8      {q0}, [r1], r3          @ row 14
    vst1.8      {q0}, [r1], r3          @ row 15
    bx          lr                      @ no callee-saved state was touched
@******************************************************************************
@**
@*******************************************************************************
@*
@* ih264_intra_pred_luma_16x16_mode_plane_a9q
@*
@* @brief
@*  Perform intra prediction for luma 16x16, mode: plane
@*
@* @par Description:
@*  Perform intra prediction for luma 16x16 plane mode, described in
@*  sec 8.3.3.4. As implemented below (coeff[] are the 16 bytes of
@*  ih264_gai1_intrapred_luma_plane_coeffs, zero-extended):
@*    H   = sum, i = 0..7, of coeff[i] * (pu1_src[25 + i] - pu1_src[23 - i])
@*    V   = sum, k = 1..8, of k * (pu1_src[8 - k] - pu1_src[8 + k])
@*    i_b = (5 * H + 32) >> 6
@*    i_c = (5 * V + 32) >> 6
@*    i_a = 16 * (pu1_src[0] + pu1_src[32])
@*    dst[y][x] = sat_u8((i_a - 8*i_b - 7*i_c + y*i_c + i_b*coeff[x] + 16) >> 5)
@*  The per-pixel sums are kept in signed 16-bit NEON lanes.
@*  NOTE(review): coeff[] is presumably 1..16; the table is defined outside
@*  this file - confirm.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source (neighbour buffer; bytes 0..32 are read)
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride (not read; r2 is reused as the destination pointer)
@*
@* @param[in] dst_strd
@*  integer destination stride, in bytes
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not read by this function)
@*
@* @returns
@*  None
@*
@* @remarks
@*  The scalar V computation is interleaved with the NEON H computation.
@*
@*******************************************************************************
@void ih264_intra_pred_luma_16x16_mode_plane(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@ WORD32 dst_strd,
@ WORD32 ui_neighboravailability)
@**************Variables Vs Registers*****************************************
@ r0 => *pu1_src on entry; later the upward-walking byte pointer, then the
@ row-loop counter
@ r1 => *pu1_dst on entry; later a pointer into pu1_src (+16, then +25)
@ r2 => src_strd on entry; overwritten with pu1_dst
@ r3 => dst_strd
@ r4 => constant -1 (step of the downward-walking pointer)
@ r7 => coefficient table address, then the downward-walking byte pointer
@ r10 => pu1_src + 16
@ r12 => running V, then i_c
@ r8 => i_a (from L"lsl r8, r8, #4" onwards); r5, r6, r9 => scratch bytes
.global ih264_intra_pred_luma_16x16_mode_plane_a9q
ih264_intra_pred_luma_16x16_mode_plane_a9q:
stmfd sp!, {r4-r10, r12, lr} @ save the registers used as scratch below, plus lr
mov r2, r1 @ r2 = pu1_dst (r1 is reused as a source pointer)
add r1, r0, #17 @ r1 = pu1_src + 17
add r0, r0, #15 @ r0 = pu1_src + 15
mov r8, #9 @ post-increment applied after the first 8-byte load
sub r1, r1, #1 @ r1 = pu1_src + 16
mov r10, r1 @ r10 = pu1_src + 16 (top_left)
mov r4, #-1 @ r4 = -1
vld1.32 d2, [r1], r8 @ d2 = pu1_src[16..23]; r1 = pu1_src + 25
ldr r7, scratch_intrapred_addr1 @ r7 = stored offset to the coefficient table
scrlbl1:
add r7, r7, pc @ r7 = address of ih264_gai1_intrapred_luma_plane_coeffs
vld1.32 d0, [r1] @ d0 = pu1_src[25..32]
vrev64.8 d2, d2 @ d2 = pu1_src[23], pu1_src[22], ..., pu1_src[16]
vld1.32 {q3}, [r7] @ q3 = 16 coefficient bytes
vsubl.u8 q0, d0, d2 @ q0[i] = pu1_src[25 + i] - pu1_src[23 - i], 16-bit
vmovl.u8 q8, d6 @ q8 = coeff[0..7] widened to 16 bit
vmul.s16 q0, q0, q8 @ weight the differences
vmovl.u8 q9, d7 @ q9 = coeff[8..15] widened to 16 bit
add r7, r0, r4, lsl #3 @ r7 = pu1_src + 7 (steps down by 1 per load)
sub r0, r7, r4, lsl #1 @ r0 = pu1_src + 9 (steps up by 1 per load)
rsb lr, r4, #0x0 @ lr = +1
vpadd.s16 d0, d0, d1 @ start reducing the 8 weighted terms
ldrb r8, [r7], r4 @ r8 = pu1_src[7]
ldrb r9, [r0], lr @ r9 = pu1_src[9]
vpaddl.s16 d0, d0
sub r12, r8, r9 @ V = 1 * (pu1_src[7] - pu1_src[9])
ldrb r8, [r7], r4 @ r8 = pu1_src[6]
vpaddl.s32 d0, d0 @ low lane of d0 = H
ldrb r9, [r0], lr @ r9 = pu1_src[10]
sub r8, r8, r9
vshl.s32 d2, d0, #2 @ d2 = 4 * H
add r12, r12, r8, lsl #1 @ V += 2 * (pu1_src[6] - pu1_src[10])
vadd.s32 d0, d0, d2 @ d0 = 5 * H
ldrb r8, [r7], r4 @ r8 = pu1_src[5]
ldrb r9, [r0], lr @ r9 = pu1_src[11]
vrshr.s32 d0, d0, #6 @ i_b = D0[0] = (5 * H + 32) >> 6
sub r8, r8, r9
ldrb r5, [r7], r4 @ r5 = pu1_src[4]
add r8, r8, r8, lsl #1 @ r8 = 3 * (pu1_src[5] - pu1_src[11])
vdup.16 q2, d0[0] @ q2 = i_b in every 16-bit lane
add r12, r12, r8 @ V += r8
ldrb r9, [r0], lr @ r9 = pu1_src[12]
vmul.s16 q0, q2, q8 @ q0[x] = i_b * coeff[x], x = 0..7
sub r5, r5, r9
vmul.s16 q1, q2, q9 @ q1[x] = i_b * coeff[8 + x], x = 0..7
add r12, r12, r5, lsl #2 @ V += 4 * (pu1_src[4] - pu1_src[12])
ldrb r8, [r7], r4 @ r8 = pu1_src[3]
ldrb r9, [r0], lr @ r9 = pu1_src[13]
sub r8, r8, r9
ldrb r5, [r7], r4 @ r5 = pu1_src[2]
add r8, r8, r8, lsl #2 @ r8 = 5 * (pu1_src[3] - pu1_src[13])
ldrb r6, [r0], lr @ r6 = pu1_src[14]
add r12, r12, r8 @ V += r8
ldrb r8, [r7], r4 @ r8 = pu1_src[1]
ldrb r9, [r0], lr @ r9 = pu1_src[15]
sub r5, r5, r6
sub r8, r8, r9
add r5, r5, r5, lsl #1 @ r5 = 3 * (pu1_src[2] - pu1_src[14])
rsb r8, r8, r8, lsl #3 @ r8 = 7 * (pu1_src[1] - pu1_src[15])
add r12, r12, r5, lsl #1 @ V += 6 * (pu1_src[2] - pu1_src[14])
ldrb r5, [r7], r4 @ r5 = pu1_src[0]
ldrb r6, [r10] @ r6 = pu1_src[16] (top_left)
add r12, r12, r8 @ V += r8
sub r9, r5, r6
ldrb r6, [r1, #7] @ r6 = pu1_src[32]
add r12, r12, r9, lsl #3 @ V += 8 * (pu1_src[0] - pu1_src[16]); r12 = V, scaled to i_c below
add r8, r5, r6 @ r8 = pu1_src[0] + pu1_src[32]
add r12, r12, r12, lsl #2 @ r12 = 5 * V
lsl r8, r8, #4 @ i_a = r8
add r12, r12, #0x20 @ rounding term
lsr r12, r12, #6 @ i_c in the low 16 bits; a logical shift is fine because only those bits are used by vdup.16 below
vshl.s16 q14, q2, #3 @ q14 = 8 * i_b
vdup.16 q3, r12 @ q3 = i_c in every 16-bit lane
vdup.16 q15, r8 @ q15 = i_a in every 16-bit lane
vshl.s16 q13, q3, #3 @ q13 = 8 * i_c
vsub.s16 q15, q15, q14
vsub.s16 q15, q15, q13 @ q15 = i_a - 8 * i_b - 8 * i_c
vadd.s16 q14, q15, q3 @ q14 = i_a - 8 * i_b - 7 * i_c
mov r0, #14 @ the loop stores rows 0..13, two per pass
vadd.s16 q13, q14, q0 @ row 0 accumulators, columns 0..7
vadd.s16 q14, q14, q1 @ row 0 accumulators, columns 8..15
vqrshrun.s16 d20, q13, #5 @ q10 = row 0 pixels: (acc + 16) >> 5, saturated to u8
vqrshrun.s16 d21, q14, #5
@ Each pass steps the accumulators by i_c twice; q10 and q11 alternate so a
@ finished row is stored while the next one is being narrowed.
loop_16x16_plane:
vadd.s16 q13, q13, q3 @ next (odd) row
vadd.s16 q14, q14, q3
vqrshrun.s16 d22, q13, #5
vst1.32 {q10}, [r2], r3 @ store the even row prepared earlier
vqrshrun.s16 d23, q14, #5
vadd.s16 q13, q13, q3 @ next (even) row
subs r0, #2
vadd.s16 q14, q14, q3
vqrshrun.s16 d20, q13, #5
vst1.32 {q11}, [r2], r3 @ store the odd row
vqrshrun.s16 d21, q14, #5
bne loop_16x16_plane
vadd.s16 q13, q13, q3 @ row 15
vadd.s16 q14, q14, q3
vqrshrun.s16 d22, q13, #5
vst1.32 {q10}, [r2], r3 @ store row 14
vqrshrun.s16 d23, q14, #5
vst1.32 {q11}, [r2], r3 @ store row 15
ldmfd sp!, {r4-r10, r12, pc} @ restore the saved registers and return