blob: 352d29d8f7863e0bd90ffeca4c9c537bb0e76ffe [file] [log] [blame]
@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
@**
@******************************************************************************
@* @file
@* ih264_intra_pred_luma_8x8_a9q.s
@*
@* @brief
@* Contains function definitions for intra 8x8 Luma prediction .
@*
@* @author
@* Ittiam
@*
@* @par List of Functions:
@*
@* -ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q
@* -ih264_intra_pred_luma_8x8_mode_vert_a9q
@* -ih264_intra_pred_luma_8x8_mode_horz_a9q
@* -ih264_intra_pred_luma_8x8_mode_dc_a9q
@* -ih264_intra_pred_luma_8x8_mode_diag_dl_a9q
@* -ih264_intra_pred_luma_8x8_mode_diag_dr_a9q
@* -ih264_intra_pred_luma_8x8_mode_vert_r_a9q
@* -ih264_intra_pred_luma_8x8_mode_horz_d_a9q
@* -ih264_intra_pred_luma_8x8_mode_vert_l_a9q
@* -ih264_intra_pred_luma_8x8_mode_horz_u_a9q
@*
@* @remarks
@* None
@*
@*******************************************************************************
@*
@* All the functions here are replicated from ih264_intra_pred_filters.c
@
@ Everything below is assembled into the code section, 4-byte aligned.
.text
.p2align 2
@ Byte table defined outside this file. It is loaded by
@ ih264_intra_pred_luma_8x8_mode_horz_u_a9q and used there as vtbl indices.
.extern ih264_gai1_intrapred_luma_8x8_horz_u
.hidden ih264_gai1_intrapred_luma_8x8_horz_u
@ Position-independent reference to the table: this word holds the distance
@ from (scrlb8x8l2 + 8) to the table. The instruction at scrlb8x8l2 is
@ "add r12, r12, pc"; in ARM state a read of pc yields the address of the
@ current instruction plus 8, hence the "- 8" term.
@ The word is fetched with a pc-relative "ldr r12, scratch_intrapred_addr_8x8",
@ so it has to stay within literal-load range of that instruction.
scratch_intrapred_addr_8x8:
.long ih264_gai1_intrapred_luma_8x8_horz_u - scrlb8x8l2 - 8
@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_ref_filtering
@*
@* @brief
@* Reference sample filtering process for Intra_8x8 sample prediction
@*
@* @par Description:
@* Perform Reference sample filtering process for Intra_8x8 sample prediction ,described in sec 8.3.2.2.1
@*
@* @param[in] pu1_src
@* UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@* UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@* integer source stride [Not used]
@*
@* @param[in] dst_strd
@* integer destination stride[Not used]
@*
@* @param[in] ui_neighboravailability
@* availability of neighbouring pixels[Not used]
@*
@* @returns
@*
@* @remarks
@* None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_ref_filtering(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst)
@**************Variables Vs Registers*****************************************
@ r0 => *pu1_src
@ r1 => *pu1_dst
.global ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q
ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q:
    @ Smooths the 25 neighbour bytes at pu1_src (r0) into pu1_dst (r1):
    @   dst[0]  = (3*src[0] + src[1] + 2) >> 2
    @   dst[i]  = (src[i-1] + 2*src[i] + src[i+1] + 2) >> 2      i = 1..23
    @   dst[24] = (src[23] + 3*src[24] + 2) >> 2
    @ Two full 16-byte loads are issued, so src[0..31] is read.
    @ All loads complete before the first store.
    push        {r4-r12, lr}
    vpush       {d8-d15}                    @ q4-q7 are used as temporaries

    @ ---- gather the shifted views of the input ------------------------
    vld1.8      {d0, d1}, [r0]!             @ q0  = src[0..15]
    vld1.8      {d2, d3}, [r0]              @ q1  = src[16..31]
    add         r0, r0, #8                  @ r0  = &src[24]
    vext.8      q2, q0, q1, #1              @ q2  = src[1..16]
    vext.8      q3, q1, q1, #1              @ d6  = src[17..24]
    vext.8      q4, q2, q3, #1              @ q4  = src[2..17]
    vext.8      q5, q3, q3, #1              @ d10 = src[18..25]
    vld1.8      {d10[7]}, [r0]              @ d10 = src[18..24], src[24]
                                            @ (last sample repeated for the end tap)

    @ ---- dst[0]: first sample is weighted 3:1 --------------------------
    vaddl.u8    q10, d0, d4                 @ src[i] + src[i+1],        i = 0..7
    vaddl.u8    q7, d0, d0                  @ 2*src[i]
    vadd.i16    q7, q10, q7                 @ 3*src[i] + src[i+1]
    vqrshrun.s16 d14, q7, #2                @ (.. + 2) >> 2, narrowed to bytes
    vst1.8      {d14[0]}, [r1]!             @ only lane 0 is wanted

    @ ---- dst[1..16]: 1-2-1 filter --------------------------------------
    vaddl.u8    q11, d1, d5                 @ src[i] + src[i+1],        i = 8..15
    vaddl.u8    q12, d4, d8                 @ src[i+1] + src[i+2],      i = 0..7
    vaddl.u8    q13, d5, d9                 @ src[i+1] + src[i+2],      i = 8..15
    vadd.i16    q12, q10, q12
    vadd.i16    q13, q11, q13
    vqrshrun.s16 d4, q12, #2
    vqrshrun.s16 d5, q13, #2
    vst1.8      {d4, d5}, [r1]!

    @ ---- dst[17..24]: 1-2-1 filter, last lane uses src[24] twice -------
    vaddl.u8    q9, d2, d6                  @ src[16..23] + src[17..24]
    vaddl.u8    q8, d6, d10                 @ src[17..24] + {src[18..24], src[24]}
    vadd.i16    q6, q8, q9
    vqrshrun.s16 d6, q6, #2
    vst1.8      {d6}, [r1]

end_func_ref_filt:
    vpop        {d8-d15}
    pop         {r4-r12, pc}                @ restore and return
@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_vert
@*
@* @brief
@* Perform Intra prediction for luma_8x8 mode:vertical
@*
@* @par Description:
@* Perform Intra prediction for luma_8x8 mode:vertical ,described in sec 8.3.2.2.2
@*
@* @param[in] pu1_src
@* UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@* UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@* integer source stride
@*
@* @param[in] dst_strd
@* integer destination stride
@*
@* @param[in] ui_neighboravailability
@* availability of neighbouring pixels(Not used in this function)
@*
@* @returns
@*
@* @remarks
@* None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_vert(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@ WORD32 dst_strd,
@ WORD32 ui_neighboravailability)
@**************Variables Vs Registers*****************************************
@ r0 => *pu1_src
@ r1 => *pu1_dst
@ r2 => src_strd
@ r3 => dst_strd
@ r4 => ui_neighboravailability
.global ih264_intra_pred_luma_8x8_mode_vert_a9q
ih264_intra_pred_luma_8x8_mode_vert_a9q:
    @ Vertical prediction: the 8 bytes at pu1_src + 9 are copied to each of
    @ the 8 destination rows.
    @   r0 = pu1_src, r1 = pu1_dst, r3 = dst_strd (r2 is not read)
    @ Leaf routine touching only r0, r1, r3 and d0, all caller-saved, so no
    @ registers are spilled and the return is a plain "bx lr".
    add         r0, r0, #9                  @ r0 = &src[9]
    vld1.8      {d0}, [r0]                  @ d0 = src[9..16]
    vst1.8      {d0}, [r1], r3              @ row 0
    vst1.8      {d0}, [r1], r3              @ row 1
    vst1.8      {d0}, [r1], r3              @ row 2
    vst1.8      {d0}, [r1], r3              @ row 3
    vst1.8      {d0}, [r1], r3              @ row 4
    vst1.8      {d0}, [r1], r3              @ row 5
    vst1.8      {d0}, [r1], r3              @ row 6
    vst1.8      {d0}, [r1], r3              @ row 7
    bx          lr
@******************************************************************************
@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_horz
@*
@* @brief
@* Perform Intra prediction for luma_8x8 mode:horizontal
@*
@* @par Description:
@* Perform Intra prediction for luma_8x8 mode: horizontal, described in sec 8.3.2.2.3
@*
@* @param[in] pu1_src
@* UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@* UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@* integer source stride
@*
@* @param[in] dst_strd
@* integer destination stride
@*
@* @param[in] ui_neighboravailability
@* availability of neighbouring pixels(Not used in this function)
@*
@* @returns
@*
@* @remarks
@* None
@*
@*******************************************************************************
@*
@void ih264_intra_pred_luma_8x8_mode_horz(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@ WORD32 dst_strd,
@ WORD32 ui_neighboravailability)
@**************Variables Vs Registers*****************************************
@ r0 => *pu1_src
@ r1 => *pu1_dst
@ r2 => src_strd
@ r3 => dst_strd
@ r4 => ui_neighboravailability
.global ih264_intra_pred_luma_8x8_mode_horz_a9q
ih264_intra_pred_luma_8x8_mode_horz_a9q:
    @ Horizontal prediction: destination row y is filled with the byte
    @ src[7 - y], y = 0..7.
    @   r0 = pu1_src, r1 = pu1_dst, r3 = dst_strd
    @   r2 (src_strd on entry) is overwritten and used as the loop counter.
    @ Leaf routine using only caller-saved registers (r0-r3, d0-d2), so
    @ nothing is pushed and the return is "bx lr".
    @ The loop is software-pipelined: each pass stores the two rows splatted
    @ on the previous pass while preparing the next two.
    vld1.8      {d0}, [r0]                  @ d0 = src[0..7]
    mov         r2, #6                      @ 3 passes x 2 rows
    vdup.8      d1, d0[7]                   @ row 0 value
    vdup.8      d2, d0[6]                   @ row 1 value
    vst1.8      {d1}, [r1], r3              @ row 0
loop_8x8_horz:
    vext.8      d0, d0, d0, #6              @ rotate so the next pair sits in lanes 7/6
    vst1.8      {d2}, [r1], r3              @ odd row from the previous pair
    vdup.8      d1, d0[7]
    subs        r2, r2, #2
    vdup.8      d2, d0[6]
    vst1.8      {d1}, [r1], r3              @ even row of the new pair
    bne         loop_8x8_horz
    vst1.8      {d2}, [r1], r3              @ row 7
    bx          lr
@******************************************************************************
@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_dc
@*
@* @brief
@* Perform Intra prediction for luma_8x8 mode:DC
@*
@* @par Description:
@* Perform Intra prediction for luma_8x8 mode: DC, described in sec 8.3.2.2.4
@*
@* @param[in] pu1_src
@* UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@* UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@* integer source stride
@*
@* @param[in] dst_strd
@* integer destination stride
@*
@* @param[in] ui_neighboravailability
@* availability of neighbouring pixels
@*
@* @returns
@*
@* @remarks
@* None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_dc(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@ WORD32 dst_strd,
@ WORD32 ui_neighboravailability)
@**************Variables Vs Registers*****************************************
@ r0 => *pu1_src
@ r1 => *pu1_dst
@ r2 => src_strd
@ r3 => dst_strd
@ r4 => ui_neighboravailability
.global ih264_intra_pred_luma_8x8_mode_dc_a9q
ih264_intra_pred_luma_8x8_mode_dc_a9q:
    @ DC prediction: every destination byte gets one value chosen by the
    @ availability flags (5th argument, read from the stack):
    @   bit 0 and bit 2 set : (sum(src[0..7]) + sum(src[9..16]) + 8) >> 4
    @   only bit 2 set      : (sum(src[9..16]) + 4) >> 3
    @   only bit 0 set      : (sum(src[0..7])  + 4) >> 3
    @   neither             : 128
    @   r0 = pu1_src, r1 = pu1_dst, r3 = dst_strd, r2 = scratch
    push        {r4, lr}
    ldr         r4, [sp, #8]                @ r4 = ui_neighboravailability (above the 2 pushed words)
    ands        r2, r4, #0x01               @ bit 0: left edge usable?
    beq         top_available
    ands        r2, r4, #0x04               @ bit 2: top edge usable?
    beq         left_available

    @ ---- both edges: average of 16 samples ------------------------------
    vld1.8      {d0}, [r0]                  @ d0 = src[0..7]
    add         r0, r0, #9
    vld1.8      {d1}, [r0]                  @ d1 = src[9..16]
    vpaddl.u8   q0, q0                      @ pairwise sums, 4 per edge
    vadd.i16    d0, d0, d1                  @ fold the two edges together
    vpaddl.u16  d0, d0
    vpaddl.u32  d0, d0                      @ total in the low halfword of d0
    vqrshrun.s16 d0, q0, #4                 @ lane 0 = (total + 8) >> 4
    b           .Ldc_8x8_splat

none_available:                             @ no edge usable: mid-grey
    vmov.i8     q0, #128
    b           str_pred

top_available:                              @ left edge missing
    ands        r2, r4, #0x04               @ bit 2: top edge usable?
    beq         none_available
    add         r0, r0, #9                  @ sum src[9..16] instead of src[0..7]
    @ falls through: both single-edge cases share the 8-sample average
left_available:
    vld1.8      {d0}, [r0]
    vpaddl.u8   d0, d0
    vpaddl.u16  d0, d0
    vpaddl.u32  d0, d0                      @ total in the low halfword of d0
    vqrshrun.s16 d0, q0, #3                 @ lane 0 = (total + 4) >> 3; other lanes unused

.Ldc_8x8_splat:
    vdup.8      d0, d0[0]                   @ replicate the DC byte across the row
str_pred:
    vst1.8      {d0}, [r1], r3              @ row 0
    vst1.8      {d0}, [r1], r3              @ row 1
    vst1.8      {d0}, [r1], r3              @ row 2
    vst1.8      {d0}, [r1], r3              @ row 3
    vst1.8      {d0}, [r1], r3              @ row 4
    vst1.8      {d0}, [r1], r3              @ row 5
    vst1.8      {d0}, [r1], r3              @ row 6
    vst1.8      {d0}, [r1], r3              @ row 7
    pop         {r4, pc}                    @ restore and return
@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_diag_dl
@*
@* @brief
@* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left
@*
@* @par Description:
@* Perform Intra prediction for luma_8x8 mode: Diagonal_Down_Left, described in sec 8.3.2.2.5
@*
@* @param[in] pu1_src
@* UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@* UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@* integer source stride
@*
@* @param[in] dst_strd
@* integer destination stride
@*
@* @param[in] ui_neighboravailability
@* availability of neighbouring pixels
@*
@* @returns
@*
@* @remarks
@* None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_diag_dl(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@ WORD32 dst_strd,
@ WORD32 ui_neighboravailability)
@**************Variables Vs Registers*****************************************
@ r0 => *pu1_src
@ r1 => *pu1_dst
@ r2 => src_strd
@ r3 => dst_strd
@ r4 => ui_neighboravailability
.global ih264_intra_pred_luma_8x8_mode_diag_dl_a9q
ih264_intra_pred_luma_8x8_mode_diag_dl_a9q:
    @ Diagonal-down-left prediction from the 16 bytes src[9..24].
    @ With T[i] = (src[9+i] + 2*src[10+i] + src[11+i] + 2) >> 2 for i = 0..13
    @ and  T[14] = (src[23] + 3*src[24] + 2) >> 2,
    @ destination row y holds T[y .. y+7].
    @   r0 = pu1_src, r1 = pu1_dst, r3 = dst_strd
    @   r5 = dst_strd - 4 (step to the next row after a 4-byte store)
    @   r6 = &src[24]
    push        {r4-r12, lr}

    add         r0, r0, #9
    vld1.8      {d0, d1}, [r0]              @ q0 = src[9..24]
    add         r6, r0, #15
    vext.8      q1, q0, q0, #1              @ q1 = q0 advanced by one sample
    vext.8      q2, q0, q0, #2              @ q2 = q0 advanced by two samples
    vld1.8      {d5[6]}, [r6]               @ lane 14 of q2 := src[24] (end replication)

    @ 1-2-1 filter: (q0 + q1) + (q1 + q2), rounded and narrowed
    vaddl.u8    q10, d0, d2
    vaddl.u8    q11, d1, d3
    vaddl.u8    q12, d2, d4
    vaddl.u8    q13, d3, d5
    vadd.i16    q12, q10, q12
    vadd.i16    q13, q11, q13
    vqrshrun.s16 d4, q12, #2
    vqrshrun.s16 d5, q13, #2                @ q2 = T[0..15]; lane 15 is never stored

    sub         r5, r3, #4
    vext.8      q9, q2, q2, #1              @ q9  = T advanced by 1
    vext.8      q8, q2, q2, #2              @ q8  = T advanced by 2
    vext.8      q15, q2, q2, #3             @ q15 = T advanced by 3

    @ rows 0..3: the low half of each rotated copy
    vst1.8      {d4}, [r1], r3              @ T[0..7]
    vst1.8      {d18}, [r1], r3             @ T[1..8]
    vst1.8      {d16}, [r1], r3             @ T[2..9]
    vst1.8      {d30}, [r1], r3             @ T[3..10]

    @ rows 4..7: bytes 4..11 of each copy, written as two 32-bit pieces
    vst1.32     {d4[1]}, [r1]!              @ row 4: T[4..7]
    vst1.32     {d5[0]}, [r1], r5           @        T[8..11]
    vst1.32     {d18[1]}, [r1]!             @ row 5: T[5..8]
    vst1.32     {d19[0]}, [r1], r5          @        T[9..12]
    vst1.32     {d16[1]}, [r1]!             @ row 6: T[6..9]
    vst1.32     {d17[0]}, [r1], r5          @        T[10..13]
    vst1.32     {d30[1]}, [r1]!             @ row 7: T[7..10]
    vst1.32     {d31[0]}, [r1], r5          @        T[11..14]

end_func_diag_dl:
    pop         {r4-r12, pc}                @ restore and return
@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_diag_dr
@*
@* @brief
@* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right
@*
@* @par Description:
@* Perform Intra prediction for luma_8x8 mode: Diagonal_Down_Right, described in sec 8.3.2.2.6
@*
@* @param[in] pu1_src
@* UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@* UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@* integer source stride
@*
@* @param[in] dst_strd
@* integer destination stride
@*
@* @param[in] ui_neighboravailability
@* availability of neighbouring pixels
@*
@* @returns
@*
@* @remarks
@* None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_diag_dr(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@ WORD32 dst_strd,
@ WORD32 ui_neighboravailability)
@**************Variables Vs Registers*****************************************
@ r0 => *pu1_src
@ r1 => *pu1_dst
@ r2 => src_strd
@ r3 => dst_strd
@ r4 => ui_neighboravailability
.global ih264_intra_pred_luma_8x8_mode_diag_dr_a9q
ih264_intra_pred_luma_8x8_mode_diag_dr_a9q:
    @ Diagonal-down-right prediction from src[0..16].
    @ With T[i] = (src[i] + 2*src[i+1] + src[i+2] + 2) >> 2 for i = 0..14,
    @ destination row y holds T[7-y .. 14-y].
    @   r0 = pu1_src, r1 = pu1_dst, r3 = dst_strd
    @   r5 = dst_strd - 4 (step to the next row after a 4-byte store)
    push        {r4-r12, lr}

    vld1.8      {d0, d1}, [r0]              @ q0 = src[0..15]
    add         r0, r0, #1
    vld1.8      {d2, d3}, [r0]              @ q1 = src[1..16]
    vext.8      q2, q1, q1, #1              @ q2 = src[2..16], lane 15 wraps to src[1]

    @ 1-2-1 filter: (q0 + q1) + (q1 + q2), rounded and narrowed
    vaddl.u8    q10, d0, d2
    vaddl.u8    q11, d1, d3
    vaddl.u8    q12, d2, d4
    vaddl.u8    q13, d3, d5
    vadd.i16    q12, q10, q12
    vadd.i16    q13, q11, q13
    vqrshrun.s16 d4, q12, #2
    vqrshrun.s16 d5, q13, #2                @ q2 = T[0..15]; lane 15 is never stored

    sub         r5, r3, #4
    vext.8      q9, q2, q2, #15             @ q9  = T delayed by 1 lane
    vext.8      q8, q2, q2, #14             @ q8  = T delayed by 2 lanes
    vext.8      q15, q2, q2, #13            @ q15 = T delayed by 3 lanes

    @ rows 0..2: the high half of each delayed copy
    vst1.8      {d19}, [r1], r3             @ T[7..14]
    vst1.8      {d17}, [r1], r3             @ T[6..13]
    vst1.8      {d31}, [r1], r3             @ T[5..12]

    @ rows 3..6: bytes 4..11 of each copy, written as two 32-bit pieces
    vst1.32     {d4[1]}, [r1]!              @ row 3: T[4..7]
    vst1.32     {d5[0]}, [r1], r5           @        T[8..11]
    vst1.32     {d18[1]}, [r1]!             @ row 4: T[3..6]
    vst1.32     {d19[0]}, [r1], r5          @        T[7..10]
    vst1.32     {d16[1]}, [r1]!             @ row 5: T[2..5]
    vst1.32     {d17[0]}, [r1], r5          @        T[6..9]
    vst1.32     {d30[1]}, [r1]!             @ row 6: T[1..4]
    vst1.32     {d31[0]}, [r1], r5          @        T[5..8]

    @ row 7
    vst1.8      {d4}, [r1], r3              @ T[0..7]

end_func_diag_dr:
    pop         {r4-r12, pc}                @ restore and return
@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_vert_r
@*
@* @brief
@* Perform Intra prediction for luma_8x8 mode:Vertical_Right
@*
@* @par Description:
@* Perform Intra prediction for luma_8x8 mode: Vertical_Right, described in sec 8.3.2.2.7
@*
@* @param[in] pu1_src
@* UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@* UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@* integer source stride
@*
@* @param[in] dst_strd
@* integer destination stride
@*
@* @param[in] ui_neighboravailability
@* availability of neighbouring pixels
@*
@* @returns
@*
@* @remarks
@* None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_vert_r(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@ WORD32 dst_strd,
@ WORD32 ui_neighboravailability)
@**************Variables Vs Registers*****************************************
@ r0 => *pu1_src
@ r1 => *pu1_dst
@ r2 => src_strd
@ r3 => dst_strd
@ r4 => ui_neighboravailability
.global ih264_intra_pred_luma_8x8_mode_vert_r_a9q
ih264_intra_pred_luma_8x8_mode_vert_r_a9q:
@ Vertical-right prediction from src[0..16].
@ Notation used in the comments below:
@   F[i] = (src[i] + src[i+1] + 1) >> 1                  (2-tap average)
@   T[i] = (src[i] + 2*src[i+1] + src[i+2] + 2) >> 2     (1-2-1 filter)
@ Lane 15 of T wraps around (its third tap is src[1]); it is never stored.
@ r0 = pu1_src, r1 = pu1_dst, r3 = dst_strd,
@ r5 = dst_strd - 6, r6 = dst_strd - 4 (row steps after partial stores).
stmfd sp!, {r4-r12, r14} @ save r4-r12 and lr (only r5 and r6 are modified below)
vld1.u8 {q0}, [r0] @ q0 = src[0..15]
add r0, r0, #1
vld1.u8 {q1}, [r0] @ q1 = src[1..16]
vext.8 q2, q1, q1, #1 @ q2 = src[2..16], lane 15 wraps to src[1]
@ q1 = q0 advanced by one sample
@ q2 = q1 advanced by one sample
vaddl.u8 q10, d0, d2 @ src[i] + src[i+1], i = 0..7
vaddl.u8 q11, d1, d3 @ src[i] + src[i+1], i = 8..15
vaddl.u8 q12, d2, d4 @ src[i+1] + src[i+2], i = 0..7
vaddl.u8 q13, d3, d5 @ src[i+1] + src[i+2], i = 8..15
vadd.u16 q12, q10, q12 @ src[i] + 2*src[i+1] + src[i+2]
vadd.u16 q13, q11, q13
vqrshrun.s16 d4, q10, #1 @ d4 = F[0..7]
vqrshrun.s16 d5, q11, #1 @ d5 = F[8..15]
vqrshrun.s16 d6, q12, #2 @ d6 = T[0..7]
vqrshrun.s16 d7, q13, #2 @ d7 = T[8..15]
@ q2 (d4:d5) = F[0..15]
@ q3 (d6:d7) = T[0..15]
sub r5, r3, #6 @ row step after 2 + 4 bytes written with writeback
sub r6, r3, #4 @ row step after 4 bytes written with writeback
vst1.8 {d5}, [r1], r3 @ row 0 = F[8..15]
vext.8 q9, q3, q3, #15 @ q9 = T delayed by one lane; d19 = T[7..14]
vmov.8 q11, q9 @ keep a copy: d23 = T[7..14] (q9 is overwritten by vuzp)
vext.8 q8, q2, q2, #1 @ q8 = F advanced by one lane
vst1.8 {d19}, [r1], r3 @ row 1 = T[7..14]
vmov.8 q15, q8 @ keep a copy: d31 = F[9..15], F[0]
vext.8 q10, q2, q2, #15 @ q10 = F delayed by one lane; d21 = F[7..14]
vuzp.8 q8, q9 @ de-interleave q8:q9 -> q8 = even bytes, q9 = odd bytes:
@   d17 = T[15], T[1], T[3], T[5], T[7], T[9], T[11], T[13]
@   d19 = T[0],  T[2], T[4], T[6], T[8], T[10], T[12], T[14]
@ row 2 = T[6], F[8..14]
vext.8 q14, q8, q8, #1 @ d29 = T[1], T[3], T[5], T[7], T[9], T[11], T[13], F[1]
vst1.8 {d21}, [r1] @ F[7..14], no writeback ...
vst1.8 {d6[6]}, [r1], r3 @ ... then byte 0 is replaced by T[6]
@ row 3 = T[5], T[7], T[8..13]
vst1.16 {d29[1]}, [r1]! @ T[5], T[7]
vst1.32 {d7[0]}, [r1]! @ T[8..11]
vst1.16 {d7[2]}, [r1], r5 @ T[12..13]
@ row 4 = T[4], T[6], F[8..13]
vst1.16 {d19[1]}, [r1]! @ T[4], T[6]
vst1.32 {d5[0]}, [r1]! @ F[8..11]
vst1.16 {d5[2]}, [r1], r5 @ F[12..13]
@ row 5 = T[3], T[5], T[7], T[8..12]
vext.8 q13, q9, q9, #1 @ d27 = T[2], T[4], T[6], T[8], T[10], T[12], T[14], F[2]
vst1.16 {d17[1]}, [r1]! @ T[3], T[5]
vst1.32 {d23[0]}, [r1]! @ T[7..10]
vst1.16 {d23[2]}, [r1], r5 @ T[11..12]
@ row 6 = T[2], T[4], T[6], F[8..12]
vst1.16 {d27[0]}, [r1]! @ T[2], T[4]
vst1.8 {d27[2]}, [r1]! @ T[6]
vst1.8 {d5[0]}, [r1]! @ F[8]
vst1.32 {d31[0]}, [r1], r6 @ F[9..12]
@ row 7 = T[1], T[3], T[5], T[7], T[8..11]
vst1.32 {d29[0]}, [r1]! @ T[1], T[3], T[5], T[7]
vst1.32 {d7[0]}, [r1]! @ T[8..11]
end_func_vert_r:
ldmfd sp!, {r4-r12, pc} @ restore registers and return
@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_horz_d
@*
@* @brief
@* Perform Intra prediction for luma_8x8 mode:Horizontal_Down
@*
@* @par Description:
@* Perform Intra prediction for luma_8x8 mode: Horizontal_Down, described in sec 8.3.2.2.8
@*
@* @param[in] pu1_src
@* UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@* UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@* integer source stride
@*
@* @param[in] dst_strd
@* integer destination stride
@*
@* @param[in] ui_neighboravailability
@* availability of neighbouring pixels
@*
@* @returns
@*
@* @remarks
@* None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_horz_d(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@ WORD32 dst_strd,
@ WORD32 ui_neighboravailability)
@**************Variables Vs Registers*****************************************
@ r0 => *pu1_src
@ r1 => *pu1_dst
@ r2 => src_strd
@ r3 => dst_strd
@ r4 => ui_neighboravailability
.global ih264_intra_pred_luma_8x8_mode_horz_d_a9q
ih264_intra_pred_luma_8x8_mode_horz_d_a9q:
@ Horizontal-down prediction from src[0..16].
@ Notation used in the comments below:
@   F[i] = (src[i] + src[i+1] + 1) >> 1                  (2-tap average)
@   T[i] = (src[i] + 2*src[i+1] + src[i+2] + 2) >> 2     (1-2-1 filter)
@ Lane 15 of T wraps around (its third tap is src[1]); it is never stored.
@ r0 = pu1_src, r1 = pu1_dst, r3 = dst_strd,
@ r5 = dst_strd - 4, r6 = dst_strd - 6 (row steps after partial stores).
stmfd sp!, {r4-r12, r14} @ save r4-r12 and lr (only r5 and r6 are modified below)
vpush {d8-d15} @ q4-q7 are used as temporaries
vld1.u8 {q0}, [r0] @ q0 = src[0..15]
add r0, r0, #1
vld1.u8 {q1}, [r0] @ q1 = src[1..16]
vext.8 q2, q1, q1, #1 @ q2 = src[2..16], lane 15 wraps to src[1]
@ q1 = q0 advanced by one sample
@ q2 = q1 advanced by one sample
vaddl.u8 q10, d0, d2 @ src[i] + src[i+1], i = 0..7
vaddl.u8 q11, d1, d3 @ src[i] + src[i+1], i = 8..15
vaddl.u8 q12, d2, d4 @ src[i+1] + src[i+2], i = 0..7
vaddl.u8 q13, d3, d5 @ src[i+1] + src[i+2], i = 8..15
vadd.u16 q12, q10, q12 @ src[i] + 2*src[i+1] + src[i+2]
vadd.u16 q13, q11, q13
vqrshrun.s16 d4, q10, #1 @ d4 = F[0..7]
vqrshrun.s16 d5, q11, #1 @ d5 = F[8..15]
vqrshrun.s16 d6, q12, #2 @ d6 = T[0..7]
vqrshrun.s16 d7, q13, #2 @ d7 = T[8..15]
@ q2 (d4:d5) = F[0..15]
@ q3 (d6:d7) = T[0..15]
vmov.8 q4, q2
vmov.8 q5, q3
sub r6, r3, #6 @ row step after 2 + 4 (or 2 + 2 + 2) bytes written with writeback
vtrn.8 q4, q5 @ interleave F/T pairs:
@   d8  = F0,T0, F2,T2, F4,T4, F6,T6
@   d10 = F1,T1, F3,T3, F5,T5, F7,T7
vmov.8 q6, q4
vmov.8 q7, q5
sub r5, r3, #4 @ row step after 4 bytes written with writeback
vtrn.16 q6, q7 @ regroup the (F,T) halfword pairs:
@   d12 = F0,T0, F1,T1, F4,T4, F5,T5
@   d14 = F2,T2, F3,T3, F6,T6, F7,T7
vext.8 q8, q3, q3, #14 @ q8 = T delayed by two lanes; d17 = T[6..13]
@ROW 0 = F7,T7, T8..T13
vst1.8 {d17}, [r1] @ T[6..13], no writeback ...
vst1.16 {d10[3]}, [r1], r3 @ ... then bytes 0-1 are replaced by F7,T7
@ROW 1 = F6,T6, F7,T7, T8..T11
vst1.32 {d14[1]}, [r1]! @ F6,T6,F7,T7
vst1.32 {d7[0]}, [r1], r5 @ T[8..11]
@ROW 2 = F5,T5, F6,T6, F7,T7, T8,T9
vst1.16 {d10[2]}, [r1]! @ F5,T5
vst1.32 {d14[1]}, [r1]! @ F6,T6,F7,T7
vst1.16 {d7[0]}, [r1], r6 @ T8,T9
@ROW 3 = F4,T4, F5,T5, F6,T6, F7,T7
vst1.32 {d12[1]}, [r1]! @ F4,T4,F5,T5
vst1.32 {d14[1]}, [r1], r5 @ F6,T6,F7,T7
@ROW 4 = F3,T3, F4,T4, F5,T5, F6,T6
vst1.16 {d14[1]}, [r1]! @ F3,T3
vst1.32 {d12[1]}, [r1]! @ F4,T4,F5,T5
vst1.16 {d14[2]}, [r1], r6 @ F6,T6
@ROW 5 = F2,T2, F3,T3, F4,T4, F5,T5
vst1.32 {d14[0]}, [r1]! @ F2,T2,F3,T3
vst1.32 {d12[1]}, [r1], r5 @ F4,T4,F5,T5
@ROW 6 = F1,T1, F2,T2, F3,T3, F4,T4
vst1.16 {d10[0]}, [r1]! @ F1,T1
vst1.16 {d8[1]}, [r1]! @ F2,T2
vst1.16 {d14[1]}, [r1]! @ F3,T3
vst1.16 {d12[2]}, [r1], r6 @ F4,T4
@ROW 7 = F0,T0, F1,T1, F2,T2, F3,T3
vst1.32 {d12[0]}, [r1]! @ F0,T0,F1,T1
vst1.32 {d14[0]}, [r1], r5 @ F2,T2,F3,T3
end_func_horz_d:
vpop {d8-d15} @ restore the NEON registers saved on entry
ldmfd sp!, {r4-r12, pc} @ restore registers and return
@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_vert_l
@*
@* @brief
@* Perform Intra prediction for luma_8x8 mode:Vertical_Left
@*
@* @par Description:
@* Perform Intra prediction for luma_8x8 mode: Vertical_Left, described in sec 8.3.2.2.9
@*
@* @param[in] pu1_src
@* UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@* UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@* integer source stride
@*
@* @param[in] dst_strd
@* integer destination stride
@*
@* @param[in] ui_neighboravailability
@* availability of neighbouring pixels
@*
@* @returns
@*
@* @remarks
@* None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_vert_l(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@ WORD32 dst_strd,
@ WORD32 ui_neighboravailability)
@**************Variables Vs Registers*****************************************
@ r0 => *pu1_src
@ r1 => *pu1_dst
@ r2 => src_strd
@ r3 => dst_strd
@ r4 => ui_neighboravailability
.global ih264_intra_pred_luma_8x8_mode_vert_l_a9q
ih264_intra_pred_luma_8x8_mode_vert_l_a9q:
    @ Vertical-left prediction from src[9..25] (two overlapping 16-byte loads).
    @ With F[i] = (src[9+i] + src[10+i] + 1) >> 1
    @ and  T[i] = (src[9+i] + 2*src[10+i] + src[11+i] + 2) >> 2,
    @ even row 2k holds F[k .. k+7] and odd row 2k+1 holds T[k .. k+7], k = 0..3.
    @   r0 = pu1_src, r1 = pu1_dst, r3 = dst_strd
    push        {r4-r12, lr}                @ save registers
    vpush       {d8-d15}                    @ q4-q7 are used as temporaries

    add         r0, r0, #9
    vld1.8      {d0, d1}, [r0]              @ q0 = src[9..24]
    add         r0, r0, #1
    vld1.8      {d2, d3}, [r0]              @ q1 = src[10..25]
    vext.8      q2, q1, q1, #1              @ q2 = src[11..25], lane 15 wraps to src[10]

    vaddl.u8    q10, d0, d2                 @ two-tap sums
    vaddl.u8    q11, d1, d3
    vaddl.u8    q12, d2, d4
    vaddl.u8    q13, d3, d5
    vadd.i16    q12, q10, q12               @ 1-2-1 sums
    vadd.i16    q13, q11, q13
    vqrshrun.s16 d4, q10, #1
    vqrshrun.s16 d5, q11, #1                @ q2 = F[0..15]
    vqrshrun.s16 d6, q12, #2
    vqrshrun.s16 d7, q13, #2                @ q3 = T[0..15]

    @ copies advanced by 1, 2 and 3 lanes; only the low halves are stored
    vext.8      q4, q2, q2, #1              @ d8  = F[1..8]
    vext.8      q5, q3, q3, #1              @ d10 = T[1..8]
    vext.8      q6, q2, q2, #2              @ d12 = F[2..9]
    vext.8      q7, q3, q3, #2              @ d14 = T[2..9]
    vext.8      q8, q2, q2, #3              @ d16 = F[3..10]
    vext.8      q9, q3, q3, #3              @ d18 = T[3..10]

    vst1.8      {d4}, [r1], r3              @ row 0
    vst1.8      {d6}, [r1], r3              @ row 1
    vst1.8      {d8}, [r1], r3              @ row 2
    vst1.8      {d10}, [r1], r3             @ row 3
    vst1.8      {d12}, [r1], r3             @ row 4
    vst1.8      {d14}, [r1], r3             @ row 5
    vst1.8      {d16}, [r1], r3             @ row 6
    vst1.8      {d18}, [r1], r3             @ row 7

end_func_vert_l:
    vpop        {d8-d15}
    pop         {r4-r12, pc}                @ restore and return
@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_horz_u
@*
@* @brief
@* Perform Intra prediction for luma_8x8 mode:Horizontal_Up
@*
@* @par Description:
@* Perform Intra prediction for luma_8x8 mode: Horizontal_Up, described in sec 8.3.2.2.10
@*
@* @param[in] pu1_src
@* UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@* UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@* integer source stride
@*
@* @param[in] dst_strd
@* integer destination stride
@*
@* @param[in] ui_neighboravailability
@* availability of neighbouring pixels
@*
@* @returns
@*
@* @remarks
@* None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_horz_u(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@ WORD32 dst_strd,
@ WORD32 ui_neighboravailability)
@**************Variables Vs Registers*****************************************
@ r0 => *pu1_src
@ r1 => *pu1_dst
@ r2 => src_strd
@ r3 => dst_strd
@ r4 => ui_neighboravailability
.global ih264_intra_pred_luma_8x8_mode_horz_u_a9q
ih264_intra_pred_luma_8x8_mode_horz_u_a9q:
@ Horizontal-up prediction.
@ Builds 16 two-tap averages F (q2) and 16 1-2-1 filtered values T (q3) from
@ the source bytes, then uses the 16-byte table
@ ih264_gai1_intrapred_luma_8x8_horz_u as vtbl indices into the 32 bytes
@ {F[0..15], T[0..15]} to form the sequence P[0..15] in q6.
@ Rows are 8-byte windows of P sliding by 2 bytes per row, padded at the end
@ with the splat of F[15], which equals src[0] (see lane notes below).
@ r0 = pu1_src, r1 = pu1_dst, r3 = dst_strd, r12 = table address.
stmfd sp!, {r4-r12, r14} @ save r4-r12 and lr
vpush {d8-d15} @ q5-q7 are used as temporaries
vld1.u8 {q0}, [r0] @ q0 = src[0..15]
vld1.u8 {d1[7]}, [r0] @ lane 15 := src[0], so the rotations below repeat src[0]
vext.8 q1, q0, q0, #1 @ q1 = q0 rotated by 1: src[1..14], src[0], src[0]
vext.8 q2, q1, q1, #1 @ q2 = q0 rotated by 2: src[2..14], src[0], src[0], src[1]
@ Compute the table address position-independently: the literal holds
@ (table - (scrlb8x8l2 + 8)) and pc reads as scrlb8x8l2 + 8 in the add below.
@ The label scrlb8x8l2 must stay directly on that add instruction.
ldr r12, scratch_intrapred_addr_8x8
scrlb8x8l2:
add r12, r12, pc @ r12 = &ih264_gai1_intrapred_luma_8x8_horz_u
vaddl.u8 q10, d0, d2 @ q0[i] + q1[i], i = 0..7
vaddl.u8 q11, d1, d3 @ q0[i] + q1[i], i = 8..15
vaddl.u8 q12, d2, d4 @ q1[i] + q2[i], i = 0..7
vaddl.u8 q13, d3, d5 @ q1[i] + q2[i], i = 8..15
vadd.u16 q12, q10, q12 @ q0[i] + 2*q1[i] + q2[i]
vadd.u16 q13, q11, q13
vld1.u8 {q5}, [r12] @ q5 = 16 lookup indices from the table
vqrshrun.s16 d4, q10, #1 @ d4 = F[0..7]   (sum + 1) >> 1
vqrshrun.s16 d5, q11, #1 @ d5 = F[8..15]; F[15] = (src[0] + src[0] + 1) >> 1 = src[0]
vqrshrun.s16 d6, q12, #2 @ d6 = T[0..7]   (sum + 2) >> 2
vqrshrun.s16 d7, q13, #2 @ d7 = T[8..15]; T[15] = (3*src[0] + src[1] + 2) >> 2
@ q2 (d4:d5) = F[0..15]
@ q3 (d6:d7) = T[0..15]
vtbl.u8 d12, {q2, q3}, d10 @ P[0..7]  = bytes of {F,T} selected by table[0..7]
vdup.u8 q7, d5[7] @ q7 = F[15] in every lane (padding value)
vtbl.u8 d13, {q2, q3}, d11 @ P[8..15] = bytes of {F,T} selected by table[8..15]
vext.8 q8, q6, q7, #2 @ q8  = P[2..15]  + 2 padding bytes
vext.8 q9, q8, q7, #2 @ q9  = P[4..15]  + 4 padding bytes
vst1.8 {d12}, [r1], r3 @ row 0 = P[0..7]
vext.8 q10, q9, q7, #2 @ q10 = P[6..15]  + 6 padding bytes
vst1.8 {d16}, [r1], r3 @ row 1 = P[2..9]
vst1.8 {d18}, [r1], r3 @ row 2 = P[4..11]
vst1.8 {d20}, [r1], r3 @ row 3 = P[6..13]
vst1.8 {d13}, [r1], r3 @ row 4 = P[8..15]
vst1.8 {d17}, [r1], r3 @ row 5 = P[10..15] + 2 padding bytes
vst1.8 {d19}, [r1], r3 @ row 6 = P[12..15] + 4 padding bytes
vst1.8 {d21}, [r1], r3 @ row 7 = P[14..15] + 6 padding bytes
end_func_horz_u:
vpop {d8-d15} @ restore the NEON registers saved on entry
ldmfd sp!, {r4-r12, pc} @ restore registers and return