@ blob: bdbaa02ebcbab85d3f7b635322707c75b9f56030 [file] [log] [blame]
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@* @brief Evaluate the best intra chroma mode (among VERT, HORZ and DC)
@* and do the prediction.
@* @par Description
@* This function evaluates the first three intra chroma modes, computes the
@* corresponding SAD for each, and returns the buffer predicted with the best mode.
@* @param[in] pu1_src
@* UWORD8 pointer to the source
@* @param[in] pu1_ngbr_pels_i16
@* UWORD8 pointer to neighbouring pels
@* @param[out] pu1_dst
@* UWORD8 pointer to the destination
@* @param[in] src_strd
@* integer source stride
@* @param[in] dst_strd
@* integer destination stride
@* @param[in] u4_n_avblty
@* availability of neighbouring pixels
@* @param[out] u4_intra_mode
@* Pointer to the variable in which best mode is returned
@* @param[out] pu4_sadmin
@* Pointer to the variable in which minimum sad is returned
@* @param[in] u4_valid_intra_modes
@* Says what all modes are valid
@* @return none
@void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src,
@ UWORD8 *pu1_ngbr_pels_i16,
@ UWORD8 *pu1_dst,
@ UWORD32 src_strd,
@ UWORD32 dst_strd,
@ WORD32 u4_n_avblty,
@ UWORD32 *u4_intra_mode,
@ WORD32 *pu4_sadmin,
@ UWORD32 u4_valid_intra_modes)
.text
.p2align 2
.global ih264e_evaluate_intra_chroma_modes_a9q

@-----------------------------------------------------------------------------
@ ih264e_evaluate_intra_chroma_modes_a9q
@
@ Computes the SAD of an 8-row x 16-byte source block against the VERT, HORZ
@ and DC predictors built from the neighbour buffer, picks the valid mode with
@ the smallest SAD, writes the chosen mode and its SAD through the output
@ pointers and stores the winning 8 x 16-byte prediction to pu1_dst.
@
@ Arguments on entry:
@   r0          = pu1_src
@   r1          = pu1_ngbr_pels_i16
@   r2          = pu1_dst
@   r3          = src_strd
@   [sp, #0]    = dst_strd
@   [sp, #4]    = u4_n_avblty           (bit 0 -> left, bit 2 -> top)
@   [sp, #8]    = u4_intra_mode         (pointer, written)
@   [sp, #12]   = pu4_sadmin            (pointer, written)
@   [sp, #16]   = u4_valid_intra_modes  (bit 0 -> DC, bit 1 -> HORZ, bit 2 -> VERT)
@
@ After the 40-byte core-register push the stack arguments sit at sp+40..sp+56;
@ after the additional 64-byte vpush they sit at sp+104..sp+120.
@
@ Neighbour buffer as it is read by this routine:
@   bytes  0..15 : source of the HORZ predictor; the 2-byte pair for row i is
@                  read from offset 14 - 2*i
@   bytes 16..17 : skipped
@   bytes 18..33 : 16-byte VERT predictor row
@
@ Mode values written: 0 = DC, 1 = HORZ, 2 = VERT.  Ties are resolved in the
@ order DC, then HORZ, then VERT.
@
@ r4-r12, lr and d8-d15 are saved on entry and restored on exit.
@-----------------------------------------------------------------------------
ih264e_evaluate_intra_chroma_modes_a9q:
    stmfd sp!, {r4-r12, r14}            @ save core registers (10 regs = 40 bytes)
    ldr r5, [sp, #44]                   @ r5 = u4_n_avblty (second stack argument)
    mov r12, r1                         @ r12 = neighbour base, kept for the reload at 'sad'
    vpush {d8-d15}                      @ save NEON registers used below (64 bytes)

    @ Load neighbour bytes 0..15 into q4 and bytes 18..33 into q5.
    vld1.32 {q4}, [r1]!
    add r1, r1, #2                      @ skip the 2 bytes at offsets 16..17
    vld1.32 {q5}, [r1]!

    @ De-interleave bytes: d8/d9 = even bytes of q4/q5, d10/d11 = odd bytes of q4/q5.
    vuzp.u8 q4, q5 @

    @ Reduce every d register to two 16-bit sums (first four bytes, last four bytes);
    @ vpadd with identical operands leaves the pair duplicated in lanes 2..3.
    vpaddl.u8 d8, d8
    vpadd.u16 d8, d8
    vpaddl.u8 d9, d9
    vpadd.u16 d9, d9
    vpaddl.u8 d10, d10
    vpadd.u16 d10, d10
    vpaddl.u8 d11, d11
    and r7, r5, #5                      @ keep only availability bits 0 and 2
    vpadd.u16 d11, d11

    @ Dispatch on availability: 5 -> both, 4 -> top only, 1 -> left only, else none.
    subs r8, r7, #5
    beq all_available
    subs r8, r7, #4
    beq top_available
    subs r8, r7, #1
    beq left_available

    @ No neighbour available: DC predictor is 128 in every byte.
    mov r10, #128
    vdup.8 q14, r10
    vdup.8 q15, r10
    b sad

    @ DC predictor layout used by the SAD code and by the final store:
    @   d28 = rows 0..3, bytes 0..7      d29 = rows 0..3, bytes 8..15
    @   d30 = rows 4..7, bytes 0..7      d31 = rows 4..7, bytes 8..15
all_available:
    vzip.u16 q4, q5                     @ q4 = sums of neighbour bytes 0..15, q5 = sums of bytes 18..33 (even/odd interleaved)
    vext.16 q6, q4, q4, #2              @ rotate q4 by two halfwords so the halves swap places
    vadd.u16 q7, q5, q6                 @ combined sums of 8 samples each
    vqrshrn.u16 d14, q7, #3             @ rounded /8 for the combined sums
    vqrshrn.u16 d15, q4, #2             @ rounded /4 for the q4-only sums
    vqrshrn.u16 d16, q5, #2             @ rounded /4 for the q5-only sums
    vdup.16 d28, d14[0]
    vdup.16 d29, d16[1]
    vdup.16 d30, d15[0]
    vdup.16 d31, d14[1]
    b sad

top_available:
    vzip.u16 q4, q5
    vqrshrn.u16 d16, q5, #2             @ rounded /4 of the sums of neighbour bytes 18..33
    vdup.16 d28, d16[0]
    vdup.16 d29, d16[1]
    vdup.16 d30, d16[0]
    vdup.16 d31, d16[1]
    b sad

left_available:
    vzip.u16 q4, q5
    vqrshrn.u16 d16, q4, #2             @ rounded /4 of the sums of neighbour bytes 0..15
    vdup.16 d28, d16[3]
    vdup.16 d29, d16[3]
    vdup.16 d30, d16[2]
    vdup.16 d31, d16[2]
    @ falls through into 'sad'

    @ SAD accumulation. 16-bit accumulators: q8/q9 = VERT, q13/q7 = HORZ, q11/q12 = DC.
    @ r0 walks source rows 0..3, r12 walks source rows 4..7.
    @ r8 walks the HORZ pairs for rows 0..3, r10 walks the pairs for rows 4..7.
sad:
    vld1.32 {q4}, [r12]!                @ reload neighbour bytes 0..15 (q4 was overwritten above)
    sub r8, r12, #2                     @ r8 = neighbour + 14: HORZ pair for row 0
    add r12, r12, #2
    vld1.32 {q5}, [r12]!                @ reload neighbour bytes 18..33: VERT predictor row
    add r12, r0, r3, lsl #2             @ r12 = pu1_src + 4 * src_strd (row 4)
    sub r10, r8, #8                     @ r10 = neighbour + 6: HORZ pair for row 4
    vld1.32 {q0}, [r0], r3              @ q0 = source row 0
    ldrh r9, [r8]
    vdup.16 q10, r9 @ row 0
    @/vertical row 0;
    vabdl.u8 q8, d0, d10
    vabdl.u8 q9, d1, d11
    sub r8, r8, #2
    vld1.32 {q1}, [r12], r3             @ q1 = source row 4
    @/HORZ row 0;
    vabdl.u8 q13, d0, d20
    vabdl.u8 q7, d1, d21
    ldrh r9, [r10]
    @/dc row 0;
    vabdl.u8 q11, d0, d28
    vabdl.u8 q12, d1, d29
    vdup.16 q10, r9 @ row 4
    @/vertical row 4;
    vabal.u8 q8, d2, d10
    vabal.u8 q9, d3, d11
    sub r10, r10, #2
    @/HORZ row 4;
    vabal.u8 q13, d2, d20
    vabal.u8 q7, d3, d21
    @/dc row 4;
    vabal.u8 q11, d2, d30
    vabal.u8 q12, d3, d31
    mov r11, #3                         @ three more iterations: rows (1,5), (2,6), (3,7)

loop:
    vld1.32 {q0}, [r0], r3
    ldrh r9, [r8]
    @/vertical row i;
    vabal.u8 q8, d0, d10
    vabal.u8 q9, d1, d11
    vdup.16 q10, r9 @ row i
    vld1.32 {q1}, [r12], r3
    sub r8, r8, #2
    @/HORZ row i;
    vabal.u8 q13, d0, d20
    vabal.u8 q7, d1, d21
    ldrh r9, [r10]
    @/dc row i;
    vabal.u8 q11, d0, d28
    vabal.u8 q12, d1, d29
    sub r10, r10, #2
    vdup.16 q10, r9 @ row i+4
    @/vertical row i+4;
    vabal.u8 q8, d2, d10
    vabal.u8 q9, d3, d11
    subs r11, r11, #1
    @/HORZ row i+4;
    vabal.u8 q13, d2, d20
    vabal.u8 q7, d3, d21
    @/dc row i+4;
    vabal.u8 q11, d2, d30
    vabal.u8 q12, d3, d31
    bne loop

    @ Horizontal reduction of the three accumulators down to one scalar each.
    vadd.i16 q9, q9, q8 @/VERT
    vadd.i16 q7, q13, q7 @/HORZ
    vadd.i16 q12, q11, q12 @/DC
    vadd.i16 d18, d19, d18 @/VERT
    vadd.i16 d14, d15, d14 @/HORZ
    vadd.i16 d24, d24, d25 @/DC
    vpaddl.u16 d18, d18 @/VERT
    vpaddl.u16 d14, d14 @/HORZ
    vpaddl.u16 d24, d24 @/DC
    vpaddl.u32 d18, d18 @/VERT
    vpaddl.u32 d14, d14 @/HORZ
    vpaddl.u32 d24, d24 @/DC
    vmov.u32 r8, d18[0] @ vert
    vmov.u32 r9, d14[0] @horz
    vmov.u32 r10, d24[0] @dc

    @ Replace the SAD of every mode that is not enabled with 1 << 30.
    mov r11, #1
    ldr r0, [sp, #120] @ u4_valid_intra_modes
    lsl r11 , #30
    ands r7, r0, #04                    @ bit 2 clear -> VERT not allowed
    moveq r8, r11
    ands r6, r0, #02                    @ bit 1 clear -> HORZ not allowed
    moveq r9, r11
    ands r6, r0, #01                    @ bit 0 clear -> DC not allowed
    moveq r10, r11
    ldr r4, [sp, #104] @r4 = dst_strd,
    ldr r6, [sp, #112] @ r6 = u4_intra_mode (output pointer)
    ldr r7, [sp, #116] @r7 = pu4_sadmin

    @ Mode decision (signed compares): DC wins unless strictly larger than another SAD.
    cmp r10, r9
    bgt not_dc                          @ DC > HORZ
    cmp r10, r8
    bgt do_vert                         @ VERT < DC <= HORZ

    @ DC selected: q14/q15 already hold the predictor.
    str r10 , [r7] @MIN SAD
    mov r10, #0
    str r10 , [r6] @ MODE
    b do_dc_vert

not_dc:
    cmp r9, r8
    bgt do_vert                         @ HORZ > VERT

    @ HORZ selected: broadcast one 16-bit pair from q4 per row, row 0 taken from
    @ the highest lane (d9[3]) down to row 7 from the lowest lane (d8[0]).
    vdup.16 q10, d9[3] @/HORIZONTAL VALUE ROW=0;
    str r9 , [r7] @MIN SAD
    mov r9, #1
    vdup.16 q11, d9[2] @/HORIZONTAL VALUE ROW=1;
    str r9 , [r6] @ MODE
    vdup.16 q12, d9[1] @/HORIZONTAL VALUE ROW=2;
    vst1.32 {d20, d21} , [r2], r4 @0
    vdup.16 q13, d9[0] @/HORIZONTAL VALUE ROW=3;
    vst1.32 {d22, d23} , [r2], r4 @1
    vdup.16 q14, d8[3] @/HORIZONTAL VALUE ROW=4;
    vst1.32 {d24, d25} , [r2], r4 @2
    vdup.16 q15, d8[2] @/HORIZONTAL VALUE ROW=5;
    vst1.32 {d26, d27} , [r2], r4 @3
    vdup.16 q1, d8[1] @/HORIZONTAL VALUE ROW=6;
    vst1.32 {d28, d29} , [r2], r4 @4
    vdup.16 q2, d8[0] @/HORIZONTAL VALUE ROW=7;
    vst1.32 {d30, d31} , [r2], r4 @5
    vst1.32 {d2, d3} , [r2], r4 @6
    vst1.32 {d4, d5} , [r2], r4 @7
    b end_func

do_vert:
    @ VERT selected: every row is a copy of q5 (neighbour bytes 18..33).
    str r8 , [r7] @MIN SAD
    mov r8, #2
    str r8 , [r6] @ MODE
    vmov q15, q5
    vmov q14, q5
    @ falls through into the shared store

do_dc_vert:
    @ Shared store for DC and VERT: q14 -> rows 0..3, q15 -> rows 4..7.
    vst1.32 {d28, d29} , [r2], r4 @0
    vst1.32 {d28, d29} , [r2], r4 @1
    vst1.32 {d28, d29} , [r2], r4 @2
    vst1.32 {d28, d29} , [r2], r4 @3
    vst1.32 {d30, d31} , [r2], r4 @4
    vst1.32 {d30, d31} , [r2], r4 @5
    vst1.32 {d30, d31} , [r2], r4 @6
    vst1.32 {d30, d31} , [r2], r4 @7

end_func:
    vpop {d8-d15}
    ldmfd sp!, {r4-r12, pc} @Restoring registers from stack