blob: b0a79eb20452767d166cd2c88496f9a037a192f5 [file] [log] [blame]
@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/*******************************************************************************
@* @file
@* ihevc_deblk_chroma_horz.s
@*
@* @brief
@* contains the function definition for deblocking of a horizontal chroma
@* edge. the function is coded in arm neon assembly using gnu assembler
@* syntax
@*
@* @author
@* anand s
@*
@* @par list of functions:
@* - ihevc_deblk_chroma_horz_a9q()
@*
@* @remarks
@* none
@*
@*******************************************************************************/
@ Byte offsets from sp of the stack-passed arguments, valid after the
@ function prologue has executed `push {r4-r12,lr}` (10 registers = 40 bytes,
@ so the first stack argument sits at sp + 40).
.equ qp_offset_u_offset, 40
.equ qp_offset_v_offset, 44
.equ tc_offset_div2_offset, 48
.equ filter_p_offset, 52
.equ filter_q_offset, 56
.text
@ On ARM targets GAS treats the .align operand as a power of two (16 bytes).
.align 4
@ Lookup tables defined in another object file; both are indexed below as
@ arrays of 32-bit words.
.extern gai4_ihevc_qp_table
.extern gai4_ihevc_tc_table
.globl ihevc_deblk_chroma_horz_a9q
@ Position-independent table addresses. Each word holds the distance from
@ the matching `add r3, r3, pc` instruction (label ulbl1 / ulbl2 inside the
@ function) to the table. The `- 8` accounts for pc reading as the address
@ of that instruction plus 8 (ARM-state behaviour), so that
@ `ldr r3, <word>` followed by `add r3, r3, pc` yields the table's address.
@ The ulblN labels must therefore stay on exactly those add instructions.
gai4_ihevc_qp_table_addr:
.long gai4_ihevc_qp_table - ulbl1 - 8
gai4_ihevc_tc_table_addr:
.long gai4_ihevc_tc_table - ulbl2 - 8
@/**
@*******************************************************************************
@* ihevc_deblk_chroma_horz_a9q
@*
@* Filters 8 bytes of the row just above and 8 bytes of the row just below a
@* horizontal edge. With row[k] denoting the 8 bytes at r0 + k * r1:
@*
@*   delta  = clip((4 * (row[0] - row[-1]) + row[-2] - row[1] + 4) >> 3,
@*                 -tc, tc)
@*   row[-1] = sat_u8(row[-1] + delta)     stored only if filter_p != 0
@*   row[0]  = sat_u8(row[0]  - delta)     stored only if filter_q != 0
@*
@* tc alternates per byte: even bytes use the value derived from qp_offset_u,
@* odd bytes the value derived from qp_offset_v (presumably an interleaved
@* U/V sample layout - confirm against the caller).
@*
@* Inputs:
@*   r0            pointer to row[0], the first row below the edge
@*   r1            distance in bytes between consecutive rows
@*   r2, r3        two values whose rounded average (r2 + r3 + 1) >> 1 is the
@*                 base QP (presumably the QPs of the two sides of the edge)
@*   [sp, #0]      qp_offset_u     added to the base QP for even bytes
@*   [sp, #4]      qp_offset_v     added to the base QP for odd bytes
@*   [sp, #8]      tc_offset_div2  doubled and added to the tc table index
@*   [sp, #12]     filter_p        non-zero: write back row[-1]
@*   [sp, #16]     filter_q        non-zero: write back row[0]
@*   (stack offsets are as seen by the caller; inside the function they are
@*    40 bytes higher because of the prologue push)
@*
@* Output: none in registers; memory at r0 - r1 and/or r0 is updated.
@* Register use: r4-r12 and lr are saved and restored; r1-r3 and the NEON
@* registers q0-q3 and q8-q10 are overwritten.
@*******************************************************************************
@*/
.type ihevc_deblk_chroma_horz_a9q, %function
ihevc_deblk_chroma_horz_a9q:
push {r4-r12,lr}                        @ 40 bytes; stack args now start at sp + 40
sub r12,r0,r1                           @ r12 = &row[-1]
vld1.8 {d0},[r0]                        @ d0 = 8 bytes of row[0]
sub r5,r12,r1                           @ r5 = &row[-2]
add r6,r0,r1                            @ r6 = &row[1]
add r1,r2,r3                            @ r1 = r2 + r3 (stride no longer needed)
vmovl.u8 q0,d0                          @ q0 = row[0] widened to 16 bit
ldr r10,[sp,#qp_offset_u_offset]        @ r10 = qp_offset_u
vld1.8 {d2},[r12]                       @ d2 = 8 bytes of row[-1]
add r2,r1,#1                            @ r2 = r2 + r3 + 1 (kept for both U and V)
ldr r4,[sp,#tc_offset_div2_offset]      @ r4 = tc_offset_div2
vld1.8 {d4},[r5]                        @ d4 = 8 bytes of row[-2]
ldr r8,[sp,#filter_p_offset]            @ r8 = filter_p
vld1.8 {d16},[r6]                       @ d16 = 8 bytes of row[1]
ldr r9,[sp,#filter_q_offset]            @ r9 = filter_q
adds r1,r10,r2,asr #1                   @ r1 = qp_offset_u + (sum + 1) >> 1; N flag used by bmi below
vmovl.u8 q1,d2                          @ q1 = row[-1] widened to 16 bit
ldr r7,[sp,#qp_offset_v_offset]         @ r7 = qp_offset_v
ldr r3,gai4_ihevc_qp_table_addr         @ r3 = pc-relative offset of the qp table
ulbl1:
add r3, r3, pc                          @ r3 = address of gai4_ihevc_qp_table
@ Map the U index: negative -> unchanged, 0..57 -> table lookup, > 57 -> index - 6.
bmi l1.3312                             @ negative index is kept as is
cmp r1,#0x39                            @ 0x39 = 57
ldrle r1,[r3,r1,lsl #2]                 @ r1 = qp_table[r1] (32-bit entries)
subgt r1,r1,#6
l1.3312:
@ Same mapping for the V index, reusing r2 = sum + 1.
adds r2,r7,r2,asr #1                    @ r2 = qp_offset_v + (sum + 1) >> 1
vmovl.u8 q2,d4                          @ q2 = row[-2] widened to 16 bit
bmi l1.3332
cmp r2,#0x39
ldrle r2,[r3,r2,lsl #2]                 @ r2 = qp_table[r2]
subgt r2,r2,#6
l1.3332:
@ U tc index = clamp(r1 + 2 * tc_offset_div2 + 2, 0, 53), left in r1.
add r1,r1,r4,lsl #1                     @ r1 += 2 * tc_offset_div2
vsub.i16 q3,q0,q1                       @ q3 = row[0] - row[-1]
add r3,r1,#2                            @ r3 = candidate index (scratch)
cmp r3,#0x35                            @ 0x35 = 53
movgt r1,#0x35                          @ clamp high
vshl.i16 q3,q3,#2                       @ q3 = 4 * (row[0] - row[-1])
vmovl.u8 q8,d16                         @ q8 = row[1] widened to 16 bit
bgt l1.3368                             @ still the flags of the cmp above
adds r3,r1,#2
addpl r1,r1,#2                          @ in range: index = r1 + 2
movmi r1,#0                             @ clamp low
l1.3368:
ldr r3,gai4_ihevc_tc_table_addr         @ r3 = pc-relative offset of the tc table
ulbl2:
add r3, r3, pc                          @ r3 = address of gai4_ihevc_tc_table
vadd.i16 q2,q3,q2                       @ q2 = 4 * (row[0] - row[-1]) + row[-2]
@ V tc index = clamp(r2 + 2 * tc_offset_div2 + 2, 0, 53), left in r2.
add r2,r2,r4,lsl #1
vsub.i16 q3,q2,q8                       @ q3 = unrounded delta numerator (- row[1])
add r4,r2,#2                            @ r4 = candidate index (tc_offset_div2 is dead now)
ldr r1,[r3,r1,lsl #2]                   @ r1 = tc for even bytes = tc_table[U index]
cmp r4,#0x35
movgt r2,#0x35
bgt l1.3412
adds r4,r2,#2
addpl r2,r2,#2
movmi r2,#0
l1.3412:
ldr r2,[r3,r2,lsl #2]                   @ r2 = tc for odd bytes = tc_table[V index]
cmp r8,#0                               @ filter_p test; nothing below alters the flags before the beq
vdup.16 q8,r2                           @ q8 = odd-byte tc in every lane
vdup.16 q2,r1                           @ q2 = even-byte tc in every lane
rsb r1,r1,#0                            @ r1 = -tc (even bytes)
vrshr.s16 q3,q3,#3                      @ q3 = (numerator + 4) >> 3, the unclipped delta
vdup.16 q9,r1                           @ q9 = -tc (even bytes) in every lane
rsb r1,r2,#0                            @ r1 = -tc (odd bytes)
vzip.16 q2,q8                           @ q2 = {tc_even, tc_odd, tc_even, tc_odd, ...}
vdup.16 q10,r1                          @ q10 = -tc (odd bytes) in every lane
vzip.16 q9,q10                          @ q9 = {-tc_even, -tc_odd, ...}
vmin.s16 q8,q3,q2                       @ clip delta from above by +tc
vmax.s16 q2,q9,q8                       @ clip from below by -tc; q2 = final delta
vadd.i16 q1,q1,q2                       @ row[-1] + delta
vsub.i16 q0,q0,q2                       @ row[0] - delta
vqmovun.s16 d2,q1                       @ saturate to unsigned 8 bit
vqmovun.s16 d0,q0
beq l1.3528                             @ filter_p == 0: leave row[-1] untouched
vst1.8 {d2},[r12]                       @ write back row[-1]
l1.3528:
cmp r9,#0
beq l1.3540                             @ filter_q == 0: leave row[0] untouched
vst1.8 {d0},[r0]                        @ write back row[0]
l1.3540:
pop {r4-r12,pc}                         @ restore saved registers and return