blob: dcb1f251d194cfc286bf50d3b31a9b460bb41cfa [file] [log] [blame]
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//* @file
//* ihevc_deblk_chroma_vert.s
//* @brief
//* contains the function definition for the chroma deblocking filter applied
//* across a vertical edge. the function is coded in armv8 (aarch64) neon
//* assembly using gnu assembler syntax
//* @author
//* anand s
//* @par list of functions:
//* @remarks
//* none
//void ihevc_deblk_chroma_vert(UWORD8 *pu1_src,
// WORD32 src_strd,
// WORD32 quant_param_p,
// WORD32 quant_param_q,
// WORD32 qp_offset_u,
// WORD32 qp_offset_v,
// WORD32 tc_offset_div2,
// WORD32 filter_flag_p,
// WORD32 filter_flag_q)
.align 4
.include "ihevc_neon_macros.s"
.extern gai4_ihevc_qp_table
.extern gai4_ihevc_tc_table
.globl ihevc_deblk_chroma_vert_av8
.type ihevc_deblk_chroma_vert_av8, %function
sxtw x4,w4
sxtw x5,w5
sxtw x6,w6
mov x15,x5
mov x5,x6
mov x6,x15
mov x12, x7
mov x7, x4
ldr w4, [sp]
stp x19, x20,[sp,#-16]!
sub x8,x0,#4
add x2,x2,x3
ld1 {v5.8b},[x8],x1
add x2,x2,#1
ld1 {v17.8b},[x8],x1
ld1 {v16.8b},[x8],x1
ld1 {v4.8b},[x8]
trn1 v29.8b, v5.8b, v17.8b
trn2 v17.8b, v5.8b, v17.8b
mov v5.d[0], v29.d[0]
adds x3,x7,x2,asr #1
trn1 v29.8b, v16.8b, v4.8b
trn2 v4.8b, v16.8b, v4.8b
mov v16.d[0], v29.d[0]
adrp x7, :got:gai4_ihevc_qp_table
ldr x7, [x7, #:got_lo12:gai4_ihevc_qp_table]
bmi l1.2944
cmp x3,#0x39
bgt lbl78
ldr w3, [x7,x3,lsl #2]
sxtw x3,w3
sub x20,x3,#6
csel x3, x20, x3,gt
trn1 v29.4h, v5.4h, v16.4h
trn2 v16.4h, v5.4h, v16.4h
mov v5.d[0], v29.d[0]
adds x2,x6,x2,asr #1
trn1 v29.4h, v17.4h, v4.4h
trn2 v4.4h, v17.4h, v4.4h
mov v17.d[0], v29.d[0]
bmi l1.2964
cmp x2,#0x39
bgt lbl86
ldr w2, [x7,x2,lsl #2]
sxtw x2,w2
sub x20,x2,#6
csel x2, x20, x2,gt
trn1 v29.2s, v5.2s, v17.2s
trn2 v17.2s, v5.2s, v17.2s
mov v5.d[0], v29.d[0]
add x3,x3,x5,lsl #1
trn1 v29.2s, v16.2s, v4.2s
trn2 v4.2s, v16.2s, v4.2s
mov v16.d[0], v29.d[0]
add x6,x3,#2
uxtl v18.8h, v17.8b
cmp x6,#0x35
mov x20,#0x35
csel x3, x20, x3,gt
bgt l1.2996
adds x6,x3,#2
add x20,x3,#2
csel x3, x20, x3,pl
mov x20,#0
csel x3, x20, x3,mi
usubl v0.8h, v17.8b, v16.8b
adrp x6, :got:gai4_ihevc_tc_table
ldr x6, [x6, #:got_lo12:gai4_ihevc_tc_table]
shl v0.8h, v0.8h,#2
add x2,x2,x5,lsl #1
add x5,x2,#2
uaddw v0.8h, v0.8h , v5.8b
cmp x5,#0x35
ldr w3, [x6,x3,lsl #2]
sxtw x3,w3
usubw v4.8h, v0.8h , v4.8b
mov x20,#0x35
csel x2, x20, x2,gt
bgt l1.3036
adds x5,x2,#2
add x20,x2,#2
csel x2, x20, x2,pl
mov x20,#0
csel x2, x20, x2,mi
srshr v6.8h, v4.8h,#3
dup v2.4h,w3
ldr w2, [x6,x2,lsl #2]
sxtw x2,w2
sub x20,x3,#0
neg x3, x20
cmp x12,#0
dup v3.4h,w2
sub x20,x2,#0
neg x2, x20
dup v30.4h,w3
dup v31.4h,w2
mov v30.d[1],v31.d[0]
mov v2.d[1],v3.d[0]
smin v4.8h, v6.8h , v2.8h
smax v2.8h, v30.8h , v4.8h
uxtl v6.8h, v16.8b
add v0.8h, v6.8h , v2.8h
sub v2.8h, v18.8h , v2.8h
sqxtun v0.8b, v0.8h
sub x2,x0,#2
sqxtun v1.8b, v2.8h
trn1 v29.2s, v0.2s, v1.2s
trn2 v1.2s, v0.2s, v1.2s
mov v0.d[0], v29.d[0]
trn1 v29.8b, v0.8b, v1.8b
trn2 v1.8b, v0.8b, v1.8b
mov v0.d[0], v29.d[0]
beq l1.3204
st1 {v0.h}[0],[x2],x1
st1 {v1.h}[0],[x2],x1
st1 {v0.h}[1],[x2],x1
st1 {v1.h}[1],[x2]
cmp x4,#0
beq l1.3228
st1 {v0.h}[2],[x0],x1
st1 {v1.h}[2],[x0],x1
st1 {v0.h}[3],[x0],x1
st1 {v1.h}[3],[x0]
ldp x19, x20,[sp],#16