blob: edc70e76acf7c90afcd82f2b988dffb3343f2ba5 [file] [log] [blame]
///*****************************************************************************
//*
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************/
///**
///*******************************************************************************
//* //file
//* ihevcd_itrans_recon_dc_luma.s
//*
//* //brief
//* contains function definitions itrans and recon for dc only case
//*
//* //author
//* ittiam
//*
//* //par list of functions:
//*
//*
//* //remarks
//* none
//*
//*******************************************************************************/
.text
.include "ihevc_neon_macros.s"
.globl ihevcd_itrans_recon_dc_luma_av8
.type ihevcd_itrans_recon_dc_luma_av8, %function
ihevcd_itrans_recon_dc_luma_av8:
//void ihevcd_itrans_recon_dc_luma(uword8 *pu1_pred,
// uword8 *pu1_dst,
// word32 pred_strd,
// word32 dst_strd,
// word32 log2_trans_size,
// word16 i2_coeff_value)
//x0:pu1_pred
//x1:pu1_dest
//x2:pred_strd
//x3:dst_strd
stp x19, x20,[sp,#-16]!
sxth x5,w5
mov x10,#1
lsl x4,x10,x4 // trans_size = (1 << log2_trans_size)//
mov x6,#64 // 1 << (shift1 - 1)//
mov x7,#2048 // 1<<(shift2-1)
add x8,x6,x5,lsl #6
asr x20, x8, #7
mov x19, #32767
cmp x20,x19
blt lbl37
mov x8,#32767
b lbl37_1
lbl37:
mov x19,#-32768
cmp x20,x19
csel x8, x19, x20, lt
lbl37_1:
add x5,x7,x8,lsl #6
asr x20, x5, #12
mov x19,#32767
cmp x20,x19
blt lbl39
mov x6,#32767
b lbl39_1
lbl39:
mov x19,#-32768
cmp x20,x19
csel x6, x19, x20, lt
lbl39_1:
mov x9,x4
mov x8,x4
// x6 has the dc_value
// x4 has the trans_size value
// x8 has the row value
// x9 has the col value
dup v0.8h,w6
cmp x4,#4
beq row_loop_4
row_loop:
mov x9,x4
col_loop:
mov x7,x0
ld1 {v2.8b},[x7],x2
ld1 {v3.8b},[x7],x2
ld1 {v4.8b},[x7],x2
ld1 {v5.8b},[x7],x2
ld1 {v6.8b},[x7],x2
ld1 {v7.8b},[x7],x2
ld1 {v1.8b},[x7],x2
ld1 {v17.8b},[x7]
add x0,x0,#8
uaddw v30.8h, v0.8h , v2.8b
uaddw v28.8h, v0.8h , v3.8b
uaddw v26.8h, v0.8h , v4.8b
uaddw v24.8h, v0.8h , v5.8b
uaddw v22.8h, v0.8h , v6.8b
uaddw v20.8h, v0.8h , v7.8b
uaddw v18.8h, v0.8h , v1.8b
uaddw v16.8h, v0.8h , v17.8b
mov x11,x1
sqxtun v2.8b, v30.8h
sqxtun v3.8b, v28.8h
sqxtun v4.8b, v26.8h
sqxtun v5.8b, v24.8h
sqxtun v6.8b, v22.8h
sqxtun v7.8b, v20.8h
sqxtun v1.8b, v18.8h
sqxtun v17.8b, v16.8h
st1 {v2.2s},[x11],x3
st1 {v3.2s},[x11],x3
st1 {v4.2s},[x11],x3
st1 {v5.2s},[x11],x3
st1 {v6.2s},[x11],x3
st1 {v7.2s},[x11],x3
st1 {v1.2s},[x11],x3
st1 {v17.2s},[x11]
add x1,x1,#8
subs x9,x9,#8
bgt col_loop
subs x8,x8,#8
add x0,x0,x2,lsl #3
add x1,x1,x3,lsl #3
sub x0,x0,x4
sub x1,x1,x4
bgt row_loop
b end_loops
row_loop_4:
mov x9,x10
col_loop_4:
ld1 {v2.8b},[x0],x2
ld1 {v3.8b},[x0],x2
ld1 {v4.8b},[x0],x2
ld1 {v5.8b},[x0]
uaddw v30.8h, v0.8h , v2.8b
uaddw v28.8h, v0.8h , v3.8b
uaddw v26.8h, v0.8h , v4.8b
uaddw v24.8h, v0.8h , v5.8b
sqxtun v2.8b, v30.8h
sqxtun v3.8b, v28.8h
sqxtun v4.8b, v26.8h
sqxtun v5.8b, v24.8h
st1 {v2.s}[0],[x1],x3
st1 {v3.s}[0],[x1],x3
st1 {v4.s}[0],[x1],x3
st1 {v5.s}[0],[x1]
end_loops:
ldp x19, x20,[sp],#16
ret