///*****************************************************************************
//*
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************/
///**
///*******************************************************************************
//* @file
//*  ihevcd_itrans_recon_dc_chroma.s
//*
//* @brief
//*  Contains the function definition for inverse transform and reconstruction
//*  of interleaved chroma blocks in the DC-only case
//*
//* @author
//*  ittiam
//*
//* @par List of functions:
//*  - ihevcd_itrans_recon_dc_chroma_av8()
//*
//* @remarks
//*  none
//*
//*******************************************************************************/


.text
.include "ihevc_neon_macros.s"


.globl ihevcd_itrans_recon_dc_chroma_av8

.type ihevcd_itrans_recon_dc_chroma_av8, %function

//------------------------------------------------------------------------------
// void ihevcd_itrans_recon_dc_chroma(uword8 *pu1_pred,
//                                    uword8 *pu1_dst,
//                                    word32 pred_strd,
//                                    word32 dst_strd,
//                                    word32 log2_trans_size,
//                                    word16 i2_coeff_value)
//
// Target : AArch64 (GNU as syntax), AAPCS64 calling convention.
//
// In     : x0 = pu1_pred         prediction rows, two byte-interleaved components
//          x1 = pu1_dst          destination rows, same layout
//          w2 = pred_strd        byte stride between prediction rows
//          w3 = dst_strd         byte stride between destination rows
//          w4 = log2_trans_size  block is (1 << log2_trans_size) samples square
//          w5 = i2_coeff_value   the single DC coefficient
//
// Does   : dc = clip_s16((clip_s16((coeff * 64 + 64) >> 7) * 64 + 2048) >> 12)
//          For every row, dc is added (16-bit lane arithmetic, then saturated
//          to 0..255) to the even-indexed bytes, i.e. the component that
//          starts at the given pointer. The odd-indexed bytes are copied from
//          pu1_pred to pu1_dst unchanged.
//          A trans_size of 4 takes a dedicated 4-row path; every other size is
//          processed in tiles of 8 rows x 8 sample pairs (16 bytes).
//
// Clobb  : x0-x13, v0-v7, v16-v31, flags. v8-v15 are used by the 8x8 path and
//          are bracketed by push_v_regs / pop_v_regs from ihevc_neon_macros.s
//          (NOTE(review): the macro bodies are not visible here - presumably
//          they save/restore the callee-saved vector registers; confirm).
//------------------------------------------------------------------------------
ihevcd_itrans_recon_dc_chroma_av8:

    push_v_regs

    // AAPCS64 leaves bits [63:32] of 32-bit (and [63:16] of 16-bit) arguments
    // unspecified. The strides are used as 64-bit post-index offsets below, so
    // they must be widened explicitly.
    sxtw        x2, w2                      // pred_strd
    sxtw        x3, w3                      // dst_strd
    sxth        x5, w5                      // i2_coeff_value

    mov         x10, #1
    lsl         x4, x10, x4                 // trans_size = 1 << log2_trans_size
                                            // (register shift uses only the low 6 bits of x4)

    mov         x12, #32767                 // signed 16-bit upper bound
    mov         x13, #-32768                // signed 16-bit lower bound

    // ---- stage 1: x8 = clip_s16((coeff * 64 + 64) >> 7) --------------------
    mov         x6, #64                     // 1 << (shift1 - 1)
    add         x8, x6, x5, lsl #6
    asr         x11, x8, #7
    cmp         x11, x12
    csel        x11, x12, x11, gt           // min(value, 32767)
    cmp         x11, x13
    csel        x8, x13, x11, lt            // max(value, -32768)

    // ---- stage 2: x6 = clip_s16((x8 * 64 + 2048) >> 12) --------------------
    mov         x7, #2048                   // 1 << (shift2 - 1)
    add         x5, x7, x8, lsl #6
    asr         x11, x5, #12
    cmp         x11, x12
    csel        x11, x12, x11, gt
    cmp         x11, x13
    csel        x6, x13, x11, lt            // x6 = dc_value

    cmp         x4, #4
    b.eq        .Lchroma_4x4

    // ---- generic path: tiles of 8 rows x 16 bytes ---------------------------
    // v0 = dc_value replicated into all eight 16-bit lanes
    // x8 = rows still to process, x9 = sample pairs still to process in a band
    dup         v0.8h, w6
    mov         x8, x4

.Lchroma_row_loop:
    mov         x9, x4

.Lchroma_col_loop:
    // ld2 de-interleaves: even register = component to modify,
    // odd register = the other component (passed through untouched).
    mov         x7, x0
    ld2         {v2.8b, v3.8b}, [x7], x2
    ld2         {v4.8b, v5.8b}, [x7], x2
    ld2         {v6.8b, v7.8b}, [x7], x2
    ld2         {v8.8b, v9.8b}, [x7], x2
    ld2         {v10.8b, v11.8b}, [x7], x2
    ld2         {v12.8b, v13.8b}, [x7], x2
    ld2         {v14.8b, v15.8b}, [x7], x2
    ld2         {v16.8b, v17.8b}, [x7]
    add         x0, x0, #16                 // next tile in this band

    // widen to 16 bit and add dc
    uaddw       v30.8h, v0.8h, v2.8b
    uaddw       v28.8h, v0.8h, v4.8b
    uaddw       v26.8h, v0.8h, v6.8b
    uaddw       v24.8h, v0.8h, v8.8b
    uaddw       v22.8h, v0.8h, v10.8b
    uaddw       v20.8h, v0.8h, v12.8b
    uaddw       v18.8h, v0.8h, v14.8b
    uaddw       v31.8h, v0.8h, v16.8b

    // saturate back to unsigned 8 bit
    sqxtun      v2.8b, v30.8h
    sqxtun      v4.8b, v28.8h
    sqxtun      v6.8b, v26.8h
    sqxtun      v8.8b, v24.8h
    sqxtun      v10.8b, v22.8h
    sqxtun      v12.8b, v20.8h
    sqxtun      v14.8b, v18.8h
    sqxtun      v16.8b, v31.8h

    // st2 re-interleaves the two components
    mov         x11, x1
    st2         {v2.8b, v3.8b}, [x11], x3
    st2         {v4.8b, v5.8b}, [x11], x3
    st2         {v6.8b, v7.8b}, [x11], x3
    st2         {v8.8b, v9.8b}, [x11], x3
    st2         {v10.8b, v11.8b}, [x11], x3
    st2         {v12.8b, v13.8b}, [x11], x3
    st2         {v14.8b, v15.8b}, [x11], x3
    st2         {v16.8b, v17.8b}, [x11]
    add         x1, x1, #16

    subs        x9, x9, #8
    b.gt        .Lchroma_col_loop

    // Move both pointers to the start of the next band of 8 rows:
    // down 8 rows, back by the 2 * trans_size bytes walked in the column loop.
    add         x0, x0, x2, lsl #3
    add         x1, x1, x3, lsl #3
    sub         x0, x0, x4, lsl #1
    sub         x1, x1, x4, lsl #1

    subs        x8, x8, #8
    b.gt        .Lchroma_row_loop
    b           .Lchroma_done

    // ---- 4x4 path: 4 rows x 8 bytes -----------------------------------------
.Lchroma_4x4:
    // Each row is exactly 8 bytes (4 interleaved pairs), so load just those
    // 8 bytes. v1 holds dc in the even 16-bit lanes and 0 in the odd lanes, so
    // the widening add changes only the even bytes; the odd bytes come back
    // out of sqxtun unchanged.
    and         w12, w6, #0xffff            // 0x0000:dc in each 32-bit lane
    dup         v1.4s, w12

    ld1         {v2.8b}, [x0], x2
    ld1         {v4.8b}, [x0], x2
    ld1         {v6.8b}, [x0], x2
    ld1         {v16.8b}, [x0]

    uaddw       v30.8h, v1.8h, v2.8b
    uaddw       v28.8h, v1.8h, v4.8b
    uaddw       v26.8h, v1.8h, v6.8b
    uaddw       v24.8h, v1.8h, v16.8b

    sqxtun      v2.8b, v30.8h
    sqxtun      v4.8b, v28.8h
    sqxtun      v6.8b, v26.8h
    sqxtun      v16.8b, v24.8h

    st1         {v2.8b}, [x1], x3
    st1         {v4.8b}, [x1], x3
    st1         {v6.8b}, [x1], x3
    st1         {v16.8b}, [x1]

.Lchroma_done:
    pop_v_regs
    ret

