| @/***************************************************************************** |
| @* |
| @* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore |
| @* |
| @* Licensed under the Apache License, Version 2.0 (the "License"); |
| @* you may not use this file except in compliance with the License. |
| @* You may obtain a copy of the License at: |
| @* |
| @* http://www.apache.org/licenses/LICENSE-2.0 |
| @* |
| @* Unless required by applicable law or agreed to in writing, software |
| @* distributed under the License is distributed on an "AS IS" BASIS, |
| @* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| @* See the License for the specific language governing permissions and |
| @* limitations under the License. |
| @* |
| @*****************************************************************************/ |
| @/** |
| @/******************************************************************************* |
| @* @file |
| @* ihevcd_itrans_recon_dc_luma.s |
| @* |
| @* @brief |
| @* contains function definitions itrans and recon for dc only case |
| @* |
| @* @author |
| @* ittiam |
| @* |
| @* @par list of functions: |
| @* |
| @* |
| @* @remarks |
| @* none |
| @* |
| @*******************************************************************************/ |
| |
| .text |
| |
| |
| |
| .globl ihevcd_itrans_recon_dc_luma_a9q |
| |
| .type ihevcd_itrans_recon_dc_luma_a9q, %function |
| |
| @------------------------------------------------------------------------------ |
| @ void ihevcd_itrans_recon_dc_luma(uword8 *pu1_pred, |
| @                                  uword8 *pu1_dst, |
| @                                  word32 pred_strd, |
| @                                  word32 dst_strd, |
| @                                  word32 log2_trans_size, |
| @                                  word16 i2_coeff_value) |
| @ |
| @ Purpose: |
| @   Derives a single dc value from i2_coeff_value, adds it to every prediction |
| @   byte of a trans_size x trans_size block and stores the result, saturated |
| @   to 8 bits, to the destination block. |
| @ |
| @ Syntax/target: GNU as, ARM (32-bit) with NEON. |
| @ |
| @ In: |
| @   r0       = pu1_pred |
| @   r1       = pu1_dst |
| @   r2       = pred_strd |
| @   r3       = dst_strd |
| @   [sp]     = log2_trans_size  (first stack argument) |
| @   [sp,#4]  = i2_coeff_value   (second stack argument) |
| @ |
| @ Register roles inside the function: |
| @   r4  = trans_size = 1 << log2_trans_size |
| @   r6  = dc value, replicated into the eight 16-bit lanes of q0 |
| @   r8  = rows still to process (8x8 path) |
| @   r9  = columns still to process in the current band of 8 rows |
| @   r7  = read cursor inside the current 8x8 tile |
| @   r11 = write cursor inside the current 8x8 tile |
| @ |
| @ Preserved: r0-r11 (restored by the final pop) and d8-d9 (vpush/vpop). |
| @ Clobbers:  d0-d7, d16-d31, flags. |
| @ |
| @ NOTE(review): only trans_size == 4 and sizes handled in 8x8 tiles are |
| @ covered by the code below - presumably log2_trans_size is 2..5; confirm |
| @ against the caller. |
| @------------------------------------------------------------------------------ |
| |
| ihevcd_itrans_recon_dc_luma_a9q: |
| |
| push {r0-r11,lr} @ 13 words = 0x34 bytes, so the stack args start at sp+0x34 |
| ldr r4,[sp,#0x34] @ r4 = log2_trans_size |
| ldr r5,[sp,#0x38] @ r5 = i2_coeff_value |
| |
| @ d8/d9 are used as scratch by the 8x8 path and are callee-saved, so they |
| @ must be preserved. Saved after the argument loads so that the offsets |
| @ above remain relative to the core-register push only. |
| vpush {d8-d9} |
| |
| mov r10,#1 |
| lsl r4,r10,r4 @ trans_size = (1 << log2_trans_size) |
| mov r6,#64 @ rounding term of stage 1: 1 << (shift1 - 1), shift1 = 7 |
| mov r7,#2048 @ rounding term of stage 2: 1 << (shift2 - 1), shift2 = 12 |
| |
| @ dc = sat16((sat16((coeff * 64 + 64) >> 7) * 64 + 2048) >> 12) |
| add r8,r6,r5,lsl #6 @ r8 = 64 + coeff * 64 |
| ssat r8,#16,r8,asr #7 @ r8 = sat16(r8 >> 7) |
| add r5,r7,r8,lsl #6 @ r5 = 2048 + r8 * 64 |
| ssat r6,#16,r5,asr #12 @ r6 = sat16(r5 >> 12) = dc value |
| mov r8,r4 @ r8 = rows remaining |
| |
| vdup.s16 q0,r6 @ q0 = dc value in all eight 16-bit lanes |
| cmp r4,#4 |
| beq row_loop_4 @ the 4x4 block has its own straight-line path |
| |
| |
| @ ---- 8x8 tiled path ---------------------------------------------------------- |
| @ Invariant at row_loop: r0/r1 point at the first byte of the current band of |
| @ 8 rows in pred/dst; r8 (> 0) is the number of rows still to do. |
| row_loop: |
| mov r9,r4 @ r9 = columns remaining in this band |
| |
| |
| col_loop: |
| |
| @ load an 8x8 tile of prediction bytes, one row per d register |
| mov r7,r0 |
| vld1.8 d2,[r7],r2 |
| vld1.8 d3,[r7],r2 |
| vld1.8 d4,[r7],r2 |
| vld1.8 d5,[r7],r2 |
| |
| vld1.8 d6,[r7],r2 |
| vld1.8 d7,[r7],r2 |
| vld1.8 d8,[r7],r2 |
| vld1.8 d9,[r7] |
| |
| add r0,r0,#8 @ pred: step to the next tile to the right |
| |
| |
| @ widen each row to 16 bits and add the dc value |
| vaddw.u8 q15,q0,d2 |
| vaddw.u8 q14,q0,d3 |
| vaddw.u8 q13,q0,d4 |
| vaddw.u8 q12,q0,d5 |
| vaddw.u8 q11,q0,d6 |
| vaddw.u8 q10,q0,d7 |
| vaddw.u8 q9,q0,d8 |
| vaddw.u8 q8,q0,d9 |
| |
| @ narrow back to bytes with unsigned saturation (clip to 0..255) |
| mov r11,r1 |
| vqmovun.s16 d2,q15 |
| vqmovun.s16 d3,q14 |
| vqmovun.s16 d4,q13 |
| vqmovun.s16 d5,q12 |
| vqmovun.s16 d6,q11 |
| vqmovun.s16 d7,q10 |
| vqmovun.s16 d8,q9 |
| vqmovun.s16 d9,q8 |
| |
| |
| @ store the 8x8 tile, one row per d register |
| vst1.u32 {d2},[r11],r3 |
| vst1.u32 {d3},[r11],r3 |
| vst1.u32 {d4},[r11],r3 |
| vst1.u32 {d5},[r11],r3 |
| vst1.u32 {d6},[r11],r3 |
| vst1.u32 {d7},[r11],r3 |
| vst1.u32 {d8},[r11],r3 |
| vst1.u32 {d9},[r11] |
| |
| add r1,r1,#8 @ dst: step to the next tile to the right |
| |
| subs r9,r9,#8 |
| bgt col_loop |
| |
| subs r8,r8,#8 @ sets the flags consumed by "bgt row_loop" below |
| |
| @ move both pointers to the start of the next band of 8 rows: |
| @ down 8 rows, back by the trans_size bytes walked in col_loop. |
| @ None of these four instructions updates the flags. |
| add r0,r0,r2,lsl #3 |
| add r1,r1,r3,lsl #3 |
| sub r0,r0,r4 |
| sub r1,r1,r4 |
| bgt row_loop |
| b end_loops |
| |
| |
| @ ---- 4x4 path ---------------------------------------------------------------- |
| @ NOTE(review): each vld1.8 below reads 8 bytes of a prediction row while only |
| @ the low 4 result bytes are stored, so 4 bytes past the block width are read - |
| @ confirm the prediction buffer always makes that read safe. |
| row_loop_4: |
| |
| |
| col_loop_4: |
| |
| |
| vld1.8 d2,[r0],r2 |
| vld1.8 d3,[r0],r2 |
| vld1.8 d4,[r0],r2 |
| vld1.8 d5,[r0] |
| |
| @ widen each row to 16 bits and add the dc value |
| vaddw.u8 q15,q0,d2 |
| vaddw.u8 q14,q0,d3 |
| vaddw.u8 q13,q0,d4 |
| vaddw.u8 q12,q0,d5 |
| |
| @ narrow back to bytes with unsigned saturation (clip to 0..255) |
| vqmovun.s16 d2,q15 |
| vqmovun.s16 d3,q14 |
| vqmovun.s16 d4,q13 |
| vqmovun.s16 d5,q12 |
| |
| @ store only the first 32-bit lane (4 pixels) of each row |
| vst1.u32 {d2[0]},[r1],r3 |
| vst1.u32 {d3[0]},[r1],r3 |
| vst1.u32 {d4[0]},[r1],r3 |
| vst1.u32 {d5[0]},[r1] |
| |
| end_loops: |
| vpop {d8-d9} @ restore the callee-saved NEON registers |
| pop {r0-r11,pc} @ restore core registers and return |
| |
| |
| |
| |
| |
| |
| |
| |