| ///***************************************************************************** |
| //* |
| //* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore |
| //* |
| //* Licensed under the Apache License, Version 2.0 (the "License"); |
| //* you may not use this file except in compliance with the License. |
| //* You may obtain a copy of the License at: |
| //* |
| //* http://www.apache.org/licenses/LICENSE-2.0 |
| //* |
| //* Unless required by applicable law or agreed to in writing, software |
| //* distributed under the License is distributed on an "AS IS" BASIS, |
| //* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| //* See the License for the specific language governing permissions and |
| //* limitations under the License. |
| //* |
| //*****************************************************************************/ |
| ///** |
| ///******************************************************************************* |
| //* //file |
| //* ihevcd_fmt_conv_420sp_to_420sp.s |
| //* |
| //* //brief |
| //* contains function definitions for format conversions |
| //* |
| //* //author |
| //* ittiam |
| //* |
| //* //par list of functions: |
| //* - ihevcd_fmt_conv_420sp_to_420sp_av8() |
| //* |
| //* //remarks |
| //* none |
| //* |
| //*******************************************************************************/ |
// Target: AArch64 (A64), GNU assembler syntax, AAPCS64 calling convention.

.equ DO1STROUNDING, 0                   // not referenced by the code in this file

// Offsets of the two stack-passed arguments relative to sp AFTER the prologue
// of ihevcd_fmt_conv_420sp_to_420sp_av8 (push_v_regs + one 16-byte stp).
// The value 80 is carried over unchanged from the original code; it presumably
// equals 64 bytes pushed by push_v_regs plus 16 bytes for x19/x20 -- confirm
// against ihevc_neon_macros.s if that macro ever changes.
.equ FMT_CONV_STACK_ARG_BASE,       80
.equ FMT_CONV_DEST_STRIDEY,         FMT_CONV_STACK_ARG_BASE + 0
.equ FMT_CONV_DEST_STRIDECHROMA,    FMT_CONV_STACK_ARG_BASE + 8

// Bytes moved per iteration of the vector column loops.
.equ FMT_CONV_Y_BLOCK,              32
.equ FMT_CONV_UV_BLOCK,             16

    .text
    .p2align 2

    .include "ihevc_neon_macros.s"      // provides push_v_regs / pop_v_regs


///*****************************************************************************
//*
//*  Function Name : ihevcd_fmt_conv_420sp_to_420sp_av8()
//*
//*  Description   : Copies a YUV 4:2:0 semi-planar picture (UV interleaved)
//*                  into another 4:2:0 semi-planar buffer that may use
//*                  different strides. The Y plane is copied first
//*                  (u2_height rows of u2_width bytes), then the interleaved
//*                  UV plane (u2_height / 2 rows of u2_width bytes).
//*
//*  Arguments     : x0                  pu1_y
//*                  x1                  pu1_uv
//*                  x2                  pu1_dest_y
//*                  x3                  pu1_dest_uv
//*                  w4                  u2_width
//*                  w5                  u2_height
//*                  w6                  u2_stridey
//*                  w7                  u2_stridechroma
//*                  1st stack argument  u2_dest_stridey
//*                  2nd stack argument  u2_dest_stridechroma
//*                  All six integer arguments are treated as 32-bit signed
//*                  values, matching how the stack arguments have always been
//*                  loaded (assumption: verify against the C prototype).
//*
//*  Values Returned : None
//*
//*  Register Usage  : x0 - x12, x20, v0 - v3, condition flags
//*
//*  Stack Usage     : 16 bytes for x19/x20 plus whatever push_v_regs saves
//*
//*  Interruptibility : Interruptible
//*
//*  Known Limitations
//*      Assumptions: Image Width:  Assumed to be multiple of 2 and
//*                   Image Height: Assumed to be even.
//*      When the width is not a multiple of the vector block size (32 bytes
//*      for Y, 16 bytes for UV) the last block of a row is copied from an
//*      overlapping, stepped-back position, so some bytes of the row are
//*      written twice with the same value. Rows narrower than one block are
//*      copied byte by byte. Nothing is copied when width or height is <= 0.
//*
//*  Revision History :
//*      DD MM YYYY   Author(s)     Changes (Describe the changes made)
//*      16 05 2012   Naveen SR     draft
//*
//*****************************************************************************/

    .global ihevcd_fmt_conv_420sp_to_420sp_av8
    .type   ihevcd_fmt_conv_420sp_to_420sp_av8, %function
ihevcd_fmt_conv_420sp_to_420sp_av8:

    push_v_regs
    stp     x19, x20, [sp, #-16]!       // x20 is used as scratch below

    // AAPCS64 leaves the upper 32 bits of a 32-bit argument register
    // unspecified, so widen explicitly before doing 64-bit pointer arithmetic.
    sxtw    x8, w4                      // x8  = u2_width  (bytes per row)
    sxtw    x9, w5                      // x9  = u2_height (luma rows)
    sxtw    x12, w7                     // x12 = u2_stridechroma, kept for the UV pass
    sxtw    x7, w6                      // x7  = u2_stridey
    ldrsw   x5, [sp, #FMT_CONV_DEST_STRIDEY]    // x5 = u2_dest_stridey

    // Nothing to copy for an empty picture.
    cmp     x8, #0
    ble     fmt_conv_420sp_exit
    cmp     x9, #0
    ble     fmt_conv_420sp_exit

    ///* Copy Y */

    sub     x10, x7, x8                 // src step from end of one row to start of next
    sub     x11, x5, x8                 // dst step from end of one row to start of next

    mov     x4, x9                      // x4 = rows remaining
y_row_loop:
    mov     x6, x8                      // x6 = bytes remaining in this row
    cmp     x6, #FMT_CONV_Y_BLOCK
    blt     y_col_tail                  // row narrower than one block

y_col_loop:
    prfm    PLDL1KEEP, [x0, #128]
    sub     x6, x6, #FMT_CONV_Y_BLOCK
    ld1     {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], #32
    st1     {v0.8b, v1.8b, v2.8b, v3.8b}, [x2], #32
    cmp     x6, #FMT_CONV_Y_BLOCK
    bge     y_col_loop

y_col_tail:
    // Here 0 <= x6 < 32.
    cbz     x6, y_col_loop_end
    cmp     x8, #FMT_CONV_Y_BLOCK
    blt     y_col_bytes                 // cannot step back inside a row shorter than a block

    // Width is not a multiple of 32: step both pointers back so that one more
    // full 32-byte block ends exactly at the end of the row.
    // Ex: width 162 -> the loop above copied 160 bytes, 2 remain; step back 30
    // bytes to offset 130 and copy bytes 130..161.
    mov     x20, #FMT_CONV_Y_BLOCK
    sub     x6, x20, x6                 // x6 = 32 - remaining
    sub     x0, x0, x6
    sub     x2, x2, x6
    ld1     {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], #32
    st1     {v0.8b, v1.8b, v2.8b, v3.8b}, [x2], #32
    b       y_col_loop_end

y_col_bytes:
    ldrb    w20, [x0], #1
    strb    w20, [x2], #1
    subs    x6, x6, #1
    bgt     y_col_bytes

y_col_loop_end:
    // Both pointers are now exactly u2_width bytes past the row start.
    add     x0, x0, x10
    add     x2, x2, x11
    subs    x4, x4, #1
    bgt     y_row_loop


    ///* Copy UV */

    ldrsw   x5, [sp, #FMT_CONV_DEST_STRIDECHROMA]   // x5 = u2_dest_stridechroma

    lsr     x9, x9, #1                  // chroma rows = height / 2
    mov     x2, x3                      // x2 = pu1_dest_uv

    sub     x10, x12, x8                // src step uses the chroma stride, not the luma stride
    sub     x11, x5, x8                 // dst step

    mov     x4, x9                      // x4 = rows remaining
    // As in the original code the row body runs before the counter is tested,
    // so one chroma row is still copied when height / 2 is 0 (height == 1).
uv_row_loop:
    mov     x6, x8                      // x6 = bytes remaining in this row
    cmp     x6, #FMT_CONV_UV_BLOCK
    blt     uv_col_tail                 // row narrower than one block

uv_col_loop:
    prfm    PLDL1KEEP, [x1, #128]
    sub     x6, x6, #FMT_CONV_UV_BLOCK
    ld1     {v0.8b, v1.8b}, [x1], #16
    st1     {v0.8b, v1.8b}, [x2], #16
    cmp     x6, #FMT_CONV_UV_BLOCK
    bge     uv_col_loop

uv_col_tail:
    // Here 0 <= x6 < 16.
    cbz     x6, uv_col_loop_end
    cmp     x8, #FMT_CONV_UV_BLOCK
    blt     uv_col_bytes                // cannot step back inside a row shorter than a block

    // Width is not a multiple of 16: step back so that one more full 16-byte
    // block ends exactly at the end of the row.
    // Ex: width 162 -> 160 bytes copied, 2 remain; step back 14 bytes to
    // offset 146 and copy bytes 146..161.
    mov     x20, #FMT_CONV_UV_BLOCK
    sub     x6, x20, x6                 // x6 = 16 - remaining
    sub     x1, x1, x6
    sub     x2, x2, x6
    ld1     {v0.8b, v1.8b}, [x1], #16
    st1     {v0.8b, v1.8b}, [x2], #16
    b       uv_col_loop_end

uv_col_bytes:
    ldrb    w20, [x1], #1
    strb    w20, [x2], #1
    subs    x6, x6, #1
    bgt     uv_col_bytes

uv_col_loop_end:
    add     x1, x1, x10
    add     x2, x2, x11
    subs    x4, x4, #1
    bgt     uv_row_loop

fmt_conv_420sp_exit:
    ldp     x19, x20, [sp], #16
    pop_v_regs
    ret

    .size   ihevcd_fmt_conv_420sp_to_420sp_av8, . - ihevcd_fmt_conv_420sp_to_420sp_av8


    .section .note.GNU-stack,"",%progbits
| |