| ///***************************************************************************** |
| //* |
| //* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore |
| //* |
| //* Licensed under the Apache License, Version 2.0 (the "License"); |
| //* you may not use this file except in compliance with the License. |
| //* You may obtain a copy of the License at: |
| //* |
| //* http://www.apache.org/licenses/LICENSE-2.0 |
| //* |
| //* Unless required by applicable law or agreed to in writing, software |
| //* distributed under the License is distributed on an "AS IS" BASIS, |
| //* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| //* See the License for the specific language governing permissions and |
| //* limitations under the License. |
| //* |
| //*****************************************************************************/ |
| ///** |
| ///******************************************************************************* |
| //* //file |
| //* ihevcd_fmt_conv_420sp_to_420p.s |
| //* |
| //* //brief |
| //* contains function definitions for format conversions |
| //* |
| //* //author |
| //* ittiam |
| //* |
| //* //par list of functions: |
| //* |
| //* |
| //* //remarks |
| //* none |
| //* |
| //*******************************************************************************/ |
| |
| .text |
| |
| .include "ihevc_neon_macros.s" |
| |
| |
| |
| |
| ///***************************************************************************** |
| //* * |
| //* Function Name : ihevcd_fmt_conv_420sp_to_420p_av8() * |
| //* * |
| //* Description : This function converts the image from YUV420SP color * |
| //* space (UV interleaved) to YUV420P color space (separate U and V planes). * |
| //* * |
| //* Arguments : x0 pu1_src_y * |
| //* x1 pu1_src_uv * |
| //* x2 pu1_dest_y * |
| //* x3 pu1_dest_u * |
| //* x4 pu1_dest_v * |
| //* w5 u2_width * |
| //* w6 u2_height * |
| //* w7 u2_stridey * |
| //* stack arg 0 (8-byte slot) u2_strideuv * |
| //* stack arg 1 (8-byte slot) u2_dest_stridey * |
| //* stack arg 2 (8-byte slot) u2_dest_strideuv * |
| //* stack arg 3 (8-byte slot) is_u_first * |
| //* stack arg 4 (8-byte slot) disable_luma_copy * |
| //* * |
| //* Values Returned : None * |
| //* * |
| //* Register Usage : x0 - x11, x15, x20, v0, v1 * |
| //* * |
| //* Stack Usage : 16 bytes (x19, x20) plus the push_v_regs save area * |
| //* * |
| //* Interruptibility : Interruptible * |
| //* * |
| //* Known Limitations * |
| //* Assumptions: Image Width: Assumed to be multiple of 2 and * |
| //* Image Height: Assumed to be even. * |
| //* * |
| //* Revision History : * |
| //* DD MM YYYY Author(s) Changes (Describe the changes made) * |
| //* 16 05 2012 Naveen SR draft * |
| //* * |
| //*****************************************************************************/ |
| |
| .globl ihevcd_fmt_conv_420sp_to_420p_av8 |
| |
| .type ihevcd_fmt_conv_420sp_to_420p_av8, %function |
| |
| //----------------------------------------------------------------------------- |
| // ihevcd_fmt_conv_420sp_to_420p_av8 |
| // |
| // Copies the luma plane (unless disabled) and de-interleaves the UV plane |
| // of a semi-planar 4:2:0 picture into separate U and V planes. |
| // |
| // ABI : AAPCS64 (AArch64, GNU as syntax) |
| // In : x0 = pu1_src_y x1 = pu1_src_uv |
| // x2 = pu1_dest_y x3 = pu1_dest_u x4 = pu1_dest_v |
| // w5 = width w6 = height w7 = source Y stride |
| // stack args (8-byte slots, in order): source UV stride, |
| // dest Y stride, dest UV stride, is_u_first, disable_luma_copy |
| // Out : none |
| // Clobb: x0-x11, x15, v0, v1, flags (x20 is used but saved/restored) |
| // |
| // The 32-bit arguments in w5/w6/w7 are sign-extended before 64-bit use: |
| // AAPCS64 leaves the upper 32 bits of such registers unspecified. The stack |
| // arguments are sign-extended on load for the same reason. |
| // |
| // Stack offsets used below (#80..#112) include the 16 bytes pushed for |
| // x19/x20 plus the push_v_regs save area (presumably 64 bytes for d8-d15; |
| // the macro lives in ihevc_neon_macros.s - confirm there). |
| // |
| // NOTE(review): each column loop always moves 16 bytes first, and the tail |
| // path re-reads the last 16 bytes of the row. For a row narrower than 16 |
| // bytes the tail back-step exceeds the bytes already advanced, so the |
| // pointers move to before the row start. Callers presumably guarantee |
| // width >= 16 - confirm. Each row loop also runs at least once, even for a |
| // zero row count. |
| //----------------------------------------------------------------------------- |
| ihevcd_fmt_conv_420sp_to_420p_av8: |
|     push_v_regs |
|     stp x19, x20, [sp, #-16]! // x20 is the tail-path scratch register |
| |
|     mov x15, x4 // x15 = pu1_dest_v (x4 is reused as row counter) |
|     sxtw x8, w5 // x8 = width, sign-extended from 32 bits |
|     sxtw x9, w6 // x9 = height, sign-extended from 32 bits |
|     sxtw x7, w7 // x7 = source Y stride, sign-extended from 32 bits |
| |
|     ldrsw x5, [sp, #88] // x5 = dest Y stride |
| |
|     sub x10, x7, x8 // x10 = source Y row increment (stride - width) |
|     sub x11, x5, x8 // x11 = dest Y row increment (stride - width) |
| |
|     ldr w5, [sp, #112] // w5 = disable_luma_copy flag |
|     cbnz w5, uv_copy_start // non-zero: skip the luma copy entirely |
| |
| // ---- Copy Y ---------------------------------------------------------------- |
| |
|     mov x4, x9 // x4 = rows remaining |
| y_row_loop: |
|     mov x6, x8 // x6 = bytes remaining in this row |
| |
| y_col_loop: |
|     sub x6, x6, #16 |
|     ld1 {v0.8b, v1.8b}, [x0], #16 |
|     st1 {v0.8b, v1.8b}, [x2], #16 |
|     cmp x6, #16 |
|     bge y_col_loop // at least one more full 16-byte chunk |
|     cbz x6, y_col_loop_end // width was a multiple of 16: row done |
| |
|     // Tail: x6 = leftover bytes (< 16). Step both pointers back by |
|     // (16 - leftover) so one more 16-byte copy ends exactly at the row end. |
|     // Example: width 162 -> the loop copies 160 bytes, the pointers are |
|     // moved back to byte 146 and bytes 146..161 are copied. |
|     mov x20, #16 |
|     sub x6, x20, x6 // x6 = 16 - leftover |
|     sub x0, x0, x6 |
|     sub x2, x2, x6 |
|     ld1 {v0.8b, v1.8b}, [x0], #16 |
|     st1 {v0.8b, v1.8b}, [x2], #16 |
| |
| y_col_loop_end: |
|     add x0, x0, x10 // advance source to the next row |
|     add x2, x2, x11 // advance destination to the next row |
|     subs x4, x4, #1 |
|     bgt y_row_loop |
| |
| // ---- Copy UV --------------------------------------------------------------- |
| uv_copy_start: |
| |
|     ldrsw x5, [sp, #96] // x5 = dest UV stride |
|     ldrsw x7, [sp, #80] // x7 = source UV stride |
| |
|     lsr x9, x9, #1 // x9 = height / 2 (chroma rows) |
| |
|     sub x10, x7, x8 // x10 = source UV row increment (stride - width) |
|     lsr x11, x8, #1 // width / 2 bytes are written per chroma plane row |
|     sub x11, x5, x11 // x11 = dest U / V row increment |
| |
|     mov x5, x15 // x5 = pu1_dest_v |
| |
|     ldr w4, [sp, #104] // w4 = is_u_first flag |
|     sxtw x4, w4 |
|     cmp x4, #0 // is_u_first == 0: swap the U and V destinations |
|     csel x4, x5, x4, eq // x4 = old V pointer (temporary) |
|     csel x5, x3, x5, eq // x5 = old U pointer |
|     csel x3, x4, x3, eq // x3 = old V pointer |
| |
|     mov x4, x9 // x4 = chroma rows remaining |
| uv_row_loop: |
|     mov x6, x8 // x6 = interleaved bytes remaining in this row |
| |
| uv_col_loop: |
|     sub x6, x6, #16 |
| |
|     prfm PLDL1KEEP, [x1, #128] |
|     ld2 {v0.8b, v1.8b}, [x1], #16 // de-interleave: v0 = even bytes, v1 = odd bytes |
|     st1 {v0.8b}, [x3], #8 |
|     st1 {v1.8b}, [x5], #8 |
|     cmp x6, #16 |
|     bge uv_col_loop // at least one more full 16-byte chunk |
|     cbz x6, uv_col_loop_end // width was a multiple of 16: row done |
| |
|     // Tail: x6 = leftover interleaved bytes (< 16). Step the source back |
|     // by (16 - leftover) and each destination back by half of that, so one |
|     // more 16-byte de-interleave ends exactly at the row end. |
|     mov x20, #16 |
|     sub x6, x20, x6 // x6 = 16 - leftover |
|     sub x1, x1, x6 |
|     sub x3, x3, x6, lsr #1 |
|     sub x5, x5, x6, lsr #1 |
|     ld2 {v0.8b, v1.8b}, [x1], #16 |
|     st1 {v0.8b}, [x3], #8 |
|     st1 {v1.8b}, [x5], #8 |
| uv_col_loop_end: |
|     add x1, x1, x10 // advance source to the next chroma row |
|     add x3, x3, x11 // advance first destination plane |
|     add x5, x5, x11 // advance second destination plane |
|     subs x4, x4, #1 |
|     bgt uv_row_loop |
| |
| exit: |
|     ldp x19, x20, [sp], #16 |
|     pop_v_regs |
|     ret |
| |
| |
| |
| |
| |
| |