@ blob: c1d09edb842cf92ea6f448ea4fcfe01e8cf8410b [file] [log] [blame]
@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@/*******************************************************************************
@* @file
@* ihevcd_fmt_conv_420sp_to_420p.s
@*
@* @brief
@* contains function definitions for format conversions
@*
@* @author
@* ittiam
@*
@* @par list of functions:
@*
@*
@* @remarks
@* none
@*
@*******************************************************************************/
.text
@/*****************************************************************************
@* *
@* Function Name : ihevcd_fmt_conv_420sp_to_420p_a9q() *
@* *
@* Description : This function converts the image from YUV420SP color *
@* space (UV interleaved) to YUV420P color space (planar U and V). *
@* *
@* Arguments : R0 pu1_src_y *
@* R1 pu1_src_uv *
@* R2 pu1_dest_y *
@* R3 pu1_dest_u *
@* [R13 #40] pu1_dest_v *
@* [R13 #44] u2_width *
@* [R13 #48] u2_height *
@* [R13 #52] u2_stridey *
@* [R13 #56] u2_strideuv *
@* [R13 #60] u2_dest_stridey *
@* [R13 #64] u2_dest_strideuv *
@* [R13 #68] is_u_first *
@* [R13 #72] disable_luma_copy *
@* *
@* Values Returned : None *
@* *
@* Register Usage : R0 - R14 *
@* *
@* Stack Usage : 40 Bytes *
@* *
@* Interruptibility : Interruptible *
@* *
@* Known Limitations *
@* Assumptions: Image Width: Assumed to be multiple of 2 and *
@* Image Height: Assumed to be even. *
@* *
@* Revision History : *
@* DD MM YYYY Author(s) Changes (Describe the changes made) *
@* 16 05 2012 Naveen SR draft *
@* *
@*****************************************************************************/
.globl ihevcd_fmt_conv_420sp_to_420p_a9q
.type ihevcd_fmt_conv_420sp_to_420p_a9q, %function
ihevcd_fmt_conv_420sp_to_420p_a9q:
STMFD sp!,{r4-r12, lr}
LDR r5,[sp,#60] @//Load u2_dest_stridey
@ LDR r6,[sp,#56] @//Load u2_strideuv
LDR r7,[sp,#52] @//Load u2_stridey
LDR r8,[sp,#44] @//Load u2_width
LDR r9,[sp,#48] @//Load u2_height
SUB r10,r7,r8 @// Src Y increment
SUB r11,r5,r8 @// Dst Y increment
LDR r5,[sp,#72] @//Load disable_luma_copy flag
CMP r5,#0 @//skip luma if disable_luma_copy is non-zero
BNE uv_copy_start
@/* Copy Y */
MOV r4,r9 @// Copying height
y_row_loop:
MOV r6,r8 @// Copying width
y_col_loop:
SUB r6,r6,#16
vld1.8 {d0,d1},[r0]!
vst1.8 {d0,d1},[r2]!
CMP r6,#16
BGE y_col_loop
CMP r6,#0
BEQ y_col_loop_end
@//If non-multiple of 16, then go back by few bytes to ensure 16 bytes can be read
@//Ex if width is 162, above loop will process 160 pixels. And
@//Both source and destination will point to 146th pixel and then 16 bytes will be read
@// and written using VLD1 and VST1
RSB r6,r6,#16
SUB r0,r0,r6
SUB r2,r2,r6
vld1.8 {d0,d1}, [r0]!
vst1.8 {d0,d1}, [r2]!
y_col_loop_end:
ADD r0, r0, r10
ADD r2, r2, r11
SUBS r4, r4, #1
BGT y_row_loop
@/* Copy UV */
uv_copy_start:
LDR r5,[sp,#64] @//Load u2_dest_strideuv
LDR r7,[sp,#56] @//Load u2_strideuv
MOV r9,r9,LSR #1 @// height/2
@ MOV r8,r8,LSR #1 @// Width/2
SUB r10,r7,r8 @// Src UV increment
MOV r11,r8,LSR #1
SUB r11,r5,r11 @// Dst U and V increment
LDR r5,[sp,#40] @//Load pu1_dest_v
LDR r4,[sp,#68] @//Load is_u_first_flag
CMP r4,#0 @//Swap U and V dest if is_u_first_flag is zero
MOVEQ r4,r5
MOVEQ r5,r3
MOVEQ r3,r4
MOV r4,r9 @// Copying height
uv_row_loop:
MOV r6,r8 @// Copying width
uv_col_loop:
SUB r6,r6,#16
PLD [r1,#128]
vld2.8 {d0,d1},[r1]!
VST1.8 D0,[r3]!
VST1.8 D1,[r5]!
CMP r6,#16
BGE uv_col_loop
CMP r6,#0
BEQ uv_col_loop_end
@//If non-multiple of 16, then go back by few bytes to ensure 16 bytes can be read
@//Ex if width is 162, above loop will process 160 pixels. And
@//Both source and destination will point to 146th pixel and then 16 bytes will be read
@// and written using VLD1 and VST1
RSB r6,r6,#16
SUB r1,r1,r6
SUB r3,r3,r6,LSR #1
SUB r5,r5,r6,LSR #1
vld2.8 {d0,d1}, [r1]!
VST1.8 D0, [r3]!
VST1.8 D1, [r5]!
uv_col_loop_end:
ADD r1, r1, r10
ADD r3, r3, r11
ADD r5, r5, r11
SUBS r4, r4, #1
BGT uv_row_loop
exit:
LDMFD sp!,{r4-r12, pc}