| @/***************************************************************************** |
| @* |
| @* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore |
| @* |
| @* Licensed under the Apache License, Version 2.0 (the "License"); |
| @* you may not use this file except in compliance with the License. |
| @* You may obtain a copy of the License at: |
| @* |
| @* http://www.apache.org/licenses/LICENSE-2.0 |
| @* |
| @* Unless required by applicable law or agreed to in writing, software |
| @* distributed under the License is distributed on an "AS IS" BASIS, |
| @* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| @* See the License for the specific language governing permissions and |
| @* limitations under the License. |
| @* |
| @*****************************************************************************/ |
| @/** |
| @/******************************************************************************* |
| @* @file |
| @* ihevcd_fmt_conv_420sp_to_420p.s |
| @* |
| @* @brief |
| @* contains function definitions for format conversions |
| @* |
| @* @author |
| @* ittiam |
| @* |
| @* @par list of functions: |
| @* - ihevcd_fmt_conv_420sp_to_420p_a9q() |
| @* |
| @* |
| @* @remarks |
| @* none |
| @* |
| @*******************************************************************************/ |
| |
| |
| |
| |
| |
| |
| |
| |
| .text |
| |
| |
| |
| |
| |
| @/***************************************************************************** |
| @* * |
| @* Function Name : ihevcd_fmt_conv_420sp_to_420p_a9q() * |
| @* * |
| @* Description : This function converts the image from YUV420SP color * |
| @* space (UV interleaved) to YUV420P color space (planar U and V). * |
| @* * |
| @* Arguments : R0 pu1_src_y * |
| @* R1 pu1_src_uv * |
| @* R2 pu1_dest_y * |
| @* R3 pu1_dest_u * |
| @* [R13 #40] pu1_dest_v * |
| @* [R13 #44] u2_width * |
| @* [R13 #48] u2_height * |
| @* [R13 #52] u2_stridey * |
| @* [R13 #56] u2_strideuv * |
| @* [R13 #60] u2_dest_stridey * |
| @* [R13 #64] u2_dest_strideuv * |
| @* [R13 #68] is_u_first * |
| @* [R13 #72] disable_luma_copy * |
| @* * |
| @* Values Returned : None * |
| @* * |
| @* Register Usage : R0 - R14 * |
| @* * |
| @* Stack Usage : 40 Bytes * |
| @* * |
| @* Interruptibility : Interruptible * |
| @* * |
| @* Known Limitations * |
| @* Assumptions: Image Width: Assumed to be multiple of 2 and * |
| @* Image Height: Assumed to be even. * |
| @* * |
| @* Revision History : * |
| @* DD MM YYYY Author(s) Changes (Describe the changes made) * |
| @* 16 05 2012 Naveen SR draft * |
| @* * |
| @*****************************************************************************/ |
| |
@//---------------------------------------------------------------------------
@// ihevcd_fmt_conv_420sp_to_420p_a9q
@//
@// Copies the luma plane (unless disable_luma_copy is non-zero) and splits the
@// byte-interleaved chroma plane into two separate destination planes.
@//
@// Arguments (stack offsets are relative to sp AFTER the 40-byte register save):
@//   r0        pu1_src_y            r1        pu1_src_uv
@//   r2        pu1_dest_y           r3        pu1_dest_u
@//   [sp,#40]  pu1_dest_v           [sp,#44]  u2_width
@//   [sp,#48]  u2_height            [sp,#52]  u2_stridey
@//   [sp,#56]  u2_strideuv          [sp,#60]  u2_dest_stridey
@//   [sp,#64]  u2_dest_strideuv     [sp,#68]  is_u_first
@//   [sp,#72]  disable_luma_copy
@//
@// Register roles:
@//   r0/r2   luma source / destination cursor
@//   r1      interleaved chroma source cursor
@//   r3/r5   destination cursor for the 1st / 2nd byte of every chroma pair
@//           (pu1_dest_u / pu1_dest_v, exchanged when is_u_first == 0)
@//   r4      rows still to process        r6   bytes (or pairs) left in the row
@//   r8      width                        r9   height, later height / 2
@//   r10     source row skip (stride - width)
@//   r11     destination row skip
@//   r7      stride scratch               r12, lr   byte scratch (narrow path)
@//   d0, d1  NEON data
@//
@// Behaviour notes:
@//   * width <= 0 or height <= 0 returns immediately without touching memory.
@//   * width >= 16 uses 16-byte NEON transfers; a partial last chunk is handled
@//     by stepping back and re-copying the final 16 bytes of the row.
@//   * width < 16 cannot use the step-back trick (it would address memory in
@//     front of the row), so those rows are copied with byte loads/stores.
@//   * Width is expected to be even (chroma is handled as byte pairs).
@//---------------------------------------------------------------------------

.globl ihevcd_fmt_conv_420sp_to_420p_a9q

.type ihevcd_fmt_conv_420sp_to_420p_a9q, %function

ihevcd_fmt_conv_420sp_to_420p_a9q:
    STMFD       sp!,{r4-r12, lr}        @// 10 registers = 40 bytes

    LDR         r5,[sp,#60]             @// u2_dest_stridey
    LDR         r7,[sp,#52]             @// u2_stridey
    LDR         r8,[sp,#44]             @// u2_width
    LDR         r9,[sp,#48]             @// u2_height

    @// Every loop below is a do-while, so empty dimensions must be rejected
    @// up front or one chunk / one row would still be copied.
    CMP         r8,#0
    BLE         exit
    CMP         r9,#0
    BLE         exit

    SUB         r10,r7,r8               @// luma source row skip
    SUB         r11,r5,r8               @// luma destination row skip

    LDR         r5,[sp,#72]             @// disable_luma_copy
    CMP         r5,#0
    BNE         uv_copy_start           @// non-zero: leave luma untouched

    @/* Copy Y */

    MOV         r4,r9                   @// rows remaining
    CMP         r8,#16
    BLT         y_narrow_row_loop       @// rows shorter than one NEON chunk

y_row_loop:
    MOV         r6,r8                   @// bytes remaining in this row

y_col_loop:
    SUB         r6,r6,#16
    vld1.8      {d0,d1},[r0]!
    vst1.8      {d0,d1},[r2]!
    CMP         r6,#16
    BGE         y_col_loop
    CMP         r6,#0
    BEQ         y_col_loop_end
    @// 1..15 bytes are left. Step both cursors back by (16 - remaining) so
    @// that one more full 16-byte transfer ends exactly at the row end; the
    @// overlapped bytes are simply copied twice. Example: width 162 -> the
    @// loop above handles 160 bytes, then bytes 146..161 are copied here.
    RSB         r6,r6,#16
    SUB         r0,r0,r6
    SUB         r2,r2,r6
    vld1.8      {d0,d1},[r0]!
    vst1.8      {d0,d1},[r2]!

y_col_loop_end:
    ADD         r0,r0,r10               @// advance to next source row
    ADD         r2,r2,r11               @// advance to next destination row
    SUBS        r4,r4,#1
    BGT         y_row_loop
    B           uv_copy_start

    @// Narrow luma rows (0 < width < 16): plain byte copy, no overlap trick.
y_narrow_row_loop:
    MOV         r6,r8                   @// bytes remaining in this row

y_narrow_byte_loop:
    LDRB        r12,[r0],#1
    STRB        r12,[r2],#1
    SUBS        r6,r6,#1
    BGT         y_narrow_byte_loop

    ADD         r0,r0,r10
    ADD         r2,r2,r11
    SUBS        r4,r4,#1
    BGT         y_narrow_row_loop


    @/* Copy UV */
uv_copy_start:

    LDR         r5,[sp,#64]             @// u2_dest_strideuv
    LDR         r7,[sp,#56]             @// u2_strideuv

    MOV         r9,r9,LSR #1            @// chroma rows = height / 2

    SUB         r10,r7,r8               @// chroma source row skip (width bytes per row)
    MOV         r11,r8,LSR #1           @// each destination plane gets width / 2 bytes
    SUB         r11,r5,r11              @// chroma destination row skip

    LDR         r5,[sp,#40]             @// pu1_dest_v

    LDR         r4,[sp,#68]             @// is_u_first
    CMP         r4,#0                   @// zero: exchange the two destinations
    MOVEQ       r4,r5
    MOVEQ       r5,r3
    MOVEQ       r3,r4

    MOV         r4,r9                   @// rows remaining
    CMP         r8,#16
    BLT         uv_narrow_row_loop      @// rows shorter than one NEON chunk

uv_row_loop:
    MOV         r6,r8                   @// interleaved bytes remaining in this row

uv_col_loop:
    SUB         r6,r6,#16

    PLD         [r1,#128]
    vld2.8      {d0,d1},[r1]!           @// d0 = even bytes, d1 = odd bytes
    VST1.8      D0,[r3]!
    VST1.8      D1,[r5]!
    CMP         r6,#16
    BGE         uv_col_loop
    CMP         r6,#0
    BEQ         uv_col_loop_end
    @// 1..15 interleaved bytes are left. Step the source back by
    @// (16 - remaining) bytes and each destination back by half of that, so
    @// one more 16-byte VLD2 / two 8-byte VST1 end exactly at the row end.
    RSB         r6,r6,#16
    SUB         r1,r1,r6
    SUB         r3,r3,r6,LSR #1
    SUB         r5,r5,r6,LSR #1
    vld2.8      {d0,d1},[r1]!
    VST1.8      D0,[r3]!
    VST1.8      D1,[r5]!

uv_col_loop_end:
    ADD         r1,r1,r10
    ADD         r3,r3,r11
    ADD         r5,r5,r11
    SUBS        r4,r4,#1
    BGT         uv_row_loop
    B           exit

    @// Narrow chroma rows (0 < width < 16): split one byte pair at a time.
uv_narrow_row_loop:
    MOVS        r6,r8,LSR #1            @// pairs in this row
    BEQ         uv_narrow_row_end       @// width == 1: no complete pair

uv_narrow_pair_loop:
    LDRB        r12,[r1],#1             @// 1st byte of the pair
    LDRB        lr,[r1],#1              @// 2nd byte of the pair
    STRB        r12,[r3],#1
    STRB        lr,[r5],#1
    SUBS        r6,r6,#1
    BGT         uv_narrow_pair_loop

uv_narrow_row_end:
    ADD         r1,r1,r10
    ADD         r3,r3,r11
    ADD         r5,r5,r11
    SUBS        r4,r4,#1
    BGT         uv_narrow_row_loop

exit:
    LDMFD       sp!,{r4-r12, pc}
| |
| |
| |
| |
| |
| |