| @/***************************************************************************** |
| @* |
| @* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore |
| @* |
| @* Licensed under the Apache License, Version 2.0 (the "License"); |
| @* you may not use this file except in compliance with the License. |
| @* You may obtain a copy of the License at: |
| @* |
| @* http://www.apache.org/licenses/LICENSE-2.0 |
| @* |
| @* Unless required by applicable law or agreed to in writing, software |
| @* distributed under the License is distributed on an "AS IS" BASIS, |
| @* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| @* See the License for the specific language governing permissions and |
| @* limitations under the License. |
| @* |
| @*****************************************************************************/ |
| @/** |
| @/******************************************************************************* |
| @* @file |
| @* ihevcd_fmt_conv_420sp_to_420sp.s |
| @* |
| @* @brief |
| @* contains function definitions for format conversions |
| @* |
| @* @author |
| @* ittiam |
| @* |
| @* @par list of functions: |
| @* - ihevcd_fmt_conv_420sp_to_420sp_a9q() |
| @* |
| @* @remarks |
| @* none |
| @* |
| @*******************************************************************************/ |
| .equ DO1STROUNDING, 0 |
| |
| @ ARM, PRESERVE8: these look like armasm directives kept only as comments (TODO confirm). |
| @ NOTE(review): DO1STROUNDING above is not referenced by any code visible in this file. |
| @ The directives below place the code in .text, aligned to 2^2 = 4 bytes. |
| |
| .text |
| .p2align 2 |
| |
| |
| |
| |
| |
| @/***************************************************************************** |
| @* Function Name : ihevcd_fmt_conv_420sp_to_420sp_a9q() |
| @* |
| @* Description   : Copies a YUV 4:2:0 semi-planar image (a Y plane followed by |
| @*                 an interleaved UV plane) into a destination with the same |
| @*                 layout but possibly different row strides. |
| @*                   - Y  : u2_height rows of u2_width bytes. |
| @*                   - UV : (u2_height >> 1) rows of u2_width bytes; at least |
| @*                          one chroma row is copied when u2_height >= 1. |
| @*                 Returns without touching memory when u2_width <= 0 or |
| @*                 u2_height <= 0. |
| @* |
| @* Arguments     : r0         pu1_y                 source Y plane |
| @*                 r1         pu1_uv                source UV plane |
| @*                 r2         pu1_dest_y            destination Y plane |
| @*                 r3         pu1_dest_uv           destination UV plane |
| @*                 Stack arguments, offsets valid AFTER the 40-byte register push: |
| @*                 [sp, #40]  u2_width |
| @*                 [sp, #44]  u2_height |
| @*                 [sp, #48]  u2_stridey |
| @*                 [sp, #52]  u2_stridechroma |
| @*                 [sp, #56]  u2_dest_stridey |
| @*                 [sp, #60]  u2_dest_stridechroma |
| @*                 Each stack argument is read as a full 32-bit word and compared |
| @*                 with signed conditions. |
| @* |
| @* Values Returned : None |
| @* |
| @* Register Usage : r0/r1  running source pointers (Y / UV) |
| @*                  r2     running destination pointer (Y, then UV) |
| @*                  r3     destination UV base |
| @*                  r4     rows remaining |
| @*                  r5/r7  destination / source stride |
| @*                  r6     bytes remaining in the row, then tail rewind amount |
| @*                  r8     width in bytes |
| @*                  r9     height, then height >> 1 |
| @*                  r10/r11 source / destination end-of-row skip (stride - width) |
| @*                  r12    byte scratch on the narrow-row path |
| @*                  d0-d3  NEON copy registers |
| @*                  r4-r12 and lr are saved on entry and restored on return; |
| @*                  r0-r3, d0-d3 and the condition flags are clobbered. |
| @* |
| @* Stack Usage    : 40 bytes (r4-r12, lr) |
| @* |
| @* Interruptibility : Interruptible |
| @* |
| @* Assumptions    : Image width is assumed to be a multiple of 2 and image |
| @*                  height is assumed to be even. |
| @*                  Rows whose width is not a multiple of the vector chunk |
| @*                  (32 bytes for Y, 16 bytes for UV) are finished by re-copying |
| @*                  the last full chunk of the row, overlapping bytes that were |
| @*                  already copied. Rows narrower than one chunk are copied |
| @*                  byte by byte so nothing outside the row is read or written. |
| @* |
| @* Revision History : |
| @*   DD MM YYYY   Author(s)    Changes (Describe the changes made) |
| @*   16 05 2012   Naveen SR    draft |
| @* |
| @*****************************************************************************/ |
| |
| @ Size of the register save area pushed by the prologue (10 registers x 4 bytes). |
| .equ FMTCONV_SAVED_REGS_SIZE, 40 |
| @ Offsets of the stack-passed arguments relative to sp after the prologue. |
| .equ FMTCONV_WIDTH_OFFSET, FMTCONV_SAVED_REGS_SIZE + 0 |
| .equ FMTCONV_HEIGHT_OFFSET, FMTCONV_SAVED_REGS_SIZE + 4 |
| .equ FMTCONV_SRC_Y_STRIDE_OFFSET, FMTCONV_SAVED_REGS_SIZE + 8 |
| .equ FMTCONV_SRC_UV_STRIDE_OFFSET, FMTCONV_SAVED_REGS_SIZE + 12 |
| .equ FMTCONV_DST_Y_STRIDE_OFFSET, FMTCONV_SAVED_REGS_SIZE + 16 |
| .equ FMTCONV_DST_UV_STRIDE_OFFSET, FMTCONV_SAVED_REGS_SIZE + 20 |
| @ Bytes moved per vector-loop iteration. |
| .equ FMTCONV_Y_CHUNK, 32 |
| .equ FMTCONV_UV_CHUNK, 16 |
| @ Prefetch distance, in bytes, ahead of the source pointer. |
| .equ FMTCONV_PLD_AHEAD, 128 |
| |
|     .global ihevcd_fmt_conv_420sp_to_420sp_a9q |
|     .type   ihevcd_fmt_conv_420sp_to_420sp_a9q, %function |
| ihevcd_fmt_conv_420sp_to_420sp_a9q: |
| |
|     STMFD   sp!, {r4-r12, lr} |
| |
|     LDR     r8, [sp, #FMTCONV_WIDTH_OFFSET]         @ r8 = u2_width |
|     LDR     r9, [sp, #FMTCONV_HEIGHT_OFFSET]        @ r9 = u2_height |
| |
|     @ The row and column loops below are do-while loops, so an empty image |
|     @ must be rejected up front or one row / one chunk would still be copied. |
|     CMP     r8, #0 |
|     BLE     .Lfmtconv_exit |
|     CMP     r9, #0 |
|     BLE     .Lfmtconv_exit |
| |
|     LDR     r5, [sp, #FMTCONV_DST_Y_STRIDE_OFFSET]  @ r5 = u2_dest_stridey |
|     LDR     r7, [sp, #FMTCONV_SRC_Y_STRIDE_OFFSET]  @ r7 = u2_stridey |
| |
|     SUB     r10, r7, r8                             @ src Y end-of-row skip |
|     SUB     r11, r5, r8                             @ dst Y end-of-row skip |
| |
|     @ ------------------------------ Copy Y ------------------------------ |
| |
|     MOV     r4, r9                                  @ r4 = rows remaining |
| .Lfmtconv_y_row_loop: |
|     MOV     r6, r8                                  @ r6 = bytes remaining in row |
|     CMP     r6, #FMTCONV_Y_CHUNK |
|     BLT     .Lfmtconv_y_narrow_row                  @ row shorter than one chunk |
| |
| .Lfmtconv_y_col_loop: |
|     @ Invariant: r6 >= 32 here, so a full 32-byte chunk fits in the row. |
|     PLD     [r0, #FMTCONV_PLD_AHEAD] |
|     SUB     r6, r6, #FMTCONV_Y_CHUNK |
|     VLD1.8  {d0}, [r0]! |
|     VLD1.8  {d1}, [r0]! |
|     VLD1.8  {d2}, [r0]! |
|     VLD1.8  {d3}, [r0]! |
|     VST1.8  {d0}, [r2]! |
|     VST1.8  {d1}, [r2]! |
|     VST1.8  {d2}, [r2]! |
|     VST1.8  {d3}, [r2]! |
|     CMP     r6, #FMTCONV_Y_CHUNK |
|     BGE     .Lfmtconv_y_col_loop |
|     CMP     r6, #0 |
|     BEQ     .Lfmtconv_y_row_end |
| |
|     @ Width is not a multiple of 32: step both pointers back so that the last |
|     @ 32 bytes of the row can be copied as one full chunk. |
|     @ Example: width 162 -> the loop above copied 160 bytes, r6 = 2, the |
|     @ pointers are moved back by 30 to byte 130 and bytes 130..161 are copied. |
|     RSB     r6, r6, #FMTCONV_Y_CHUNK                @ r6 = 32 - leftover |
|     SUB     r0, r0, r6 |
|     SUB     r2, r2, r6 |
|     VLD1.8  {d0}, [r0]! |
|     VLD1.8  {d1}, [r0]! |
|     VLD1.8  {d2}, [r0]! |
|     VLD1.8  {d3}, [r0]! |
|     VST1.8  {d0}, [r2]! |
|     VST1.8  {d1}, [r2]! |
|     VST1.8  {d2}, [r2]! |
|     VST1.8  {d3}, [r2]! |
| |
| .Lfmtconv_y_row_end: |
|     @ Both pointers sit exactly 'width' bytes into the row; skip the padding. |
|     ADD     r0, r0, r10 |
|     ADD     r2, r2, r11 |
|     SUBS    r4, r4, #1 |
|     BGT     .Lfmtconv_y_row_loop |
| |
|     @ ------------------------------ Copy UV ----------------------------- |
| |
|     LDR     r5, [sp, #FMTCONV_DST_UV_STRIDE_OFFSET] @ r5 = u2_dest_stridechroma |
|     LDR     r7, [sp, #FMTCONV_SRC_UV_STRIDE_OFFSET] @ r7 = u2_stridechroma |
| |
|     MOV     r9, r9, LSR #1                          @ chroma rows = height / 2 |
|     @ The width is NOT halved: an interleaved UV row is 'width' bytes long. |
| |
|     MOV     r2, r3                                  @ r2 = pu1_dest_uv |
| |
|     SUB     r10, r7, r8                             @ src UV end-of-row skip |
|     SUB     r11, r5, r8                             @ dst UV end-of-row skip |
| |
|     @ Do-while on purpose: with height == 1 (r9 == 0) one chroma row is still |
|     @ copied, exactly as before this routine gained its argument checks. |
|     MOV     r4, r9                                  @ r4 = rows remaining |
| .Lfmtconv_uv_row_loop: |
|     MOV     r6, r8                                  @ r6 = bytes remaining in row |
|     CMP     r6, #FMTCONV_UV_CHUNK |
|     BLT     .Lfmtconv_uv_narrow_row                 @ row shorter than one chunk |
| |
| .Lfmtconv_uv_col_loop: |
|     @ Invariant: r6 >= 16 here, so a full 16-byte chunk fits in the row. |
|     PLD     [r1, #FMTCONV_PLD_AHEAD] |
|     SUB     r6, r6, #FMTCONV_UV_CHUNK |
|     VLD1.8  {d0}, [r1]! |
|     VLD1.8  {d1}, [r1]! |
|     VST1.8  {d0}, [r2]! |
|     VST1.8  {d1}, [r2]! |
|     CMP     r6, #FMTCONV_UV_CHUNK |
|     BGE     .Lfmtconv_uv_col_loop |
|     CMP     r6, #0 |
|     BEQ     .Lfmtconv_uv_row_end |
| |
|     @ Width is not a multiple of 16: step both pointers back so that the last |
|     @ 16 bytes of the row can be copied as one full chunk. |
|     @ Example: width 162 -> the loop above copied 160 bytes, r6 = 2, the |
|     @ pointers are moved back by 14 to byte 146 and bytes 146..161 are copied. |
|     RSB     r6, r6, #FMTCONV_UV_CHUNK               @ r6 = 16 - leftover |
|     SUB     r1, r1, r6 |
|     SUB     r2, r2, r6 |
|     VLD1.8  {d0}, [r1]! |
|     VLD1.8  {d1}, [r1]! |
|     VST1.8  {d0}, [r2]! |
|     VST1.8  {d1}, [r2]! |
| |
| .Lfmtconv_uv_row_end: |
|     ADD     r1, r1, r10 |
|     ADD     r2, r2, r11 |
|     SUBS    r4, r4, #1 |
|     BGT     .Lfmtconv_uv_row_loop |
| |
| .Lfmtconv_exit: |
|     LDMFD   sp!, {r4-r12, pc} |
| |
|     @ ---------------- Narrow rows (kept off the hot path) ---------------- |
|     @ Reached only with 0 < r6 < chunk size. A vector chunk would run past the |
|     @ row end and the rewind would then land before the row start, so these |
|     @ rows are copied one byte at a time instead. |
| |
| .Lfmtconv_y_narrow_row: |
|     LDRB    r12, [r0], #1 |
|     STRB    r12, [r2], #1 |
|     SUBS    r6, r6, #1 |
|     BGT     .Lfmtconv_y_narrow_row |
|     B       .Lfmtconv_y_row_end |
| |
| .Lfmtconv_uv_narrow_row: |
|     LDRB    r12, [r1], #1 |
|     STRB    r12, [r2], #1 |
|     SUBS    r6, r6, #1 |
|     BGT     .Lfmtconv_uv_narrow_row |
|     B       .Lfmtconv_uv_row_end |
| |
|     .size   ihevcd_fmt_conv_420sp_to_420sp_a9q, .-ihevcd_fmt_conv_420sp_to_420sp_a9q |
| |
| |
| @ Empty .note.GNU-stack section: by GNU toolchain convention this marks the |
| @ object as not needing an executable stack. |
| .section .note.GNU-stack,"",%progbits |
| |