| @/****************************************************************************** |
| @ * |
| @ * Copyright (C) 2015 The Android Open Source Project |
| @ * |
| @ * Licensed under the Apache License, Version 2.0 (the "License"); |
| @ * you may not use this file except in compliance with the License. |
| @ * You may obtain a copy of the License at: |
| @ * |
| @ * http://www.apache.org/licenses/LICENSE-2.0 |
| @ * |
| @ * Unless required by applicable law or agreed to in writing, software |
| @ * distributed under the License is distributed on an "AS IS" BASIS, |
| @ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| @ * See the License for the specific language governing permissions and |
| @ * limitations under the License. |
| @ * |
| @ ***************************************************************************** |
| @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
| @*/ |
| |
| @/* |
| @//---------------------------------------------------------------------------- |
| @// File Name : impeg2_format_conv.s |
| @// |
| @// Description : This file has the YUV420P to YUV420SP (UV and VU |
| @// interleaved) format conversion routines for the NEON platform. |
| @// |
| @// Reference Document : |
| @// |
| @// Revision History : |
| @// Date Author Detail Description |
| @// ------------ ---------------- ---------------------------------- |
| @// Jul 07, 2008 Naveen Kumar T Created |
| @// |
| @//------------------------------------------------------------------------- |
| @*/ |
| |
| @/* |
| @// Assembler setup: code is placed in .text and aligned to 4 bytes (2^2). |
| @// log2_16 (= 4) and log2_2 (= 1) are named constants, log2 of 16 and of 2; |
| @// neither is referenced by the code visible in this file. |
| @*/ |
| .text |
| .p2align 2 |
| .equ log2_16 , 4 |
| .equ log2_2 , 1 |
| @/* |
| @// ---------------------------------------------------------------------------- |
| @// Struct/Union Types and Define |
| @// ---------------------------------------------------------------------------- |
| @*/ |
| |
| @/* |
| @// ---------------------------------------------------------------------------- |
| @// Static Global Data section variables |
| @// ---------------------------------------------------------------------------- |
| @*/ |
| @//--------------------------- NONE -------------------------------------------- |
| |
| @/* |
| @// ---------------------------------------------------------------------------- |
| @// Static Prototype Functions |
| @// ---------------------------------------------------------------------------- |
| @*/ |
| @// -------------------------- NONE -------------------------------------------- |
| |
| @/* |
| @// ---------------------------------------------------------------------------- |
| @// Exported functions |
| @// ---------------------------------------------------------------------------- |
| @*/ |
| |
| @/***************************************************************************** |
| @* * |
| @* Function Name : impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q() * |
| @* * |
| @* Description : Converts an image from planar YUV420P to semi-planar * |
| @* YUV420SP. The luma plane is copied row by row (this * |
| @* step is skipped when convert_uv_only is 1) and the U * |
| @* and V planes are interleaved as U0 V0 U1 V1 ... into * |
| @* pu1_dest_uv. * |
| @* * |
| @* Arguments : Stack offsets below are relative to SP after the * |
| @* 24-byte register push done at function entry. * |
| @* R0 pu1_y * |
| @* R1 pu1_u * |
| @* R2 pu1_v * |
| @* R3 pu1_dest_y * |
| @* [R13 #24] pu1_dest_uv * |
| @* [R13 #28] u2_height * |
| @* [R13 #32] u2_width * |
| @* [R13 #36] u2_stridey * |
| @* [R13 #40] u2_strideu * |
| @* [R13 #44] u2_stridev * |
| @* [R13 #48] u2_dest_stride_y * |
| @* [R13 #52] u2_dest_stride_uv * |
| @* [R13 #56] convert_uv_only * |
| @* * |
| @* Values Returned : None * |
| @* * |
| @* Register Usage : R0 - R8, R12, Q0 * |
| @* R12 (ip) is a caller-saved scratch register, so it is * |
| @* used without being saved. * |
| @* * |
| @* Stack Usage : 24 Bytes * |
| @* * |
| @* Interruptibility : Interruptible * |
| @* * |
| @* Known Limitations * |
| @* Assumptions: Image Width: Must be at least 16. A width that is not a * |
| @* multiple of 16 is handled by re-copying one overlapping * |
| @* vector at the end of each row. * |
| @* Image Height: Must be at least 1 (the row loops test * |
| @* their counter after the first pass). The number of * |
| @* chroma rows is (u2_height + 1) / 2. * |
| @* The U and V planes advance with their own strides * |
| @* (u2_strideu and u2_stridev respectively). * |
| @* * |
| @* Revision History : * |
| @* DD MM YYYY Author(s) Changes (Describe the changes made) * |
| @* 07 06 2010 Varshita Draft * |
| @* 07 06 2010 Naveen Kr T Completed * |
| @* * |
| @*****************************************************************************/ |
| .global impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q |
| impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q: |
| |
| @// Save the callee-saved registers used below (6 registers = 24 bytes) |
| stmfd sp!, {r4-r8, lr} |
| |
| ldr r4, [sp, #56] @// Load convert_uv_only |
| |
| cmp r4, #1 |
| beq yuv420sp_uv_chroma @// convert_uv_only == 1: skip the luma copy |
| @/* Do the preprocessing before the main loops start */ |
| @// Load the parameters from stack |
| ldr r4, [sp, #28] @// r4 = u2_height (luma rows left) |
| |
| ldr r5, [sp, #32] @// r5 = u2_width |
| |
| ldr r7, [sp, #36] @// r7 = u2_stridey |
| |
| ldr r8, [sp, #48] @// r8 = u2_dest_stride_y |
| |
| sub r7, r7, r5 @// Source increment: stride - width |
| |
| sub r8, r8, r5 @// Destination increment: stride - width |
| |
| |
| yuv420sp_uv_row_loop_y: |
| mov r6, r5 @// r6 = luma bytes left in this row |
| |
| yuv420sp_uv_col_loop_y: |
| pld [r0, #128] |
| vld1.8 {q0}, [r0]! @// Copy 16 luma bytes |
| vst1.8 {q0}, [r3]! |
| sub r6, r6, #16 |
| cmp r6, #15 |
| bgt yuv420sp_uv_col_loop_y @// Loop while at least 16 bytes remain |
| |
| cmp r6, #0 |
| beq yuv420sp_uv_row_loop_end_y |
| @// Width is not a multiple of 16: step both pointers back so that one more |
| @// full 16-byte vector ends exactly at the end of the row. |
| @// Ex: if width is 162, the loop above processes 160 pixels; source and |
| @// destination are moved back by 14 to the 146th pixel and 16 bytes are |
| @// read and written again using VLD1 and VST1. |
| rsb r6, r6, #16 @// r6 = 16 - remaining = overlap size |
| sub r0, r0, r6 |
| sub r3, r3, r6 |
| |
| vld1.8 {q0}, [r0]! |
| vst1.8 {q0}, [r3]! |
| |
| yuv420sp_uv_row_loop_end_y: |
| add r0, r0, r7 @// Advance to the next source row |
| add r3, r3, r8 @// Advance to the next destination row |
| subs r4, r4, #1 |
| bgt yuv420sp_uv_row_loop_y |
| |
| yuv420sp_uv_chroma: |
| |
| ldr r3, [sp, #24] @// r3 = pu1_dest_uv |
| |
| ldr r4, [sp, #28] @// Load u2_height from stack |
| add r4, r4, #1 @// Round up so an odd height keeps its last chroma row |
| |
| ldr r5, [sp, #32] @// Load u2_width from stack |
| add r5, r5, #1 |
| bic r5, r5, #1 @// r5 = width rounded up to even = bytes per UV row |
| |
| ldr r7, [sp, #40] @// Load u2_strideu from stack |
| |
| ldr r12, [sp, #44] @// Load u2_stridev from stack |
| |
| ldr r8, [sp, #52] @// Load u2_dest_stride_uv from stack |
| |
| sub r7, r7, r5, lsr #1 @// U source increment: strideu - chroma width |
| |
| sub r12, r12, r5, lsr #1 @// V source increment: stridev - chroma width |
| |
| sub r8, r8, r5 @// Destination increment |
| |
| mov r5, r5, lsr #1 @// r5 = chroma width (samples per plane per row) |
| mov r4, r4, lsr #1 @// r4 = chroma rows = (height + 1) / 2 |
| yuv420sp_uv_row_loop_uv: |
| mov r6, r5 @// r6 = chroma samples left in this row |
| |
| |
| yuv420sp_uv_col_loop_uv: |
| pld [r1, #128] |
| pld [r2, #128] |
| vld1.8 d0, [r1]! @// d0 = 8 U samples |
| vld1.8 d1, [r2]! @// d1 = 8 V samples |
| vst2.8 {d0, d1}, [r3]! @// Store 16 bytes interleaved as U0 V0 U1 V1 ... |
| sub r6, r6, #8 |
| cmp r6, #7 |
| bgt yuv420sp_uv_col_loop_uv @// Loop while at least 8 samples remain |
| |
| cmp r6, #0 |
| beq yuv420sp_uv_row_loop_end_uv |
| @// Chroma width is not a multiple of 8: step the U and V pointers back so |
| @// that one more full 8-byte vector of each ends exactly at the end of the |
| @// row. The destination moves back by twice that amount because it holds |
| @// two bytes (U and V) per chroma sample. |
| rsb r6, r6, #8 @// r6 = 8 - remaining = overlap size |
| sub r1, r1, r6 |
| sub r2, r2, r6 |
| sub r3, r3, r6, lsl #1 |
| |
| vld1.8 d0, [r1]! |
| vld1.8 d1, [r2]! |
| vst2.8 {d0, d1}, [r3]! |
| |
| yuv420sp_uv_row_loop_end_uv: |
| add r1, r1, r7 @// Next U row |
| add r2, r2, r12 @// Next V row (uses the V stride, not the U stride) |
| add r3, r3, r8 @// Next interleaved destination row |
| subs r4, r4, #1 |
| bgt yuv420sp_uv_row_loop_uv |
| @// Restore the saved registers and return (lr is popped into pc) |
| ldmfd sp!, {r4-r8, pc} |
| |
| |
| |
| |
| |
| @/***************************************************************************** |
| @* * |
| @* Function Name : impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_a9q() * |
| @* * |
| @* Description : Converts an image from planar YUV420P to semi-planar * |
| @* YUV420SP. The luma plane is copied row by row (this * |
| @* step is skipped when convert_uv_only is 1) and the V * |
| @* and U planes are interleaved as V0 U0 V1 U1 ... into * |
| @* pu1_dest_uv. * |
| @* This function mirrors * |
| @* impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q; the only * |
| @* difference is that the VLD1.8 destination registers * |
| @* for chroma are swapped so that V is stored first. * |
| @* * |
| @* Arguments : Stack offsets below are relative to SP after the * |
| @* 24-byte register push done at function entry. * |
| @* R0 pu1_y * |
| @* R1 pu1_u * |
| @* R2 pu1_v * |
| @* R3 pu1_dest_y * |
| @* [R13 #24] pu1_dest_uv * |
| @* [R13 #28] u2_height * |
| @* [R13 #32] u2_width * |
| @* [R13 #36] u2_stridey * |
| @* [R13 #40] u2_strideu * |
| @* [R13 #44] u2_stridev * |
| @* [R13 #48] u2_dest_stride_y * |
| @* [R13 #52] u2_dest_stride_uv * |
| @* [R13 #56] convert_uv_only * |
| @* * |
| @* Values Returned : None * |
| @* * |
| @* Register Usage : R0 - R8, R12, Q0 * |
| @* R12 (ip) is a caller-saved scratch register, so it is * |
| @* used without being saved. * |
| @* * |
| @* Stack Usage : 24 Bytes * |
| @* * |
| @* Interruptibility : Interruptible * |
| @* * |
| @* Known Limitations * |
| @* Assumptions: Image Width: Must be at least 16. A width that is not a * |
| @* multiple of 16 is handled by re-copying one overlapping * |
| @* vector at the end of each row. * |
| @* Image Height: Must be at least 1 (the row loops test * |
| @* their counter after the first pass). The number of * |
| @* chroma rows is (u2_height + 1) / 2. * |
| @* The U and V planes advance with their own strides * |
| @* (u2_strideu and u2_stridev respectively). * |
| @* * |
| @* Revision History : * |
| @* DD MM YYYY Author(s) Changes (Describe the changes made) * |
| @* 07 06 2010 Varshita Draft * |
| @* 07 06 2010 Naveen Kr T Completed * |
| @* * |
| @*****************************************************************************/ |
| |
| .global impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_a9q |
| impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_a9q: |
| |
| @// Save the callee-saved registers used below (6 registers = 24 bytes) |
| stmfd sp!, {r4-r8, lr} |
| |
| ldr r4, [sp, #56] @// Load convert_uv_only |
| |
| cmp r4, #1 |
| beq yuv420sp_vu_chroma @// convert_uv_only == 1: skip the luma copy |
| |
| @/* Do the preprocessing before the main loops start */ |
| @// Load the parameters from stack |
| ldr r4, [sp, #28] @// r4 = u2_height (luma rows left) |
| |
| ldr r5, [sp, #32] @// r5 = u2_width |
| |
| ldr r7, [sp, #36] @// r7 = u2_stridey |
| |
| ldr r8, [sp, #48] @// r8 = u2_dest_stride_y |
| |
| sub r7, r7, r5 @// Source increment: stride - width |
| |
| sub r8, r8, r5 @// Destination increment: stride - width |
| |
| |
| yuv420sp_vu_row_loop_y: |
| mov r6, r5 @// r6 = luma bytes left in this row |
| |
| yuv420sp_vu_col_loop_y: |
| pld [r0, #128] |
| vld1.8 {q0}, [r0]! @// Copy 16 luma bytes |
| vst1.8 {q0}, [r3]! |
| sub r6, r6, #16 |
| cmp r6, #15 |
| bgt yuv420sp_vu_col_loop_y @// Loop while at least 16 bytes remain |
| |
| cmp r6, #0 |
| beq yuv420sp_vu_row_loop_end_y |
| @// Width is not a multiple of 16: step both pointers back so that one more |
| @// full 16-byte vector ends exactly at the end of the row. |
| @// Ex: if width is 162, the loop above processes 160 pixels; source and |
| @// destination are moved back by 14 to the 146th pixel and 16 bytes are |
| @// read and written again using VLD1 and VST1. |
| rsb r6, r6, #16 @// r6 = 16 - remaining = overlap size |
| sub r0, r0, r6 |
| sub r3, r3, r6 |
| |
| vld1.8 {q0}, [r0]! |
| vst1.8 {q0}, [r3]! |
| |
| yuv420sp_vu_row_loop_end_y: |
| add r0, r0, r7 @// Advance to the next source row |
| add r3, r3, r8 @// Advance to the next destination row |
| subs r4, r4, #1 |
| bgt yuv420sp_vu_row_loop_y |
| |
| yuv420sp_vu_chroma: |
| |
| ldr r3, [sp, #24] @// r3 = pu1_dest_uv |
| |
| ldr r4, [sp, #28] @// Load u2_height from stack |
| add r4, r4, #1 @// Round up so an odd height keeps its last chroma row |
| |
| ldr r5, [sp, #32] @// Load u2_width from stack |
| add r5, r5, #1 |
| bic r5, r5, #1 @// r5 = width rounded up to even = bytes per VU row |
| |
| ldr r7, [sp, #40] @// Load u2_strideu from stack |
| |
| ldr r12, [sp, #44] @// Load u2_stridev from stack |
| |
| ldr r8, [sp, #52] @// Load u2_dest_stride_uv from stack |
| |
| sub r7, r7, r5, lsr #1 @// U source increment: strideu - chroma width |
| |
| sub r12, r12, r5, lsr #1 @// V source increment: stridev - chroma width |
| |
| sub r8, r8, r5 @// Destination increment |
| |
| mov r5, r5, lsr #1 @// r5 = chroma width (samples per plane per row) |
| mov r4, r4, lsr #1 @// r4 = chroma rows = (height + 1) / 2 |
| yuv420sp_vu_row_loop_uv: |
| mov r6, r5 @// r6 = chroma samples left in this row |
| |
| |
| yuv420sp_vu_col_loop_uv: |
| pld [r1, #128] |
| pld [r2, #128] |
| vld1.8 d1, [r1]! @// d1 = 8 U samples |
| vld1.8 d0, [r2]! @// d0 = 8 V samples |
| vst2.8 {d0, d1}, [r3]! @// Store 16 bytes interleaved as V0 U0 V1 U1 ... |
| sub r6, r6, #8 |
| cmp r6, #7 |
| bgt yuv420sp_vu_col_loop_uv @// Loop while at least 8 samples remain |
| |
| cmp r6, #0 |
| beq yuv420sp_vu_row_loop_end_uv |
| @// Chroma width is not a multiple of 8: step the U and V pointers back so |
| @// that one more full 8-byte vector of each ends exactly at the end of the |
| @// row. The destination moves back by twice that amount because it holds |
| @// two bytes (V and U) per chroma sample. |
| rsb r6, r6, #8 @// r6 = 8 - remaining = overlap size |
| sub r1, r1, r6 |
| sub r2, r2, r6 |
| sub r3, r3, r6, lsl #1 |
| |
| vld1.8 d1, [r1]! |
| vld1.8 d0, [r2]! |
| vst2.8 {d0, d1}, [r3]! |
| |
| yuv420sp_vu_row_loop_end_uv: |
| add r1, r1, r7 @// Next U row |
| add r2, r2, r12 @// Next V row (uses the V stride, not the U stride) |
| add r3, r3, r8 @// Next interleaved destination row |
| subs r4, r4, #1 |
| bgt yuv420sp_vu_row_loop_uv |
| @// Restore the saved registers and return (lr is popped into pc) |
| ldmfd sp!, {r4-r8, pc} |
| |
| |
| |
| |
| |