| @/****************************************************************************** |
| @ * |
| @ * Copyright (C) 2015 The Android Open Source Project |
| @ * |
| @ * Licensed under the Apache License, Version 2.0 (the "License"); |
| @ * you may not use this file except in compliance with the License. |
| @ * You may obtain a copy of the License at: |
| @ * |
| @ * http://www.apache.org/licenses/LICENSE-2.0 |
| @ * |
| @ * Unless required by applicable law or agreed to in writing, software |
| @ * distributed under the License is distributed on an "AS IS" BASIS, |
| @ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| @ * See the License for the specific language governing permissions and |
| @ * limitations under the License. |
| @ * |
| @ ***************************************************************************** |
| @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
| @*/ |
| |
| @****************************************************************************** |
| @* |
| @* @brief |
| @* This file contains definitions of routines for spatial filter |
| @* |
| @* @author |
| @* Ittiam |
| @* |
| @* @par List of Functions: |
| @* - ideint_spatial_filter_a9() |
| @* |
| @* @remarks |
| @* None |
| @* |
| @******************************************************************************* |
| |
| |
| @****************************************************************************** |
| @* |
| @* @brief Performs spatial filtering |
| @* |
| @* @par Description |
| @* This function performs edge adaptive spatial filtering on an 8x8 block |
| @* |
| @* @param[in] pu1_src |
| @* UWORD8 pointer to the source |
| @* |
| @* @param[in] pu1_out |
| @* UWORD8 pointer to the destination |
| @* |
| @* @param[in] src_strd |
| @* source stride |
| @* |
| @* @param[in] out_strd |
| @* destination stride |
| @* |
| @* @returns |
| @* None |
| @* |
| @* @remarks |
| @* |
| @****************************************************************************** |
| |
| .global ideint_spatial_filter_a9 |
| |
| ideint_spatial_filter_a9: |
| |
| @ Edge adaptive spatial filter: measures absolute differences between |
| @ consecutive source rows in three directions, picks a horizontal shift of |
| @ -1, 0 or +1 independently for columns 0-3 and columns 4-7, then writes |
| @ 4 rows of 8 bytes, each the rounded average of two vertically adjacent |
| @ source rows sampled with opposite shifts. |
| @ |
| @ Register usage (the four arguments arrive in r0-r3): |
| @ r0 - pu1_src, post-incremented by r2 after every row load |
| @ r1 - pu1_out, post-incremented by r3 after every row store |
| @ r2 - source stride |
| @ r3 - destination stride |
| @ r4 - row counter in detect_edge, later a source row pointer |
| @ r5-r7 - neighbour address / adiff values, later source row pointers |
| @ r8 - shift for columns 0-3, later the filter_loop row counter |
| @ r9 - shift for columns 4-7 |
| @ r10 - copy of the original pu1_src |
| @ q8-q10 - 16-bit per-column accumulators of absolute differences |
| @ d30, d31 - EDGE_BIAS_0 and EDGE_BIAS_1 |
| |
| @ Save the scratch registers used below together with the return address |
| stmfd sp!, {r4-r10, lr} |
| |
| @ Clear the three accumulators (adiff[0], adiff[1], adiff[2]) |
| vmov.u16 q8, #0 |
| vmov.u16 q9, #0 |
| vmov.u16 q10, #0 |
| |
| @ Backup r0 (r0 is advanced by the loads below; r10 is used at interpolate) |
| mov r10, r0 |
| |
| @ Load from &pu1_row_1[0] |
| @ r5 is set to one byte before the row start before r0 is advanced |
| sub r5, r0, #1 |
| vld1.8 d0, [r0], r2 |
| |
| @ Load from &pu1_row_1[-1] |
| @ (8 bytes starting one byte to the left of the row) |
| vld1.8 d1, [r5] |
| add r5, r5, #2 |
| |
| @ Load from &pu1_row_1[1] |
| @ (8 bytes starting one byte to the right, so the byte at index 8 is read) |
| vld1.8 d2, [r5] |
| |
| @ Number of rows: detect_edge runs 4 times, each pass comparing the |
| @ previously loaded row with the next one |
| mov r4, #4 |
| |
| @ EDGE_BIAS_0 (5 in both 32-bit lanes) |
| vmov.u32 d30, #5 |
| |
| @ EDGE_BIAS_1 (7 in both 32-bit lanes) |
| vmov.u32 d31, #7 |
| |
| detect_edge: |
| @ Load from &pu1_row_2[0] |
| sub r5, r0, #1 |
| vld1.8 d3, [r0], r2 |
| |
| @ Load from &pu1_row_2[-1] |
| vld1.8 d4, [r5] |
| add r5, r5, #2 |
| |
| @ Load from &pu1_row_2[1] |
| vld1.8 d5, [r5] |
| |
| @ Calculate absolute differences |
| @ Each vabal adds |a - b| of the 8-bit inputs into the 16-bit lanes |
| @ pu1_row_1[i] - pu1_row_2[i] |
| vabal.u8 q8, d0, d3 |
| |
| @ pu1_row_1[i - 1] - pu1_row_2[i + 1] |
| vabal.u8 q9, d1, d5 |
| |
| @ pu1_row_1[i + 1] - pu1_row_2[i - 1] |
| vabal.u8 q10, d4, d2 |
| |
| @ The row just loaded as row 2 becomes row 1 of the next iteration |
| vmov d0, d3 |
| vmov d1, d4 |
| vmov d2, d5 |
| |
| subs r4, r4, #1 |
| bgt detect_edge |
| |
| @ Calculate sum of absolute differences for each edge |
| @ vpadd adds adjacent 16-bit lanes: the low two results come from |
| @ columns 0-3 and the high two from columns 4-7 |
| vpadd.u16 d16, d16, d17 |
| vpadd.u16 d18, d18, d19 |
| vpadd.u16 d20, d20, d21 |
| |
| @ vpaddl adds adjacent lanes again and widens to 32 bits, leaving |
| @ lane 0 = sum over columns 0-3 and lane 1 = sum over columns 4-7 |
| vpaddl.u16 d16, d16 |
| vpaddl.u16 d18, d18 |
| vpaddl.u16 d20, d20 |
| |
| @ adiff[0] *= EDGE_BIAS_0; |
| vmul.u32 d16, d16, d30 |
| |
| @ adiff[1] *= EDGE_BIAS_1; |
| vmul.u32 d18, d18, d31 |
| |
| @ adiff[2] *= EDGE_BIAS_1; |
| vmul.u32 d20, d20, d31 |
| |
| @ Move the differences to ARM registers and pick a shift per half. |
| @ Each biased sum is at most 4 rows * 4 columns * 255 * 7 = 28560, so the |
| @ signed condition codes (gt / le) used below cannot misorder the values. |
| |
| |
| @ Compute shift for first half of the block (columns 0-3, lane 0) |
| compute_shift_1: |
| @ r5 = adiff[0], r6 = adiff[1], r7 = adiff[2] |
| vmov.u32 r5, d16[0] |
| vmov.u32 r6, d18[0] |
| vmov.u32 r7, d20[0] |
| |
| @ Compute shift, starting from the default of 0 |
| mov r8, #0 |
| |
| @ adiff[2] <= adiff[1] |
| cmp r7, r6 |
| bgt dir_45_gt_135_1 |
| |
| @ adiff[2] <= adiff[0] |
| @ shift = 1 when adiff[2] is no larger than both other sums |
| cmp r7, r5 |
| movle r8, #1 |
| |
| b compute_shift_2 |
| dir_45_gt_135_1: |
| |
| @ adiff[1] <= adiff[0] |
| cmp r6, r5 |
| @ Move -1 if less than or equal to |
| mvnle r8, #0 |
| |
| |
| compute_shift_2: |
| @ Compute shift for second half of the block (columns 4-7, lane 1) |
| vmov.u32 r5, d16[1] |
| vmov.u32 r6, d18[1] |
| vmov.u32 r7, d20[1] |
| |
| @ Compute shift, starting from the default of 0 |
| mov r9, #0 |
| |
| @ adiff[2] <= adiff[1] |
| cmp r7, r6 |
| bgt dir_45_gt_135_2 |
| |
| @ adiff[2] <= adiff[0] |
| cmp r7, r5 |
| movle r9, #1 |
| |
| b interpolate |
| dir_45_gt_135_2: |
| |
| @ adiff[1] <= adiff[0] |
| cmp r6, r5 |
| |
| @ Move -1 if less than or equal to |
| mvnle r9, #0 |
| |
| interpolate: |
| @ First half: r4 = src + shift, r5 = src + src_strd - shift |
| add r4, r10, r8 |
| add r5, r10, r2 |
| sub r5, r5, r8 |
| |
| @ Second half: same pair of pointers, 4 bytes further along the row, |
| @ using the second shift: r6 = src + 4 + shift, r7 = src + 4 + src_strd - shift |
| add r10, r10, #4 |
| add r6, r10, r9 |
| add r7, r10, r2 |
| sub r7, r7, r9 |
| @ r8 is free again now; reuse it as the output row counter |
| mov r8, #4 |
| |
| filter_loop: |
| @ Lane 0 of d0 / d2: 4 bytes of the upper / lower row for columns 0-3 |
| vld1.u32 d0[0], [r4], r2 |
| vld1.u32 d2[0], [r5], r2 |
| |
| @ Lane 1 of d0 / d2: 4 bytes of the upper / lower row for columns 4-7 |
| vld1.u32 d0[1], [r6], r2 |
| vld1.u32 d2[1], [r7], r2 |
| |
| @ Rounded average (a + b + 1) >> 1 of the two rows, 8 bytes per output row |
| vrhadd.u8 d4, d0, d2 |
| vst1.u32 d4, [r1], r3 |
| |
| subs r8, #1 |
| bgt filter_loop |
| |
| @ Restore the saved registers and return by loading the saved lr into pc |
| ldmfd sp!, {r4-r10, pc} |