| //****************************************************************************** |
| //* |
| //* Copyright (C) 2015 The Android Open Source Project |
| //* |
| //* Licensed under the Apache License, Version 2.0 (the "License"); |
| //* you may not use this file except in compliance with the License. |
| //* You may obtain a copy of the License at: |
| //* |
| //* http://www.apache.org/licenses/LICENSE-2.0 |
| //* |
| //* Unless required by applicable law or agreed to in writing, software |
| //* distributed under the License is distributed on an "AS IS" BASIS, |
| //* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| //* See the License for the specific language governing permissions and |
| //* limitations under the License. |
| //* |
| //***************************************************************************** |
| //* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
| //*/ |
| ///** |
| //****************************************************************************** |
| //* @file |
| //* ih264_intra_pred_luma_16x16_av8.s |
| //* |
| //* @brief |
| //* Contains function definitions for intra 16x16 luma prediction. |
| //* |
| //* @author |
| //* Ittiam |
| //* |
| //* @par List of Functions: |
| //* |
| //* - ih264_intra_pred_luma_16x16_mode_vert_av8() |
| //* - ih264_intra_pred_luma_16x16_mode_horz_av8() |
| //* - ih264_intra_pred_luma_16x16_mode_dc_av8() |
| //* - ih264_intra_pred_luma_16x16_mode_plane_av8() |
| //* |
| //* @remarks |
| //* None |
| //* |
| //******************************************************************************* |
| //*/ |
| |
| ///* All the functions here are replicated from ih264_intra_pred_filters.c |
| // |
| |
| ///** |
| ///** |
| ///** |
| // |
| |
| |
| .text |
| .p2align 2 |
| .include "ih264_neon_macros.s" |
| .extern ih264_gai1_intrapred_luma_plane_coeffs |
| |
| |
| |
| ///** |
| //******************************************************************************* |
| //* |
| //*ih264_intra_pred_luma_16x16_mode_vert |
| //* |
| //* @brief |
| //* Perform Intra prediction for luma_16x16 mode:vertical |
| //* |
| //* @par Description: |
| //* Perform intra prediction for luma_16x16 mode: vertical, described in sec 8.3.3.1 |
| //* |
| //* @param[in] pu1_src |
| //* UWORD8 pointer to the source |
| //* |
| //* @param[out] pu1_dst |
| //* UWORD8 pointer to the destination |
| //* |
| //* @param[in] src_strd |
| //* integer source stride |
| //* |
| //* @param[in] dst_strd |
| //* integer destination stride |
| //* |
| //* @param[in] ui_neighboravailability |
| //* availability of neighbouring pixels (not used in this function) |
| //* |
| //* @returns |
| //* |
| //* @remarks |
| //* None |
| //* |
| //******************************************************************************* |
| //void ih264_intra_pred_luma_16x16_mode_vert(UWORD8 *pu1_src, |
| // UWORD8 *pu1_dst, |
| // WORD32 src_strd, |
| // WORD32 dst_strd, |
| // WORD32 ui_neighboravailability) |
| |
| //**************Variables Vs Registers***************************************** |
| // x0 => *pu1_src |
| // x1 => *pu1_dst |
| // x2 => src_strd |
| // x3 => dst_strd |
| // x4 => ui_neighboravailability |
| |
| |
| .global ih264_intra_pred_luma_16x16_mode_vert_av8 |
| |
| // Vertical mode: the 16 bytes found at pu1_src + 17 are written unchanged |
| // to each of the 16 destination rows, stepping pu1_dst by dst_strd. |
| // Reads x0 (pu1_src), x1 (pu1_dst) and x3 (dst_strd); x2 and x4 are not read. |
| // Leaves x0 and x1 advanced and v0/v1 holding the copied row. |
| // push_v_regs / pop_v_regs are macros from ih264_neon_macros.s (not shown |
| // here); presumably they save and restore SIMD registers - confirm there. |
| ih264_intra_pred_luma_16x16_mode_vert_av8: |
| |
| push_v_regs |
| |
| // x0 -> first of the 16 bytes to replicate (pu1_src + 17) |
| add x0, x0, #17 |
| ld1 {v0.8b, v1.8b}, [x0] |
| |
| // emit one 16-byte row per repetition; x1 post-increments by dst_strd |
| .rept 16 |
| st1 {v0.8b, v1.8b}, [x1], x3 |
| .endr |
| |
| pop_v_regs |
| ret |
| |
| |
| |
| |
| |
| ///****************************************************************************** |
| |
| |
| ///** |
| //******************************************************************************* |
| //* |
| //*ih264_intra_pred_luma_16x16_mode_horz |
| //* |
| //* @brief |
| //* Perform Intra prediction for luma_16x16 mode:horizontal |
| //* |
| //* @par Description: |
| //* Perform intra prediction for luma_16x16 mode: horizontal, described in sec 8.3.3.2 |
| //* |
| //* @param[in] pu1_src |
| //* UWORD8 pointer to the source |
| //* |
| //* @param[out] pu1_dst |
| //* UWORD8 pointer to the destination |
| //* |
| //* @param[in] src_strd |
| //* integer source stride |
| //* |
| //* @param[in] dst_strd |
| //* integer destination stride |
| //* |
| //* @param[in] ui_neighboravailability |
| //* availability of neighbouring pixels (not used in this function) |
| //* |
| //* @returns |
| //* |
| //* @remarks |
| //* None |
| //* |
| //******************************************************************************* |
| //*/ |
| //void ih264_intra_pred_luma_16x16_mode_horz(UWORD8 *pu1_src, |
| // UWORD8 *pu1_dst, |
| // WORD32 src_strd, |
| // WORD32 dst_strd, |
| // WORD32 ui_neighboravailability) |
| //**************Variables Vs Registers***************************************** |
| // x0 => *pu1_src |
| // x1 => *pu1_dst |
| // x2 => src_strd |
| // x3 => dst_strd |
| // x4 => ui_neighboravailability |
| |
| .global ih264_intra_pred_luma_16x16_mode_horz_av8 |
| |
| // Horizontal mode: destination row r (r = 0..15) is filled with the single |
| // byte found at pu1_src + 15 - r, i.e. the 16 source bytes are consumed from |
| // the highest address down to the lowest. |
| // Reads x0 (pu1_src), x1 (pu1_dst) and x3 (dst_strd); x2 and x4 are not read. |
| // Writes v0 and v10-v25; v10-v15 are the reason for push_v_regs/pop_v_regs |
| // (macros from ih264_neon_macros.s, not shown here - presumably they save |
| // and restore the callee-saved SIMD registers). |
| ih264_intra_pred_luma_16x16_mode_horz_av8: |
| |
| push_v_regs |
| |
| // v0 = the 16 bytes at pu1_src |
| ld1 {v0.16b}, [x0] |
| |
| // Phase 1: broadcast each source byte across its own vector. |
| // v10 carries row 0 (byte 15) ... v25 carries row 15 (byte 0). |
| dup v10.16b, v0.b[15] |
| dup v11.16b, v0.b[14] |
| dup v12.16b, v0.b[13] |
| dup v13.16b, v0.b[12] |
| dup v14.16b, v0.b[11] |
| dup v15.16b, v0.b[10] |
| dup v16.16b, v0.b[9] |
| dup v17.16b, v0.b[8] |
| dup v18.16b, v0.b[7] |
| dup v19.16b, v0.b[6] |
| dup v20.16b, v0.b[5] |
| dup v21.16b, v0.b[4] |
| dup v22.16b, v0.b[3] |
| dup v23.16b, v0.b[2] |
| dup v24.16b, v0.b[1] |
| dup v25.16b, v0.b[0] |
| |
| // Phase 2: write the rows top to bottom; x1 post-increments by dst_strd. |
| st1 {v10.16b}, [x1], x3 |
| st1 {v11.16b}, [x1], x3 |
| st1 {v12.16b}, [x1], x3 |
| st1 {v13.16b}, [x1], x3 |
| st1 {v14.16b}, [x1], x3 |
| st1 {v15.16b}, [x1], x3 |
| st1 {v16.16b}, [x1], x3 |
| st1 {v17.16b}, [x1], x3 |
| st1 {v18.16b}, [x1], x3 |
| st1 {v19.16b}, [x1], x3 |
| st1 {v20.16b}, [x1], x3 |
| st1 {v21.16b}, [x1], x3 |
| st1 {v22.16b}, [x1], x3 |
| st1 {v23.16b}, [x1], x3 |
| st1 {v24.16b}, [x1], x3 |
| st1 {v25.16b}, [x1], x3 |
| |
| pop_v_regs |
| ret |
| |
| |
| |
| |
| |
| |
| |
| ///****************************************************************************** |
| |
| |
| ///** |
| //******************************************************************************* |
| //* |
| //*ih264_intra_pred_luma_16x16_mode_dc |
| //* |
| //* @brief |
| //* Perform Intra prediction for luma_16x16 mode:DC |
| //* |
| //* @par Description: |
| //* Perform Intra prediction for luma_16x16 mode:DC ,described in sec 8.3.3.3 |
| //* |
| //* @param[in] pu1_src |
| //* UWORD8 pointer to the source |
| //* |
| //* @param[out] pu1_dst |
| //* UWORD8 pointer to the destination |
| //* |
| //* @param[in] src_strd |
| //* integer source stride |
| //* |
| //* @param[in] dst_strd |
| //* integer destination stride |
| //* |
| //* @param[in] ui_neighboravailability |
| //* availability of neighbouring pixels |
| //* |
| //* @returns |
| //* |
| //* @remarks |
| //* None |
| //* |
| //*******************************************************************************/ |
| //void ih264_intra_pred_luma_16x16_mode_dc(UWORD8 *pu1_src, |
| // UWORD8 *pu1_dst, |
| // WORD32 src_strd, |
| // WORD32 dst_strd, |
| // WORD32 ui_neighboravailability) |
| |
| //**************Variables Vs Registers***************************************** |
| // x0 => *pu1_src |
| // x1 => *pu1_dst |
| // x2 => src_strd |
| // x3 => dst_strd |
| // x4 => ui_neighboravailability |
| |
| .global ih264_intra_pred_luma_16x16_mode_dc_av8 |
| |
| // DC mode: fills all 16 destination rows with one byte value. |
| // - bit 0 of ui_neighboravailability set: the 16 bytes at pu1_src |
| // (the "left" group) are added to the sum |
| // - bit 2 of ui_neighboravailability set: the 16 bytes at pu1_src + 17 |
| // (the "top" group) are added to the sum |
| // - value = (sum + 8 * n) >> (3 + n), n = number of groups added (1 or 2) |
| // - neither bit set: value = 128, regardless of any other bits in the |
| // argument |
| // Reads x0 (pu1_src), x1 (pu1_dst), x3 (dst_strd) and w4; x2 is not read. |
| // Only the low 32 bits of x4 are examined: the parameter is a WORD32 and the |
| // upper half of the register is not guaranteed to be zero. |
| // Scratch: x6, w10, w11, v0-v3, v10, v20. v10 is callee-saved, hence |
| // push_v_regs/pop_v_regs (macros from ih264_neon_macros.s, not shown here - |
| // presumably they save and restore the callee-saved SIMD registers). |
| ih264_intra_pred_luma_16x16_mode_dc_av8: |
| |
| push_v_regs |
| |
| // an unavailable group must contribute zero to the sum |
| movi v0.16b, #0 |
| movi v1.16b, #0 |
| // w10 = rounding bias (8 per group used), w11 = shift amount (3 + groups) |
| mov w10, #0 |
| mov w11, #3 |
| // bit 0 clear -> left group not available |
| tbz w4, #0, top_available |
| ld1 {v0.16b}, [x0] |
| add w10, w10, #8 |
| add w11, w11, #1 |
| top_available: |
| // bit 2 clear -> top group not available |
| tbz w4, #2, none_available |
| add x6, x0, #17 |
| ld1 {v1.16b}, [x6] |
| add w10, w10, #8 |
| add w11, w11, #1 |
| b summation |
| none_available: |
| // Reached only when the top group is absent. w10 is non-zero exactly when |
| // the left group was loaded, so testing it (rather than the raw argument) |
| // keeps unrelated availability bits from steering a zero sum into the |
| // averaging path. |
| cbnz w10, summation |
| movi v20.16b, #128 |
| b store |
| summation: |
| // widen to 16 bits and add the two groups lane by lane |
| uaddl v2.8h, v0.8b, v1.8b |
| uaddl2 v3.8h, v0.16b, v1.16b |
| dup v10.8h, w10 |
| // negative shift count: uqshl then performs a right shift |
| neg w11, w11 |
| dup v20.8h, w11 |
| // horizontal reduction of the 16 partial sums into lane 0 |
| add v0.8h, v2.8h, v3.8h |
| mov v1.d[0], v0.d[1] |
| add v0.4h, v0.4h, v1.4h |
| addp v0.4h, v0.4h , v0.4h |
| addp v0.4h, v0.4h , v0.4h |
| // (sum + bias) >> shift, narrowed to a byte and broadcast |
| add v0.4h, v0.4h, v10.4h |
| uqshl v0.8h, v0.8h, v20.8h |
| sqxtun v0.8b, v0.8h |
| dup v20.16b, v0.b[0] |
| |
| store: |
| |
| // one 16-byte row per store; x1 post-increments by dst_strd |
| st1 { v20.16b}, [x1], x3 |
| st1 { v20.16b}, [x1], x3 |
| st1 { v20.16b}, [x1], x3 |
| st1 { v20.16b}, [x1], x3 |
| st1 { v20.16b}, [x1], x3 |
| st1 { v20.16b}, [x1], x3 |
| st1 { v20.16b}, [x1], x3 |
| st1 { v20.16b}, [x1], x3 |
| st1 { v20.16b}, [x1], x3 |
| st1 { v20.16b}, [x1], x3 |
| st1 { v20.16b}, [x1], x3 |
| st1 { v20.16b}, [x1], x3 |
| st1 { v20.16b}, [x1], x3 |
| st1 { v20.16b}, [x1], x3 |
| st1 { v20.16b}, [x1], x3 |
| st1 { v20.16b}, [x1], x3 |
| |
| |
| |
| end_func: |
| |
| pop_v_regs |
| ret |
| |
| |
| |
| |
| |
| ///****************************************************************************** |
| |
| |
| ///** |
| //******************************************************************************* |
| //* |
| //*ih264_intra_pred_luma_16x16_mode_plane |
| //* |
| //* @brief |
| //* Perform Intra prediction for luma_16x16 mode:PLANE |
| //* |
| //* @par Description: |
| //* Perform Intra prediction for luma_16x16 mode:PLANE ,described in sec 8.3.3.4 |
| //* |
| //* @param[in] pu1_src |
| //* UWORD8 pointer to the source |
| //* |
| //* @param[out] pu1_dst |
| //* UWORD8 pointer to the destination |
| //* |
| //* @param[in] src_strd |
| //* integer source stride |
| //* |
| //* @param[in] dst_strd |
| //* integer destination stride |
| //* |
| //* @param[in] ui_neighboravailability |
| //* availability of neighbouring pixels |
| //* |
| //* @returns |
| //* |
| //* @remarks |
| //* None |
| //* |
| //*******************************************************************************/ |
| //void ih264_intra_pred_luma_16x16_mode_plane(UWORD8 *pu1_src, |
| // UWORD8 *pu1_dst, |
| // WORD32 src_strd, |
| // WORD32 dst_strd, |
| // WORD32 ui_neighboravailability) |
| |
| //**************Variables Vs Registers***************************************** |
| // x0 => *pu1_src |
| // x1 => *pu1_dst |
| // x2 => src_strd |
| // x3 => dst_strd |
| // x4 => ui_neighboravailability |
| |
| .global ih264_intra_pred_luma_16x16_mode_plane_av8 |
| // Plane mode. With s[n] = byte at pu1_src + n and k[] = the 16 bytes read |
| // from ih264_gai1_intrapred_luma_plane_coeffs (table contents are not |
| // visible here; presumably 1..16 - confirm against its definition): |
| // H = sum over i = 0..7 of k[i] * (s[25 + i] - s[23 - i]) |
| // V = sum over j = 1..8 of j * (s[8 - j] - s[8 + j]) |
| // i_b = (5 * H + 32) >> 6 |
| // i_c = (5 * V + 32) >> 6 |
| // i_a = 16 * (s[0] + s[32]) |
| // dst[y][x] = sat_u8((i_a + i_b * (k[x] - 8) + i_c * (y - 7) + 16) >> 5) |
| // Reads x0 (pu1_src), x1 (pu1_dst) and x3 (dst_strd). The incoming x2 |
| // (src_strd) and x4 (ui_neighboravailability) are overwritten without |
| // being read. |
| ih264_intra_pred_luma_16x16_mode_plane_av8: |
| |
| push_v_regs |
| // x20 is used as scratch below; x19 is saved with it but never touched |
| stp x19, x20, [sp, #-16]! |
| // x2 takes over as the destination pointer; x1 is reused for source access |
| mov x2, x1 |
| add x1, x0, #17 |
| // x0 = pu1_src + 15 |
| add x0, x0, #15 |
| mov x8, #9 |
| // x1 = pu1_src + 16 |
| sub x1, x1, #1 |
| mov x10, x1 //top_left |
| // x4 = -1, used only as a constant in the two address computations below |
| mov x4, #-1 |
| // v2 = s[16..23]; the post-increment of 9 leaves x1 = pu1_src + 25 |
| ld1 {v2.2s}, [x1], x8 |
| |
| // x7 = address of the coefficient table, fetched through the GOT |
| adrp x7, :got:ih264_gai1_intrapred_luma_plane_coeffs |
| ldr x7, [x7, #:got_lo12:ih264_gai1_intrapred_luma_plane_coeffs] |
| |
| // v0 = s[25..32] |
| ld1 {v0.2s}, [x1] |
| // byte-reverse so that lane i of v2 holds s[23 - i] |
| rev64 v2.8b, v2.8b |
| // v6 = k[0..7], v7 = k[8..15] |
| ld1 {v6.2s, v7.2s}, [x7] |
| // v0.h[i] = s[25 + i] - s[23 - i] |
| usubl v0.8h, v0.8b, v2.8b |
| uxtl v16.8h, v6.8b |
| // weight each difference by k[i] |
| mul v0.8h, v0.8h , v16.8h |
| uxtl v18.8h, v7.8b |
| // x7 = pu1_src + 7 (walks downwards), x0 = pu1_src + 9 (walks upwards) |
| add x7, x0, x4, lsl #3 |
| sub x0, x7, x4, lsl #1 |
| // x20 = -1, x14 = 1; x14 is not read again in this function |
| sub x20, x4, #0x0 |
| neg x14, x20 |
| // Pairwise add. The upper four result lanes come from v1, which this |
| // function never initialises, but only the lower four lanes are consumed |
| // by the saddlp that follows. |
| addp v0.8h, v0.8h, v1.8h |
| // --- V accumulation in x12, interleaved with the NEON reduction of H --- |
| // Each step loads s[8 - j] through x7 and s[8 + j] through x0. ldrb already |
| // zero-extends, so the sxtw that follows each load leaves the value as is. |
| ldrb w8, [x7], #-1 |
| sxtw x8, w8 |
| ldrb w9, [x0], #1 |
| sxtw x9, w9 |
| saddlp v0.2s, v0.4h |
| // j = 1 term |
| sub x12, x8, x9 |
| ldrb w8, [x7], #-1 |
| sxtw x8, w8 |
| // v0.d[0] = H |
| saddlp v0.1d, v0.2s |
| ldrb w9, [x0], #1 |
| sxtw x9, w9 |
| sub x8, x8, x9 |
| // v2 = 4 * H |
| shl v2.2s, v0.2s, #2 |
| // j = 2 term |
| add x12, x12, x8, lsl #1 |
| // v0 = 5 * H |
| add v0.2s, v0.2s , v2.2s |
| ldrb w8, [x7], #-1 |
| sxtw x8, w8 |
| ldrb w9, [x0], #1 |
| sxtw x9, w9 |
| // rounding shift: (5 * H + 32) >> 6 |
| srshr v0.2s, v0.2s, #6 // i_b = D0[0] |
| sub x8, x8, x9 |
| ldrb w5, [x7], #-1 |
| sxtw x5, w5 |
| // x8 = 3 * difference (j = 3 term) |
| add x8, x8, x8, lsl #1 |
| // v4 = i_b (low 16 bits) in every halfword lane |
| dup v4.8h, v0.h[0] |
| add x12, x12, x8 |
| ldrb w9, [x0], #1 |
| sxtw x9, w9 |
| // v0 = i_b * k[0..7] |
| mul v0.8h, v4.8h , v16.8h |
| sub x5, x5, x9 |
| // v2 = i_b * k[8..15] |
| mul v2.8h, v4.8h , v18.8h |
| // j = 4 term |
| add x12, x12, x5, lsl #2 |
| ldrb w8, [x7], #-1 |
| sxtw x8, w8 |
| ldrb w9, [x0], #1 |
| sxtw x9, w9 |
| sub x8, x8, x9 |
| ldrb w5, [x7], #-1 |
| sxtw x5, w5 |
| // x8 = 5 * difference (j = 5 term) |
| add x8, x8, x8, lsl #2 |
| ldrb w6, [x0], #1 |
| sxtw x6, w6 |
| add x12, x12, x8 |
| ldrb w8, [x7], #-1 |
| sxtw x8, w8 |
| ldrb w9, [x0], #1 |
| sxtw x9, w9 |
| sub x5, x5, x6 |
| sub x8, x8, x9 |
| // x5 = 3 * difference, added below shifted left by 1 (j = 6 term) |
| add x5, x5, x5, lsl #1 |
| // x20 = -7 * difference, negated into x8 = 7 * difference (j = 7 term) |
| sub x20, x8, x8, lsl #3 |
| neg x8, x20 |
| add x12, x12, x5, lsl #1 |
| // w5 = s[0] (x7 has reached pu1_src) |
| ldrb w5, [x7], #-1 |
| sxtw x5, w5 |
| // w6 = s[16] |
| ldrb w6, [x10] //top_left |
| sxtw x6, w6 |
| add x12, x12, x8 |
| sub x9, x5, x6 |
| // w6 = s[32] (x1 still points at pu1_src + 25) |
| ldrb w6, [x1, #7] |
| sxtw x6, w6 |
| // j = 8 term; x12 now holds V, and the next instructions on x12 turn it |
| // into i_c |
| add x12, x12, x9, lsl #3 // i_c = x12 |
| // x8 = s[0] + s[32] |
| add x8, x5, x6 |
| // x12 = 5 * V |
| add x12, x12, x12, lsl #2 |
| lsl x8, x8, #4 // i_a = x8 |
| add x12, x12, #0x20 |
| // Logical shift; only the low 16 bits of the result are used (dup into |
| // halfword lanes below), and those bits match an arithmetic shift. |
| lsr x12, x12, #6 |
| // v28 = 8 * i_b |
| shl v28.8h, v4.8h, #3 |
| // v6 = i_c in every lane |
| dup v6.8h, w12 |
| // v30 = i_a in every lane |
| dup v30.8h, w8 |
| // v26 = 8 * i_c |
| shl v26.8h, v6.8h, #3 |
| sub v30.8h, v30.8h , v28.8h |
| // v30 = i_a - 8 * i_b - 8 * i_c |
| sub v30.8h, v30.8h , v26.8h |
| // v28 = i_a - 8 * i_b - 7 * i_c (row 0 base) |
| add v28.8h, v30.8h , v6.8h |
| // v26 = row 0, columns 0..7; v28 = row 0, columns 8..15 (before the final |
| // rounding shift) |
| add v26.8h, v28.8h , v0.8h |
| add v28.8h, v28.8h , v2.8h |
| // --- Row output --- |
| // Each row: sqrshrun #5 rounds, shifts right by 5 and saturates to unsigned |
| // bytes; adding v6 (i_c) to v26/v28 advances to the next row. Results |
| // alternate between v20/v21 and v22/v23 so a store can overlap the |
| // computation of the following row. x2 post-increments by dst_strd. |
| sqrshrun v20.8b, v26.8h, #5 |
| sqrshrun v21.8b, v28.8h, #5 |
| add v26.8h, v26.8h , v6.8h |
| add v28.8h, v28.8h , v6.8h |
| sqrshrun v22.8b, v26.8h, #5 |
| st1 {v20.2s, v21.2s}, [x2], x3 |
| sqrshrun v23.8b, v28.8h, #5 |
| add v26.8h, v26.8h , v6.8h |
| add v28.8h, v28.8h , v6.8h |
| sqrshrun v20.8b, v26.8h, #5 |
| st1 {v22.2s, v23.2s}, [x2], x3 |
| sqrshrun v21.8b, v28.8h, #5 |
| add v26.8h, v26.8h , v6.8h |
| add v28.8h, v28.8h , v6.8h |
| sqrshrun v22.8b, v26.8h, #5 |
| st1 {v20.2s, v21.2s}, [x2], x3 |
| sqrshrun v23.8b, v28.8h, #5 |
| add v26.8h, v26.8h , v6.8h |
| add v28.8h, v28.8h , v6.8h |
| sqrshrun v20.8b, v26.8h, #5 |
| st1 {v22.2s, v23.2s}, [x2], x3 |
| sqrshrun v21.8b, v28.8h, #5 |
| add v26.8h, v26.8h , v6.8h |
| add v28.8h, v28.8h , v6.8h |
| sqrshrun v22.8b, v26.8h, #5 |
| st1 {v20.2s, v21.2s}, [x2], x3 |
| sqrshrun v23.8b, v28.8h, #5 |
| add v26.8h, v26.8h , v6.8h |
| add v28.8h, v28.8h , v6.8h |
| sqrshrun v20.8b, v26.8h, #5 |
| st1 {v22.2s, v23.2s}, [x2], x3 |
| sqrshrun v21.8b, v28.8h, #5 |
| add v26.8h, v26.8h , v6.8h |
| add v28.8h, v28.8h , v6.8h |
| sqrshrun v22.8b, v26.8h, #5 |
| st1 {v20.2s, v21.2s}, [x2], x3 |
| sqrshrun v23.8b, v28.8h, #5 |
| add v26.8h, v26.8h , v6.8h |
| add v28.8h, v28.8h , v6.8h |
| sqrshrun v20.8b, v26.8h, #5 |
| st1 {v22.2s, v23.2s}, [x2], x3 |
| sqrshrun v21.8b, v28.8h, #5 |
| add v26.8h, v26.8h , v6.8h |
| add v28.8h, v28.8h , v6.8h |
| sqrshrun v22.8b, v26.8h, #5 |
| st1 {v20.2s, v21.2s}, [x2], x3 |
| sqrshrun v23.8b, v28.8h, #5 |
| add v26.8h, v26.8h , v6.8h |
| add v28.8h, v28.8h , v6.8h |
| sqrshrun v20.8b, v26.8h, #5 |
| st1 {v22.2s, v23.2s}, [x2], x3 |
| sqrshrun v21.8b, v28.8h, #5 |
| add v26.8h, v26.8h , v6.8h |
| add v28.8h, v28.8h , v6.8h |
| sqrshrun v22.8b, v26.8h, #5 |
| st1 {v20.2s, v21.2s}, [x2], x3 |
| sqrshrun v23.8b, v28.8h, #5 |
| add v26.8h, v26.8h , v6.8h |
| add v28.8h, v28.8h , v6.8h |
| sqrshrun v20.8b, v26.8h, #5 |
| st1 {v22.2s, v23.2s}, [x2], x3 |
| sqrshrun v21.8b, v28.8h, #5 |
| add v26.8h, v26.8h , v6.8h |
| add v28.8h, v28.8h , v6.8h |
| sqrshrun v22.8b, v26.8h, #5 |
| st1 {v20.2s, v21.2s}, [x2], x3 |
| sqrshrun v23.8b, v28.8h, #5 |
| st1 {v22.2s, v23.2s}, [x2], x3 |
| |
| end_func_plane: |
| |
| // undo the prologue in reverse order |
| ldp x19, x20, [sp], #16 |
| pop_v_regs |
| ret |
| |