// blob: 62e8cee9f25f9bd1fe402ba3853d50a4c79e817d [file] [log] [blame]
//******************************************************************************
//*
//* Copyright (C) 2015 The Android Open Source Project
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************
//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
///**
//******************************************************************************
//* @file
//* ih264_intra_pred_luma_4x4_av8.s
//*
//* @brief
//* Contains function definitions for intra 4x4 luma prediction.
//*
//* @author
//* Ittiam
//*
//* @par List of Functions:
//*
//* -ih264_intra_pred_luma_4x4_mode_vert_av8
//* -ih264_intra_pred_luma_4x4_mode_horz_av8
//* -ih264_intra_pred_luma_4x4_mode_dc_av8
//* -ih264_intra_pred_luma_4x4_mode_diag_dl_av8
//* -ih264_intra_pred_luma_4x4_mode_diag_dr_av8
//* -ih264_intra_pred_luma_4x4_mode_vert_r_av8
//* -ih264_intra_pred_luma_4x4_mode_horz_d_av8
//* -ih264_intra_pred_luma_4x4_mode_vert_l_av8
//* -ih264_intra_pred_luma_4x4_mode_horz_u_av8
//*
//* @remarks
//* None
//*
//*******************************************************************************
//*/
///* All the functions here are replicated from ih264_intra_pred_filters.c
//
///**
///**
///**
//
.text                               // everything below is emitted into the code section
.p2align 2                          // align to 2^2 = 4 bytes
.include "ih264_neon_macros.s"      // NOTE(review): presumably supplies push_v_regs / pop_v_regs used by every routine below - confirm
///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_4x4_mode_vert
//*
//* @brief
//* Perform Intra prediction for luma_4x4 mode:vertical
//*
//* @par Description:
//* Perform Intra prediction for luma_4x4 mode:vertical ,described in sec 8.3.1.2.1
//*
//* @param[in] pu1_src
//* UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//* UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//* integer source stride
//*
//* @param[in] dst_strd
//* integer destination stride
//*
//* @param[in] ui_neighboravailability
//* availability of neighbouring pixels(Not used in this function)
//*
//* @returns
//*
//* @remarks
//* None
//*
//*******************************************************************************
//void ih264_intra_pred_luma_4x4_mode_vert(UWORD8 *pu1_src,
// UWORD8 *pu1_dst,
// WORD32 src_strd,
// WORD32 dst_strd,
// WORD32 ui_neighboravailability)
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
// x2 => src_strd
// x3 => dst_strd
// x4 => ui_neighboravailability
// 4x4 luma intra prediction, vertical mode.
// Copies the 4 bytes pu1_src[5..8] into each of the 4 destination rows.
//   x0 = pu1_src   x1 = pu1_dst   w3 = dst_strd   (x2 and x4 are not read)
.global ih264_intra_pred_luma_4x4_mode_vert_av8
ih264_intra_pred_luma_4x4_mode_vert_av8:
    push_v_regs
    sxtw      x3, w3                    // dst_strd is a 32-bit int: AAPCS64 leaves the upper
                                        // half of x3 unspecified, so widen it before it is
                                        // used as a 64-bit post-index offset
    add       x0, x0, #5                // x0 -> pu1_src[5]
    ld1       {v0.s}[0], [x0]           // v0.s[0] = pu1_src[5..8]
    st1       {v0.s}[0], [x1], x3       // row 0
    st1       {v0.s}[0], [x1], x3       // row 1
    st1       {v0.s}[0], [x1], x3       // row 2
    st1       {v0.s}[0], [x1], x3       // row 3
    pop_v_regs
    ret
///******************************************************************************
///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_4x4_mode_horz
//*
//* @brief
//* Perform Intra prediction for luma_4x4 mode:horizontal
//*
//* @par Description:
//* Perform Intra prediction for luma_4x4 mode:horizontal ,described in sec 8.3.1.2.2
//*
//* @param[in] pu1_src
//* UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//* UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//* integer source stride
//*
//* @param[in] dst_strd
//* integer destination stride
//*
//* @param[in] ui_neighboravailability
//* availability of neighbouring pixels(Not used in this function)
//*
//* @returns
//*
//* @remarks
//* None
//*
//*******************************************************************************
//*/
//void ih264_intra_pred_luma_4x4_mode_horz(UWORD8 *pu1_src,
// UWORD8 *pu1_dst,
// WORD32 src_strd,
// WORD32 dst_strd,
// WORD32 ui_neighboravailability)
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
// x2 => src_strd
// x3 => dst_strd
// x4 => ui_neighboravailability
// 4x4 luma intra prediction, horizontal mode.
// Destination row r (r = 0..3) is filled with the single byte pu1_src[3 - r].
//   x0 = pu1_src   x1 = pu1_dst   w3 = dst_strd   (x2 and x4 are not read)
.global ih264_intra_pred_luma_4x4_mode_horz_av8
ih264_intra_pred_luma_4x4_mode_horz_av8:
    push_v_regs
    sxtw      x3, w3                    // dst_strd is a 32-bit int: AAPCS64 leaves the upper
                                        // half of x3 unspecified, so widen it before it is
                                        // used as a 64-bit post-index offset
    ld1       {v1.s}[0], [x0]           // v1.b[0..3] = pu1_src[0..3]
    dup       v0.8b, v1.b[3]            // row 0 value = pu1_src[3]
    dup       v2.8b, v1.b[2]            // row 1 value = pu1_src[2]
    st1       {v0.s}[0], [x1], x3       // row 0
    dup       v3.8b, v1.b[1]            // row 2 value = pu1_src[1]
    st1       {v2.s}[0], [x1], x3       // row 1
    dup       v4.8b, v1.b[0]            // row 3 value = pu1_src[0]
    st1       {v3.s}[0], [x1], x3       // row 2
    st1       {v4.s}[0], [x1], x3       // row 3
    pop_v_regs
    ret
///******************************************************************************
///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_4x4_mode_dc
//*
//* @brief
//* Perform Intra prediction for luma_4x4 mode:DC
//*
//* @par Description:
//* Perform Intra prediction for luma_4x4 mode:DC ,described in sec 8.3.1.2.3
//*
//* @param[in] pu1_src
//* UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//* UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//* integer source stride
//*
//* @param[in] dst_strd
//* integer destination stride
//*
//* @param[in] ui_neighboravailability
//* availability of neighbouring pixels
//*
//* @returns
//*
//* @remarks
//* None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_dc(UWORD8 *pu1_src,
// UWORD8 *pu1_dst,
// WORD32 src_strd,
// WORD32 dst_strd,
// WORD32 ui_neighboravailability)
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
// x2 => src_strd
// x3 => dst_strd
// x4 => ui_neighboravailability
// 4x4 luma intra prediction, DC mode.
// Fills the 4x4 destination block with one value chosen from the availability flags:
//   left and top  : (sum(pu1_src[0..3]) + sum(pu1_src[5..8]) + 4) >> 3
//   top only      : (sum(pu1_src[5..8]) + 2) >> 2
//   left only     : (sum(pu1_src[0..3]) + 2) >> 2
//   neither       : 128
//   x0 = pu1_src   x1 = pu1_dst   w3 = dst_strd   (x2 is not read)
//   w4 = ui_neighboravailability: bit 0 = left available, bit 2 = top available
//   w5 accumulates the sum and ends up holding the DC value.
.global ih264_intra_pred_luma_4x4_mode_dc_av8
ih264_intra_pred_luma_4x4_mode_dc_av8:
    push_v_regs
    sxtw      x3, w3                    // dst_strd is a 32-bit int: AAPCS64 leaves the upper
                                        // half of x3 unspecified, so widen it before it is
                                        // used as a 64-bit post-index offset
    tbz       w4, #0, top_available     // left not available

    // left available: sum pu1_src[0..3] (ldrb zero-extends, no further extension needed)
    ldrb      w5, [x0, #3]
    ldrb      w6, [x0, #2]
    ldrb      w7, [x0, #1]
    ldrb      w8, [x0]
    add       w5, w5, w6
    add       w7, w7, w8
    add       w5, w5, w7                // w5 = left sum
    tbz       w4, #2, left_available    // top not available -> only left

    // both left and top available: add pu1_src[5..8]
    ldrb      w6, [x0, #5]
    ldrb      w7, [x0, #6]
    ldrb      w8, [x0, #7]
    ldrb      w9, [x0, #8]
    add       w6, w6, w7
    add       w8, w8, w9
    add       w5, w5, w6
    add       w5, w5, w8                // w5 = left sum + top sum
    add       w5, w5, #4                // rounding for the divide by 8
    lsr       w5, w5, #3
    b         intra_4x4_dc_store_rows

top_available:                          // left is not available here
    tbz       w4, #2, none_available    // top not available either
    // only top available: sum pu1_src[5..8]
    ldrb      w6, [x0, #5]
    ldrb      w7, [x0, #6]
    ldrb      w8, [x0, #7]
    ldrb      w9, [x0, #8]
    add       w5, w6, w7
    add       w8, w8, w9
    add       w5, w5, w8                // w5 = top sum
    add       w5, w5, #2                // rounding for the divide by 4
    lsr       w5, w5, #2
    b         intra_4x4_dc_store_rows

left_available:                         // only left available, w5 = left sum
    add       w5, w5, #2                // rounding for the divide by 4
    lsr       w5, w5, #2
    b         intra_4x4_dc_store_rows

none_available:                         // neither neighbour available
    mov       w5, #128

intra_4x4_dc_store_rows:                // common tail: w5 = DC value (0..255)
    dup       v0.8b, w5
    st1       {v0.s}[0], [x1], x3       // row 0
    st1       {v0.s}[0], [x1], x3       // row 1
    st1       {v0.s}[0], [x1], x3       // row 2
    st1       {v0.s}[0], [x1], x3       // row 3
end_func:
    pop_v_regs
    ret
///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_4x4_mode_diag_dl
//*
//* @brief
//* Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left
//*
//* @par Description:
//* Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left ,described in sec 8.3.1.2.4
//*
//* @param[in] pu1_src
//* UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//* UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//* integer source stride
//*
//* @param[in] dst_strd
//* integer destination stride
//*
//* @param[in] ui_neighboravailability
//* availability of neighbouring pixels
//*
//* @returns
//*
//* @remarks
//* None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_diag_dl(UWORD8 *pu1_src,
// UWORD8 *pu1_dst,
// WORD32 src_strd,
// WORD32 dst_strd,
// WORD32 ui_neighboravailability)
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
// x2 => src_strd
// x3 => dst_strd
// x4 => ui_neighboravailability
// 4x4 luma intra prediction, diagonal down-left mode.
// With t[i] = pu1_src[5 + i] (i = 0..7), computes
//   F[i] = (t[i] + 2*t[i+1] + t[i+2] + 2) >> 2   for i = 0..5
//   F[6] = (t[6] + 3*t[7] + 2) >> 2
// and writes destination row r (r = 0..3) as F[r .. r+3].
//   x0 = pu1_src   x1 = pu1_dst   w3 = dst_strd   (x2 and x4 are not read)
.global ih264_intra_pred_luma_4x4_mode_diag_dl_av8
ih264_intra_pred_luma_4x4_mode_diag_dl_av8:
    push_v_regs
    sxtw      x3, w3                    // dst_strd is a 32-bit int: AAPCS64 leaves the upper
                                        // half of x3 unspecified, so widen it before it is
                                        // used as a 64-bit offset
    add       x0, x0, #5                // x0 -> t[0]
    sub       x5, x3, #2                // step from column 2 of a row to column 0 of the next
    add       x6, x0, #7                // x6 -> t[7]
    ld1       {v0.8b}, [x0]             // v0 = t[0..7]
    ext       v1.8b, v0.8b, v0.8b, #1   // v1 = t[1..7], t[0]
    ext       v2.8b, v0.8b, v0.8b, #2   // v2 = t[2..7], t[0], t[1]
    ld1       {v2.b}[6], [x6]           // v2[6] = t[7], so lane 6 sums t[6] + 3*t[7]
    uaddl     v20.8h, v0.8b, v1.8b      // t[i] + t[i+1]
    uaddl     v22.8h, v1.8b, v2.8b      // t[i+1] + t[i+2]
    add       v24.8h, v20.8h, v22.8h    // t[i] + 2*t[i+1] + t[i+2]
    sqrshrun  v3.8b, v24.8h, #2         // v3 = F (rounded >> 2); lane 7 is not stored
    st1       {v3.s}[0], [x1], x3       // row 0 = F[0..3]
    ext       v4.8b, v3.8b, v3.8b, #1   // v4 = F[1..7], F[0]
    st1       {v4.s}[0], [x1], x3       // row 1 = F[1..4]
    st1       {v3.h}[1], [x1], #2       // row 2, cols 0-1 = F[2..3]
    st1       {v3.h}[2], [x1], x5       // row 2, cols 2-3 = F[4..5]
    st1       {v4.h}[1], [x1], #2       // row 3, cols 0-1 = F[3..4]
    st1       {v4.h}[2], [x1]           // row 3, cols 2-3 = F[5..6]
end_func_diag_dl:
    pop_v_regs
    ret
///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_4x4_mode_diag_dr
//*
//* @brief
//* Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right
//*
//* @par Description:
//* Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right ,described in sec 8.3.1.2.5
//*
//* @param[in] pu1_src
//* UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//* UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//* integer source stride
//*
//* @param[in] dst_strd
//* integer destination stride
//*
//* @param[in] ui_neighboravailability
//* availability of neighbouring pixels
//*
//* @returns
//*
//* @remarks
//* None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_diag_dr(UWORD8 *pu1_src,
// UWORD8 *pu1_dst,
// WORD32 src_strd,
// WORD32 dst_strd,
// WORD32 ui_neighboravailability)
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
// x2 => src_strd
// x3 => dst_strd
// x4 => ui_neighboravailability
// 4x4 luma intra prediction, diagonal down-right mode.
// With s[i] = pu1_src[i] (i = 0..8), computes
//   F[i] = (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2   for i = 0..6
// and writes destination row r (r = 0..3) as F[3-r .. 6-r].
//   x0 = pu1_src   x1 = pu1_dst   w3 = dst_strd   (x2 and x4 are not read)
.global ih264_intra_pred_luma_4x4_mode_diag_dr_av8
ih264_intra_pred_luma_4x4_mode_diag_dr_av8:
    push_v_regs
    sxtw      x3, w3                    // dst_strd is a 32-bit int: AAPCS64 leaves the upper
                                        // half of x3 unspecified, so widen it before it is
                                        // used as a 64-bit offset
    ld1       {v0.8b}, [x0]             // v0 = s[0..7]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]             // v1 = s[1..8]
    ext       v2.8b, v1.8b, v1.8b, #1   // v2 = s[2..8], s[1]
    uaddl     v20.8h, v0.8b, v1.8b      // s[i] + s[i+1]
    uaddl     v22.8h, v1.8b, v2.8b      // s[i+1] + s[i+2]
    add       v24.8h, v20.8h, v22.8h    // s[i] + 2*s[i+1] + s[i+2]
    sqrshrun  v3.8b, v24.8h, #2         // v3 = F (rounded >> 2); lane 7 is not stored
    ext       v4.8b, v3.8b, v3.8b, #1   // v4 = F[1..7], F[0]
    sub       x5, x3, #2                // step from column 2 of a row to column 0 of the next
    st1       {v4.h}[1], [x1], #2       // row 0, cols 0-1 = F[3..4]
    st1       {v4.h}[2], [x1], x5       // row 0, cols 2-3 = F[5..6]
    st1       {v3.h}[1], [x1], #2       // row 1, cols 0-1 = F[2..3]
    st1       {v3.h}[2], [x1], x5       // row 1, cols 2-3 = F[4..5]
    st1       {v4.s}[0], [x1], x3       // row 2 = F[1..4]
    st1       {v3.s}[0], [x1], x3       // row 3 = F[0..3]
end_func_diag_dr:
    pop_v_regs
    ret
///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_4x4_mode_vert_r
//*
//* @brief
//* Perform Intra prediction for luma_4x4 mode:Vertical_Right
//*
//* @par Description:
//* Perform Intra prediction for luma_4x4 mode:Vertical_Right ,described in sec 8.3.1.2.6
//*
//* @param[in] pu1_src
//* UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//* UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//* integer source stride
//*
//* @param[in] dst_strd
//* integer destination stride
//*
//* @param[in] ui_neighboravailability
//* availability of neighbouring pixels
//*
//* @returns
//*
//* @remarks
//* None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_vert_r(UWORD8 *pu1_src,
// UWORD8 *pu1_dst,
// WORD32 src_strd,
// WORD32 dst_strd,
// WORD32 ui_neighboravailability)
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
// x2 => src_strd
// x3 => dst_strd
// x4 => ui_neighboravailability
// 4x4 luma intra prediction, vertical-right mode.
// With s[i] = pu1_src[i] (i = 0..8), computes
//   A[i] = (s[i] + s[i+1] + 1) >> 1               for i = 0..7
//   F[i] = (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2    for i = 0..6
// and writes
//   row 0 = A[4], A[5], A[6], A[7]
//   row 1 = F[3], F[4], F[5], F[6]
//   row 2 = F[2], A[4], A[5], A[6]
//   row 3 = F[1], F[3], F[4], F[5]
//   x0 = pu1_src   x1 = pu1_dst   w3 = dst_strd   (x2 and x4 are not read)
.global ih264_intra_pred_luma_4x4_mode_vert_r_av8
ih264_intra_pred_luma_4x4_mode_vert_r_av8:
    push_v_regs
    sxtw      x3, w3                    // dst_strd is a 32-bit int: AAPCS64 leaves the upper
                                        // half of x3 unspecified, so widen it before it is
                                        // used as a 64-bit offset
    ld1       {v0.8b}, [x0]             // v0 = s[0..7]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]             // v1 = s[1..8]
    ext       v2.8b, v1.8b, v1.8b, #1   // v2 = s[2..8], s[1]
    uaddl     v20.8h, v0.8b, v1.8b      // s[i] + s[i+1]
    uaddl     v22.8h, v1.8b, v2.8b      // s[i+1] + s[i+2]
    add       v24.8h, v20.8h, v22.8h    // s[i] + 2*s[i+1] + s[i+2]
    sqrshrun  v4.8b, v20.8h, #1         // v4 = A
    sqrshrun  v3.8b, v24.8h, #2         // v3 = F; lane 7 is not stored
    ext       v5.8b, v3.8b, v3.8b, #3   // v5 = F[3..7], F[0..2]
    st1       {v4.s}[1], [x1], x3       // row 0 = A[4..7]
    st1       {v5.s}[0], [x1], x3       // row 1 = F[3..6]
    sub       x8, x3, #3                // step from column 3 of a row to column 0 of the next
    st1       {v3.b}[2], [x1], #1       // row 2, col 0    = F[2]
    st1       {v4.h}[2], [x1], #2       // row 2, cols 1-2 = A[4..5]
    st1       {v4.b}[6], [x1], x8       // row 2, col 3    = A[6]
    st1       {v3.b}[1], [x1], #1       // row 3, col 0    = F[1]
    st1       {v5.h}[0], [x1], #2       // row 3, cols 1-2 = F[3..4]
    st1       {v5.b}[2], [x1]           // row 3, col 3    = F[5]
end_func_vert_r:
    pop_v_regs
    ret
///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_4x4_mode_horz_d
//*
//* @brief
//* Perform Intra prediction for luma_4x4 mode:Horizontal_Down
//*
//* @par Description:
//* Perform Intra prediction for luma_4x4 mode:Horizontal_Down ,described in sec 8.3.1.2.7
//*
//* @param[in] pu1_src
//* UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//* UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//* integer source stride
//*
//* @param[in] dst_strd
//* integer destination stride
//*
//* @param[in] ui_neighboravailability
//* availability of neighbouring pixels
//*
//* @returns
//*
//* @remarks
//* None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_horz_d(UWORD8 *pu1_src,
// UWORD8 *pu1_dst,
// WORD32 src_strd,
// WORD32 dst_strd,
// WORD32 ui_neighboravailability)
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
// x2 => src_strd
// x3 => dst_strd
// x4 => ui_neighboravailability
// 4x4 luma intra prediction, horizontal-down mode.
// With s[i] = pu1_src[i] (i = 0..8), computes
//   A[i] = (s[i] + s[i+1] + 1) >> 1               for i = 0..3
//   F[i] = (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2    for i = 0..5
// and writes
//   row 0 = A[3], F[3], F[4], F[5]
//   row 1 = A[2], F[2], A[3], F[3]
//   row 2 = A[1], F[1], A[2], F[2]
//   row 3 = A[0], F[0], A[1], F[1]
//   x0 = pu1_src   x1 = pu1_dst   w3 = dst_strd   (x2 and x4 are not read)
.global ih264_intra_pred_luma_4x4_mode_horz_d_av8
ih264_intra_pred_luma_4x4_mode_horz_d_av8:
    push_v_regs
    sxtw      x3, w3                    // dst_strd is a 32-bit int: AAPCS64 leaves the upper
                                        // half of x3 unspecified, so widen it before it is
                                        // used as a 64-bit offset
    ld1       {v0.8b}, [x0]             // v0 = s[0..7]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]             // v1 = s[1..8]
    ext       v2.8b, v1.8b, v0.8b, #1   // v2 = s[2..8], s[0]
    uaddl     v20.8h, v0.8b, v1.8b      // s[i] + s[i+1]
    uaddl     v22.8h, v1.8b, v2.8b      // s[i+1] + s[i+2]
    add       v24.8h, v20.8h, v22.8h    // s[i] + 2*s[i+1] + s[i+2]
    sqrshrun  v4.8b, v20.8h, #1         // v4 = A
    sqrshrun  v5.8b, v24.8h, #2         // v5 = F
    sub       x5, x3, #2                // step from column 2 of a row to column 0 of the next
    mov       v6.8b, v5.8b              // v6 = F, kept un-interleaved for row 0
    trn1      v10.8b, v4.8b, v5.8b      // v10 = A0,F0,A2,F2,A4,F4,A6,F6
    trn2      v5.8b, v4.8b, v5.8b       // v5  = A1,F1,A3,F3,A5,F5,A7,F7
    mov       v4.8b, v10.8b             // v4  = A0,F0,A2,F2,...
    st1       {v5.h}[1], [x1], #2       // row 0, cols 0-1 = A[3], F[3]
    st1       {v6.h}[2], [x1], x5       // row 0, cols 2-3 = F[4], F[5]
    st1       {v4.h}[1], [x1], #2       // row 1, cols 0-1 = A[2], F[2]
    st1       {v5.h}[1], [x1], x5       // row 1, cols 2-3 = A[3], F[3]
    st1       {v5.h}[0], [x1], #2       // row 2, cols 0-1 = A[1], F[1]
    st1       {v4.h}[1], [x1], x5       // row 2, cols 2-3 = A[2], F[2]
    st1       {v4.h}[0], [x1], #2       // row 3, cols 0-1 = A[0], F[0]
    st1       {v5.h}[0], [x1], x5       // row 3, cols 2-3 = A[1], F[1]
end_func_horz_d:
    pop_v_regs
    ret
///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_4x4_mode_vert_l
//*
//* @brief
//* Perform Intra prediction for luma_4x4 mode:Vertical_Left
//*
//* @par Description:
//* Perform Intra prediction for luma_4x4 mode:Vertical_Left ,described in sec 8.3.1.2.8
//*
//* @param[in] pu1_src
//* UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//* UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//* integer source stride
//*
//* @param[in] dst_strd
//* integer destination stride
//*
//* @param[in] ui_neighboravailability
//* availability of neighbouring pixels
//*
//* @returns
//*
//* @remarks
//* None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_vert_l(UWORD8 *pu1_src,
// UWORD8 *pu1_dst,
// WORD32 src_strd,
// WORD32 dst_strd,
// WORD32 ui_neighboravailability)
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
// x2 => src_strd
// x3 => dst_strd
// x4 => ui_neighboravailability
// 4x4 luma intra prediction, vertical-left mode.
// With u[i] = pu1_src[4 + i] (i = 0..8), computes
//   A[i] = (u[i] + u[i+1] + 1) >> 1               for i = 1..5
//   F[i] = (u[i] + 2*u[i+1] + u[i+2] + 2) >> 2    for i = 1..5
// and writes
//   row 0 = A[1..4]
//   row 1 = F[1..4]
//   row 2 = A[2..5]
//   row 3 = F[2..5]
//   x0 = pu1_src   x1 = pu1_dst   w3 = dst_strd   (x2 and x4 are not read)
.global ih264_intra_pred_luma_4x4_mode_vert_l_av8
ih264_intra_pred_luma_4x4_mode_vert_l_av8:
    push_v_regs
    sxtw      x3, w3                    // dst_strd is a 32-bit int: AAPCS64 leaves the upper
                                        // half of x3 unspecified, so widen it before it is
                                        // used as a 64-bit post-index offset
    add       x0, x0, #4
    ld1       {v0.8b}, [x0]             // v0 = u[0..7]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]             // v1 = u[1..8]
    ext       v2.8b, v1.8b, v0.8b, #1   // v2 = u[2..8], u[0]
    uaddl     v20.8h, v0.8b, v1.8b      // u[i] + u[i+1]
    uaddl     v22.8h, v1.8b, v2.8b      // u[i+1] + u[i+2]
    add       v24.8h, v20.8h, v22.8h    // u[i] + 2*u[i+1] + u[i+2]
    sqrshrun  v4.8b, v20.8h, #1         // v4 = A
    sqrshrun  v5.8b, v24.8h, #2         // v5 = F
    ext       v6.8b, v4.8b, v4.8b, #1   // v6 starts at A[1]
    ext       v7.8b, v5.8b, v5.8b, #1   // v7 starts at F[1]
    st1       {v6.s}[0], [x1], x3       // row 0 = A[1..4]
    ext       v8.8b, v4.8b, v4.8b, #2   // v8 starts at A[2]
    ext       v9.8b, v5.8b, v5.8b, #2   // v9 starts at F[2]
    st1       {v7.s}[0], [x1], x3       // row 1 = F[1..4]
    st1       {v8.s}[0], [x1], x3       // row 2 = A[2..5]
    st1       {v9.s}[0], [x1], x3       // row 3 = F[2..5]
end_func_vert_l:
    pop_v_regs
    ret
///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_4x4_mode_horz_u
//*
//* @brief
//* Perform Intra prediction for luma_4x4 mode:Horizontal_Up
//*
//* @par Description:
//* Perform Intra prediction for luma_4x4 mode:Horizontal_Up ,described in sec 8.3.1.2.9
//*
//* @param[in] pu1_src
//* UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//* UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//* integer source stride
//*
//* @param[in] dst_strd
//* integer destination stride
//*
//* @param[in] ui_neighboravailability
//* availability of neighbouring pixels
//*
//* @returns
//*
//* @remarks
//* None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_horz_u(UWORD8 *pu1_src,
// UWORD8 *pu1_dst,
// WORD32 src_strd,
// WORD32 dst_strd,
// WORD32 ui_neighboravailability)
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
// x2 => src_strd
// x3 => dst_strd
// x4 => ui_neighboravailability
// 4x4 luma intra prediction, horizontal-up mode.
// With s[i] = pu1_src[i], computes
//   A[i] = (s[i] + s[i+1] + 1) >> 1               for i = 0..2
//   F[i] = (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2    for i = 0..1
//   F[7] = (s[1] + 3*s[0] + 2) >> 2               (lane 7 is patched to produce this)
// and writes
//   row 0 = A[2], F[1], A[1], F[0]
//   row 1 = A[1], F[0], A[0], F[7]
//   row 2 = A[0], F[7], s[0], s[0]
//   row 3 = s[0], s[0], s[0], s[0]
//   x0 = pu1_src   x1 = pu1_dst   w3 = dst_strd   (x2 and x4 are not read)
.global ih264_intra_pred_luma_4x4_mode_horz_u_av8
ih264_intra_pred_luma_4x4_mode_horz_u_av8:
    push_v_regs
    sxtw      x3, w3                    // dst_strd is a 32-bit int: AAPCS64 leaves the upper
                                        // half of x3 unspecified, so widen it before it is
                                        // used as a 64-bit offset
    ld1       {v0.8b}, [x0]             // v0 = s[0..7]
    ldrb      w9, [x0]                  // w9 = s[0] (zero-extended by ldrb)
    ext       v1.8b, v0.8b, v0.8b, #1   // v1 = s[1..7], s[0]
    ld1       {v0.b}[7], [x0]           // v0[7] = s[0], so lane 7 sums s[0] + s[0]
    ext       v2.8b, v1.8b, v1.8b, #1   // v2 = s[2..7], s[0], s[1]
    uaddl     v20.8h, v0.8b, v1.8b      // lane i: s[i] + s[i+1];   lane 7: 2*s[0]
    uaddl     v22.8h, v1.8b, v2.8b      // lane i: s[i+1] + s[i+2]; lane 7: s[0] + s[1]
    add       v24.8h, v20.8h, v22.8h    // lane 7: 3*s[0] + s[1]
    sqrshrun  v4.8b, v20.8h, #1         // v4 = A
    sqrshrun  v5.8b, v24.8h, #2         // v5 = F
    ext       v6.8b, v5.8b, v4.8b, #1   // v6 = F[1..7], A[0]
    st1       {v4.b}[2], [x1], #1       // row 0, col 0 = A[2]
    st1       {v6.b}[0], [x1], #1       // row 0, col 1 = F[1]
    trn1      v10.8b, v6.8b, v5.8b      // v10 = F1,F0,F3,F2,F5,F4,F7,F6
    trn2      v5.8b, v6.8b, v5.8b       // v5  = F2,F1,F4,F3,F6,F5,A0,F7
    sub       x5, x3, #2                // step from column 2 of a row to column 0 of the next
    trn2      v6.8b, v4.8b, v10.8b      // v6  = A1,F0,A3,F2,...
    dup       v7.8b, w9                 // v7  = s[0] in every lane
    st1       {v6.h}[0], [x1], x5       // row 0, cols 2-3 = A[1], F[0]
    st1       {v6.h}[0], [x1], #2       // row 1, cols 0-1 = A[1], F[0]
    st1       {v5.h}[3], [x1], x5       // row 1, cols 2-3 = A[0], F[7]
    st1       {v5.h}[3], [x1], #2       // row 2, cols 0-1 = A[0], F[7]
    st1       {v7.h}[3], [x1], x5       // row 2, cols 2-3 = s[0], s[0]
    st1       {v7.s}[0], [x1], x3       // row 3 = s[0] x 4
end_func_horz_u:
    pop_v_regs
    ret