/* blob: 7d1e4079b65fe0751de561d724f78ef4fd567b5b */
/******************************************************************************
*
* Copyright (C) 2015 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* ih264_inter_pred_filters.c
*
* @brief
* Contains function definitions for inter prediction interpolation filters
*
* @author
* Ittiam
*
* @par List of Functions:
* - ih264_inter_pred_luma_copy
* - ih264_interleave_copy
* - ih264_inter_pred_luma_horz
* - ih264_inter_pred_luma_vert
* - ih264_inter_pred_luma_horz_hpel_vert_hpel
* - ih264_inter_pred_luma_horz_qpel
* - ih264_inter_pred_luma_vert_qpel
* - ih264_inter_pred_luma_horz_qpel_vert_qpel
* - ih264_inter_pred_luma_horz_hpel_vert_qpel
* - ih264_inter_pred_luma_horz_qpel_vert_hpel
* - ih264_inter_pred_luma_bilinear
* - ih264_inter_pred_chroma
*
* @remarks
* None
*
*******************************************************************************
*/
/*****************************************************************************/
/* File Includes */
/*****************************************************************************/
/* User include files */
#include "ih264_typedefs.h"
#include "ih264_macros.h"
#include "ih264_platform_macros.h"
#include "ih264_inter_pred_filters.h"
/*****************************************************************************/
/* Constant Data variables */
/*****************************************************************************/
/* Outer, middle and centre taps of the symmetric 6-tap luma filter; the
 * filters in this file apply them as the kernel (1, -5, 20, 20, -5, 1). */
const WORD32 ih264_g_six_tap[3] = { 1, -5, 20 };
/*****************************************************************************/
/* Function definitions . */
/*****************************************************************************/
/**
 *******************************************************************************
 *
 * @brief
 *  Inter prediction luma function for the plain copy case
 *
 * @par Description:
 *  Copies a block of 'wd' x 'ht' bytes, one row at a time, from 'pu1_src' to
 *  'pu1_dst'. No filtering is applied.
 *
 * @param[in] pu1_src
 *  UWORD8 pointer to the source
 *
 * @param[out] pu1_dst
 *  UWORD8 pointer to the destination
 *
 * @param[in] src_strd
 *  integer source stride
 *
 * @param[in] dst_strd
 *  integer destination stride
 *
 * @param[in] ht
 *  integer height of the block
 *
 * @param[in] wd
 *  integer width of the block
 *
 * @param[in] pu1_tmp
 *  temporary buffer; not used by this function
 *
 * @param[in] dydx
 *  sub-sample offsets; not used by this function
 *
 * @returns
 *  None
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
void ih264_inter_pred_luma_copy(UWORD8 *pu1_src,
                                UWORD8 *pu1_dst,
                                WORD32 src_strd,
                                WORD32 dst_strd,
                                WORD32 ht,
                                WORD32 wd,
                                UWORD8* pu1_tmp,
                                WORD32 dydx)
{
    WORD32 y;

    UNUSED(pu1_tmp);
    UNUSED(dydx);

    for(y = 0; y < ht; y++, pu1_src += src_strd, pu1_dst += dst_strd)
    {
        WORD32 x = 0;

        while(x < wd)
        {
            pu1_dst[x] = pu1_src[x];
            x++;
        }
    }
}
/**
 *******************************************************************************
 *
 * @brief
 *  Function for copying to an interleaved destination
 *
 * @par Description:
 *  For each of 'ht' rows, copies the bytes at even offsets
 *  0, 2, ..., 2 * wd - 2 from 'pu1_src' to the same offsets in 'pu1_dst'.
 *
 * @param[in] pu1_src
 *  UWORD8 pointer to the source
 *
 * @param[out] pu1_dst
 *  UWORD8 pointer to the destination
 *
 * @param[in] src_strd
 *  integer source stride
 *
 * @param[in] dst_strd
 *  integer destination stride
 *
 * @param[in] ht
 *  integer height of the array
 *
 * @param[in] wd
 *  integer number of elements copied per row (each row spans 2 * wd bytes)
 *
 * @returns
 *  None
 *
 * @remarks
 *  Only alternate elements of src are copied, to alternate locations in dst.
 *  The locations in between are neither read nor written.
 *
 *******************************************************************************
 */
void ih264_interleave_copy(UWORD8 *pu1_src,
                           UWORD8 *pu1_dst,
                           WORD32 src_strd,
                           WORD32 dst_strd,
                           WORD32 ht,
                           WORD32 wd)
{
    const WORD32 row_bytes = 2 * wd;
    WORD32 y;

    for(y = 0; y < ht; y++, pu1_src += src_strd, pu1_dst += dst_strd)
    {
        WORD32 x = 0;

        /* step over every second byte so the other plane is left alone */
        while(x < row_bytes)
        {
            pu1_dst[x] = pu1_src[x];
            x += 2;
        }
    }
}
/**
 *******************************************************************************
 *
 * @brief
 *  Inter prediction luma filter, horizontal half-sample position
 *
 * @par Description:
 *  Applies the 6-tap filter (1, -5, 20, 20, -5, 1) along each row, rounds with
 *  (sum + 16) >> 5 and clips the result to 8 bits.
 *  See sec 8.4.2.2.1 titled "Luma sample interpolation process".
 *
 * @param[in] pu1_src
 *  UWORD8 pointer to the source. Samples from column -2 to column wd + 2 of
 *  every row are read.
 *
 * @param[out] pu1_dst
 *  UWORD8 pointer to the destination
 *
 * @param[in] src_strd
 *  integer source stride
 *
 * @param[in] dst_strd
 *  integer destination stride
 *
 * @param[in] ht
 *  integer height of the array
 *
 * @param[in] wd
 *  integer width of the array
 *
 * @param[in] pu1_tmp
 *  temporary buffer; not used by this function
 *
 * @param[in] dydx
 *  sub-sample offsets; not used by this function
 *
 * @returns
 *  None
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
void ih264_inter_pred_luma_horz(UWORD8 *pu1_src,
                                UWORD8 *pu1_dst,
                                WORD32 src_strd,
                                WORD32 dst_strd,
                                WORD32 ht,
                                WORD32 wd,
                                UWORD8* pu1_tmp,
                                WORD32 dydx)
{
    WORD32 y, x;

    UNUSED(pu1_tmp);
    UNUSED(dydx);

    for(y = 0; y < ht; y++, pu1_src += src_strd, pu1_dst += dst_strd)
    {
        for(x = 0; x < wd; x++)
        {
            /* window is centred between samples x and x + 1 */
            const UWORD8 *pu1_win = pu1_src + x;
            WORD16 i2_acc;

            i2_acc = ih264_g_six_tap[2] * (pu1_win[0] + pu1_win[1])
                   + ih264_g_six_tap[1] * (pu1_win[-1] + pu1_win[2])
                   + ih264_g_six_tap[0] * (pu1_win[-2] + pu1_win[3]);

            /* the taps sum to 32: round and normalise */
            i2_acc = (i2_acc + 16) >> 5;
            pu1_dst[x] = CLIP_U8(i2_acc);
        }
    }
}
/**
 *******************************************************************************
 *
 * @brief
 *  Inter prediction luma filter, vertical half-sample position
 *
 * @par Description:
 *  Applies the 6-tap filter (1, -5, 20, 20, -5, 1) along each column, rounds
 *  with (sum + 16) >> 5 and clips the result to 8 bits.
 *  See sec 8.4.2.2.1 titled "Luma sample interpolation process".
 *
 * @param[in] pu1_src
 *  UWORD8 pointer to the source. Samples from row -2 to row ht + 2 are read.
 *
 * @param[out] pu1_dst
 *  UWORD8 pointer to the destination
 *
 * @param[in] src_strd
 *  integer source stride
 *
 * @param[in] dst_strd
 *  integer destination stride
 *
 * @param[in] ht
 *  integer height of the array
 *
 * @param[in] wd
 *  integer width of the array
 *
 * @param[in] pu1_tmp
 *  temporary buffer; not used by this function
 *
 * @param[in] dydx
 *  sub-sample offsets; not used by this function
 *
 * @returns
 *  None
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
void ih264_inter_pred_luma_vert(UWORD8 *pu1_src,
                                UWORD8 *pu1_dst,
                                WORD32 src_strd,
                                WORD32 dst_strd,
                                WORD32 ht,
                                WORD32 wd,
                                UWORD8* pu1_tmp,
                                WORD32 dydx)
{
    WORD32 y, x;

    UNUSED(pu1_tmp);
    UNUSED(dydx);

    for(y = 0; y < ht; y++, pu1_src += src_strd, pu1_dst += dst_strd)
    {
        for(x = 0; x < wd; x++)
        {
            /* window is centred between rows y and y + 1 of column x */
            const UWORD8 *pu1_win = pu1_src + x;
            WORD16 i2_acc;

            i2_acc = ih264_g_six_tap[2] * (pu1_win[0] + pu1_win[src_strd])
                   + ih264_g_six_tap[1] * (pu1_win[-src_strd] + pu1_win[2 * src_strd])
                   + ih264_g_six_tap[0] * (pu1_win[-2 * src_strd] + pu1_win[3 * src_strd]);

            /* the taps sum to 32: round and normalise */
            i2_acc = (i2_acc + 16) >> 5;
            pu1_dst[x] = CLIP_U8(i2_acc);
        }
    }
}
/*!
 **************************************************************************
 * \if Function name : ih264_inter_pred_luma_horz_hpel_vert_hpel \endif
 *
 * \brief
 *    This function implements a two stage cascaded six tap filter. The
 *    first stage applies the six tap filter in the vertical direction on
 *    the predictor values and keeps the unrounded 16 bit sums in pu1_tmp.
 *    The second stage applies the same filter in the horizontal direction
 *    on those sums, rounds with (sum + 512) >> 10 and clips to 8 bits. The
 *    six tap filtering operation is described in sec 8.4.2.2.1 titled
 *    "Luma sample interpolation process"
 *
 * \param pu1_src: Pointer to the buffer containing the predictor values.
 *    Rows -2 .. ht + 2 and columns -2 .. wd + 2 around it are read.
 * \param pu1_dst: Pointer to the destination buffer where the output of
 *    the cascaded filter is stored.
 * \param src_strd: Stride of the buffer pointed to by pu1_src.
 * \param dst_strd: Stride of the destination buffer
 * \param ht: Height of the rectangular pixel grid to be interpolated
 * \param wd: Width of the rectangular pixel grid to be interpolated
 * \param pu1_tmp: temporary buffer, accessed as WORD16; ht rows of
 *    (wd + 5) WORD16 entries are written to it.
 * \param dydx: x and y reference offset for qpel calculations: UNUSED in
 *    this function.
 *
 * \return
 *    None.
 *
 **************************************************************************
 */
void ih264_inter_pred_luma_horz_hpel_vert_hpel(UWORD8 *pu1_src,
                                               UWORD8 *pu1_dst,
                                               WORD32 src_strd,
                                               WORD32 dst_strd,
                                               WORD32 ht,
                                               WORD32 wd,
                                               UWORD8* pu1_tmp,
                                               WORD32 dydx)
{
    /* each intermediate row holds columns -2 .. wd + 2 */
    const WORD32 tmp_strd = wd + 5;
    /* points at column 0 of the first intermediate row */
    WORD16 *const pi2_mid = (WORD16 *)pu1_tmp + 2;
    WORD16 *pi2_row;
    WORD32 y, x;
    WORD32 sum;

    UNUSED(dydx);

    /* Stage 1: vertical filter, unrounded, over the widened column range */
    pi2_row = pi2_mid;
    for(y = 0; y < ht; y++)
    {
        for(x = -2; x < wd + 3; x++)
        {
            sum = ih264_g_six_tap[2] * (pu1_src[x] + pu1_src[x + src_strd])
                + ih264_g_six_tap[1] * (pu1_src[x - src_strd] + pu1_src[x + 2 * src_strd])
                + ih264_g_six_tap[0] * (pu1_src[x - 2 * src_strd] + pu1_src[x + 3 * src_strd]);
            pi2_row[x] = sum;
        }
        pu1_src += src_strd;
        pi2_row += tmp_strd;
    }

    /* Stage 2: horizontal filter on the stage 1 sums, then round and clip */
    pi2_row = pi2_mid;
    for(y = 0; y < ht; y++)
    {
        for(x = 0; x < wd; x++)
        {
            sum = ih264_g_six_tap[2] * (pi2_row[x] + pi2_row[x + 1])
                + ih264_g_six_tap[1] * (pi2_row[x - 1] + pi2_row[x + 2])
                + ih264_g_six_tap[0] * (pi2_row[x - 2] + pi2_row[x + 3]);

            /* two cascaded gains of 32 give 1024 */
            sum = (sum + 512) >> 10;
            pu1_dst[x] = CLIP_U8(sum);
        }
        pi2_row += tmp_strd;
        pu1_dst += dst_strd;
    }
}
/*!
 **************************************************************************
 * \if Function name : ih264_inter_pred_luma_horz_qpel \endif
 *
 * \brief
 *    This routine applies the six tap filter to the predictors in the
 *    horizontal direction, rounds and clips the result to 8 bits, and then
 *    averages it (with rounding) with the source sample at column offset
 *    (x_offset >> 1), where x_offset = dydx & 3. The six tap filtering
 *    operation is described in sec 8.4.2.2.1 titled "Luma sample
 *    interpolation process"
 *
 * \param pu1_src: Pointer to the buffer containing the predictor values.
 *    Columns -2 .. wd + 2 of every row are read.
 * \param pu1_dst: Pointer to the destination buffer where the interpolated
 *    output is stored.
 * \param src_strd: Stride of the buffer pointed to by pu1_src.
 * \param dst_strd: Stride of the destination buffer
 * \param ht: Height of the rectangular pixel grid to be interpolated
 * \param wd: Width of the rectangular pixel grid to be interpolated
 * \param pu1_tmp: temporary buffer: UNUSED in this function
 * \param dydx: x and y reference offset for qpel calculations; only the
 *    low two bits (the x offset) are used here.
 *
 * \return
 *    None.
 *
 * \note
 *    Every output sample reads source samples to its left, so pu1_dst
 *    must not overlap the region read through pu1_src.
 *
 **************************************************************************
 */
void ih264_inter_pred_luma_horz_qpel(UWORD8 *pu1_src,
                                     UWORD8 *pu1_dst,
                                     WORD32 src_strd,
                                     WORD32 dst_strd,
                                     WORD32 ht,
                                     WORD32 wd,
                                     UWORD8* pu1_tmp,
                                     WORD32 dydx)
{
    WORD32 row, col;
    const WORD32 x_offset = dydx & 0x3;
    /* source sample that the half-sample value is averaged with */
    const UWORD8 *pu1_pred1 = pu1_src + (x_offset >> 1);

    UNUSED(pu1_tmp);

    for(row = 0; row < ht; row++)
    {
        for(col = 0; col < wd; col++)
        {
            const UWORD8 *pu1_win = pu1_src + col;
            WORD16 i2_temp;

            /* The taps are applied with plain multiplications: the
             * shift-and-add form would left-shift a partial sum that can
             * be negative, which is undefined behaviour in C. */
            i2_temp = (pu1_win[-2] + pu1_win[3])
                    - 5 * (pu1_win[-1] + pu1_win[2])
                    + 20 * (pu1_win[0] + pu1_win[1]);
            i2_temp = (i2_temp + 16) >> 5;
            i2_temp = CLIP_U8(i2_temp);

            pu1_dst[col] = (i2_temp + pu1_pred1[col] + 1) >> 1;
        }
        pu1_src += src_strd;
        pu1_pred1 += src_strd;
        pu1_dst += dst_strd;
    }
}
/*!
 **************************************************************************
 * \if Function name : ih264_inter_pred_luma_vert_qpel \endif
 *
 * \brief
 *    This routine applies the six tap filter to the predictors in the
 *    vertical direction, rounds and clips the result to 8 bits, and then
 *    averages it (with rounding) with the source sample at row offset
 *    (y_offset >> 1), where y_offset = (dydx >> 2) & 3, to obtain pixels
 *    at quarter vertical positions (0, 1/4) and (0, 3/4). The six tap
 *    filtering operation is described in sec 8.4.2.2.1 titled "Luma sample
 *    interpolation process"
 *
 * \param pu1_src: Pointer to the buffer containing the predictor values.
 *    Rows -2 .. ht + 2 are read.
 * \param pu1_dst: Pointer to the destination buffer where the interpolated
 *    output is stored.
 * \param src_strd: Stride of the buffer pointed to by pu1_src.
 * \param dst_strd: Stride of the destination buffer
 * \param ht: Height of the rectangular pixel grid to be interpolated
 * \param wd: Width of the rectangular pixel grid to be interpolated
 * \param pu1_tmp: temporary buffer: UNUSED in this function
 * \param dydx: x and y reference offset for qpel calculations; only bits
 *    2-3 (the y offset) are used here.
 *
 * \return
 *    void
 *
 * \note
 *    Every output sample reads source samples from the rows above it, so
 *    pu1_dst must not overlap the region read through pu1_src.
 *
 **************************************************************************
 */
void ih264_inter_pred_luma_vert_qpel(UWORD8 *pu1_src,
                                     UWORD8 *pu1_dst,
                                     WORD32 src_strd,
                                     WORD32 dst_strd,
                                     WORD32 ht,
                                     WORD32 wd,
                                     UWORD8* pu1_tmp,
                                     WORD32 dydx)
{
    WORD32 row, col;
    const WORD32 y_offset = (dydx >> 2) & 0x3;
    const WORD32 off1 = src_strd;
    const WORD32 off2 = src_strd << 1;
    const WORD32 off3 = off1 + off2;
    /* source sample that the half-sample value is averaged with */
    const UWORD8 *pu1_pred1 = pu1_src + (y_offset >> 1) * src_strd;

    UNUSED(pu1_tmp);

    for(row = 0; row < ht; row++)
    {
        for(col = 0; col < wd; col++)
        {
            const UWORD8 *pu1_win = pu1_src + col;
            WORD16 i2_temp;

            /* The taps are applied with plain multiplications: the
             * shift-and-add form would left-shift a partial sum that can
             * be negative, which is undefined behaviour in C. */
            i2_temp = (pu1_win[-off2] + pu1_win[off3])
                    - 5 * (pu1_win[-off1] + pu1_win[off2])
                    + 20 * (pu1_win[0] + pu1_win[off1]);
            i2_temp = (i2_temp + 16) >> 5;
            i2_temp = CLIP_U8(i2_temp);

            pu1_dst[col] = (i2_temp + pu1_pred1[col] + 1) >> 1;
        }
        pu1_src += src_strd;
        pu1_pred1 += src_strd;
        pu1_dst += dst_strd;
    }
}
/*!
 **************************************************************************
 * \if Function name : ih264_inter_pred_luma_horz_qpel_vert_qpel \endif
 *
 * \brief
 *    This routine computes one vertically filtered and one horizontally
 *    filtered half-sample value per output pixel (each rounded and clipped
 *    to 8 bits) and averages the two with rounding, to get pixels at
 *    locations (1/4,1/4), (1/4, 3/4), (3/4, 1/4) & (3/4, 3/4). The vertical
 *    filter runs on the column at offset (x_offset >> 1) and the horizontal
 *    filter on the row at offset (y_offset >> 1), where x_offset = dydx & 3
 *    and y_offset = (dydx >> 2) & 3. The six tap filtering operation is
 *    described in sec 8.4.2.2.1 titled "Luma sample interpolation process"
 *
 * \param pu1_src: Pointer to the buffer containing the predictor values.
 * \param pu1_dst: Pointer to the destination buffer where the interpolated
 *    output is stored.
 * \param src_strd: Stride of the buffer pointed to by pu1_src.
 * \param dst_strd: Stride of the destination buffer
 * \param ht: Height of the rectangular pixel grid to be interpolated
 * \param wd: Width of the rectangular pixel grid to be interpolated
 * \param pu1_tmp: temporary buffer, UNUSED in this function
 * \param dydx: x and y reference offset for qpel calculations.
 *
 * \return
 *    void
 *
 * \note
 *    Every output sample reads source samples to its left and above it, so
 *    pu1_dst must not overlap the region read through pu1_src.
 *
 **************************************************************************
 */
void ih264_inter_pred_luma_horz_qpel_vert_qpel(UWORD8 *pu1_src,
                                               UWORD8 *pu1_dst,
                                               WORD32 src_strd,
                                               WORD32 dst_strd,
                                               WORD32 ht,
                                               WORD32 wd,
                                               UWORD8* pu1_tmp,
                                               WORD32 dydx)
{
    WORD32 row, col;
    const WORD32 x_offset = dydx & 0x3;
    const WORD32 y_offset = (dydx >> 2) & 0x3;
    const WORD32 off1 = src_strd;
    const WORD32 off2 = src_strd << 1;
    const WORD32 off3 = off1 + off2;
    /* row on which the horizontal filter runs */
    const UWORD8 *pu1_pred_horz = pu1_src + (y_offset >> 1) * src_strd;
    /* column on which the vertical filter runs */
    const UWORD8 *pu1_pred_vert = pu1_src + (x_offset >> 1);

    UNUSED(pu1_tmp);

    for(row = 0; row < ht; row++)
    {
        for(col = 0; col < wd; col++)
        {
            const UWORD8 *pu1_v = pu1_pred_vert + col;
            const UWORD8 *pu1_h = pu1_pred_horz + col;
            WORD16 i2_temp_vert, i2_temp_horz;

            /* Both filters use plain multiplications: the shift-and-add
             * form would left-shift a partial sum that can be negative,
             * which is undefined behaviour in C. */
            i2_temp_vert = (pu1_v[-off2] + pu1_v[off3])
                         - 5 * (pu1_v[-off1] + pu1_v[off2])
                         + 20 * (pu1_v[0] + pu1_v[off1]);
            i2_temp_vert = (i2_temp_vert + 16) >> 5;
            i2_temp_vert = CLIP_U8(i2_temp_vert);

            i2_temp_horz = (pu1_h[-2] + pu1_h[3])
                         - 5 * (pu1_h[-1] + pu1_h[2])
                         + 20 * (pu1_h[0] + pu1_h[1]);
            i2_temp_horz = (i2_temp_horz + 16) >> 5;
            i2_temp_horz = CLIP_U8(i2_temp_horz);

            pu1_dst[col] = (i2_temp_vert + i2_temp_horz + 1) >> 1;
        }
        pu1_pred_vert += src_strd;
        pu1_pred_horz += src_strd;
        pu1_dst += dst_strd;
    }
}
/*!
 **************************************************************************
 * \if Function name : ih264_inter_pred_luma_horz_qpel_vert_hpel \endif
 *
 * \brief
 *    This routine applies the six tap filter to the predictors in the vertical
 *    and horizontal direction to obtain the pixel at (1/2,1/2). It then interpolates
 *    pixel at (0,1/2) and (1/2,1/2) to obtain pixel at (1/4,1/2). Similarly for (3/4,1/2).
 *    The six tap filtering operation is described in sec 8.4.2.2.1 titled
 *    "Luma sample interpolation process"
 *
 *    It works in three passes:
 *     1. vertical filter, unrounded 16 bit sums stored in pu1_tmp;
 *     2. horizontal filter on those sums, rounded with (sum + 512) >> 10,
 *        clipped and written to pu1_dst;
 *     3. the pass 1 sums at column offset (x_offset >> 1), with
 *        x_offset = dydx & 3, are rounded with (sum + 16) >> 5, clipped and
 *        averaged (with rounding) into pu1_dst.
 *
 * \param pu1_src: Pointer to the buffer containing the predictor values.
 *    Rows -2 .. ht + 2 and columns -2 .. wd + 2 around it are read.
 * \param pu1_dst: Pointer to the destination buffer where the output of
 *    the six tap filter followed by interpolation is stored.
 * \param src_strd: Stride of the buffer pointed to by pu1_src.
 * \param dst_strd: Stride of the destination buffer
 * \param ht: Height of the rectangular pixel grid to be interpolated
 * \param wd: Width of the rectangular pixel grid to be interpolated
 * \param pu1_tmp: buffer to store temporary output after 1st 6-tap filter,
 *    accessed as WORD16; ht rows of (wd + 5) entries are written. The
 *    entries used in pass 3 are overwritten with their rounded value.
 * \param dydx: x and y reference offset for qpel calculations; only the
 *    low two bits (the x offset) are used here.
 *
 * \return
 *    void
 *
 **************************************************************************
 */
void ih264_inter_pred_luma_horz_qpel_vert_hpel(UWORD8 *pu1_src,
                                               UWORD8 *pu1_dst,
                                               WORD32 src_strd,
                                               WORD32 dst_strd,
                                               WORD32 ht,
                                               WORD32 wd,
                                               UWORD8* pu1_tmp,
                                               WORD32 dydx)
{
    /* each intermediate row holds columns -2 .. wd + 2 */
    const WORD32 tmp_strd = wd + 5;
    const WORD32 x_offset = dydx & 0x3;
    /* points at column 0 of the first intermediate row */
    WORD16 *const pi2_mid = (WORD16 *)pu1_tmp + 2;
    WORD16 *pi2_row;
    WORD16 *pi2_vhpel;
    UWORD8 *pu1_out;
    WORD32 y, x;
    WORD32 rows_left, cols_left;
    WORD32 sum;

    /* Pass 1: vertical filter, unrounded, over the widened column range */
    pi2_row = pi2_mid;
    for(y = 0; y < ht; y++)
    {
        for(x = -2; x < wd + 3; x++)
        {
            sum = ih264_g_six_tap[2] * (pu1_src[x] + pu1_src[x + src_strd])
                + ih264_g_six_tap[1] * (pu1_src[x - src_strd] + pu1_src[x + 2 * src_strd])
                + ih264_g_six_tap[0] * (pu1_src[x - 2 * src_strd] + pu1_src[x + 3 * src_strd]);
            pi2_row[x] = sum;
        }
        pu1_src += src_strd;
        pi2_row += tmp_strd;
    }

    /* Pass 2: horizontal filter on the pass 1 sums gives the (1/2,1/2) pixel */
    pi2_row = pi2_mid;
    pu1_out = pu1_dst;
    for(y = 0; y < ht; y++)
    {
        for(x = 0; x < wd; x++)
        {
            sum = ih264_g_six_tap[2] * (pi2_row[x] + pi2_row[x + 1])
                + ih264_g_six_tap[1] * (pi2_row[x - 1] + pi2_row[x + 2])
                + ih264_g_six_tap[0] * (pi2_row[x - 2] + pi2_row[x + 3]);
            sum = (sum + 512) >> 10;
            pu1_out[x] = CLIP_U8(sum);
        }
        pi2_row += tmp_strd;
        pu1_out += dst_strd;
    }

    /* Pass 3: average with the vertical half-sample value of the left or
     * right neighbouring column, selected by x_offset */
    pi2_vhpel = pi2_mid + (x_offset >> 1);
    pu1_out = pu1_dst;
    rows_left = ht;
    while(rows_left != 0)
    {
        cols_left = wd;
        while(cols_left != 0)
        {
            WORD16 i2_hpel;
            UWORD8 u1_hpel;

            /* round the pass 1 sum in place, then clip it to 8 bits */
            *pi2_vhpel = (*pi2_vhpel + 16) >> 5;
            i2_hpel = *pi2_vhpel;
            u1_hpel = CLIP_U8(i2_hpel);

            *pu1_out = (*pu1_out + u1_hpel + 1) >> 1;

            pu1_out++;
            pi2_vhpel++;
            cols_left--;
        }
        /* skip the 5 extra columns of the intermediate row */
        pi2_vhpel += 5;
        pu1_out += dst_strd - wd;
        rows_left--;
    }
}
/*!
 **************************************************************************
 * \if Function name : ih264_inter_pred_luma_horz_hpel_vert_qpel \endif
 *
 * \brief
 *    This routine applies the six tap filter to the predictors in the horizontal
 *    and vertical direction to obtain the pixel at (1/2,1/2). It then interpolates
 *    pixel at (1/2,0) and (1/2,1/2) to obtain pixel at (1/2,1/4). Similarly for (1/2,3/4).
 *    The six tap filtering operation is described in sec 8.4.2.2.1 titled
 *    "Luma sample interpolation process"
 *
 *    It works in three passes:
 *     1. horizontal filter on rows -2 .. ht + 2, unrounded 16 bit sums
 *        stored in pu1_tmp;
 *     2. vertical filter on those sums, rounded with (sum + 512) >> 10,
 *        clipped and written to pu1_dst;
 *     3. the pass 1 sums at row offset (y_offset >> 1), with
 *        y_offset = (dydx >> 2) & 3, are rounded with (sum + 16) >> 5,
 *        clipped and averaged (with rounding) into pu1_dst.
 *
 * \param pu1_src: Pointer to the buffer containing the predictor values.
 *    Rows -2 .. ht + 2 and columns -2 .. wd + 2 around it are read.
 * \param pu1_dst: Pointer to the destination buffer where the output of
 *    the six tap filter followed by interpolation is stored.
 * \param src_strd: Stride of the buffer pointed to by pu1_src.
 * \param dst_strd: Stride of the destination buffer
 * \param ht: Height of the rectangular pixel grid to be interpolated
 * \param wd: Width of the rectangular pixel grid to be interpolated
 * \param pu1_tmp: buffer to store temporary output after 1st 6-tap filter,
 *    accessed as WORD16; (ht + 5) rows of wd entries are written. The
 *    entries used in pass 3 are overwritten with their rounded value.
 * \param dydx: x and y reference offset for qpel calculations; only bits
 *    2-3 (the y offset) are used here.
 *
 * \return
 *    void
 *
 **************************************************************************
 */
void ih264_inter_pred_luma_horz_hpel_vert_qpel(UWORD8 *pu1_src,
                                               UWORD8 *pu1_dst,
                                               WORD32 src_strd,
                                               WORD32 dst_strd,
                                               WORD32 ht,
                                               WORD32 wd,
                                               UWORD8* pu1_tmp,
                                               WORD32 dydx)
{
    const WORD32 y_offset = (dydx >> 2) & 0x3;
    /* intermediate row for source row -2 */
    WORD16 *const pi2_top = (WORD16 *)pu1_tmp;
    /* intermediate row for source row 0 */
    WORD16 *const pi2_mid = pi2_top + 2 * wd;
    WORD16 *pi2_row;
    WORD16 *pi2_hhpel;
    UWORD8 *pu1_out;
    WORD32 y, x;
    WORD32 rows_left, cols_left;
    WORD32 sum;

    /* Pass 1: horizontal filter, unrounded, on rows -2 .. ht + 2 */
    pu1_src -= 2 * src_strd;
    pi2_row = pi2_top;
    for(y = 0; y < ht + 5; y++)
    {
        for(x = 0; x < wd; x++)
        {
            sum = ih264_g_six_tap[2] * (pu1_src[x] + pu1_src[x + 1])
                + ih264_g_six_tap[1] * (pu1_src[x - 1] + pu1_src[x + 2])
                + ih264_g_six_tap[0] * (pu1_src[x - 2] + pu1_src[x + 3]);
            pi2_row[x] = sum;
        }
        pu1_src += src_strd;
        pi2_row += wd;
    }

    /* Pass 2: vertical filter on the pass 1 sums gives the (1/2,1/2) pixel */
    pi2_row = pi2_mid;
    pu1_out = pu1_dst;
    for(y = 0; y < ht; y++)
    {
        for(x = 0; x < wd; x++)
        {
            sum = ih264_g_six_tap[2] * (pi2_row[x] + pi2_row[x + wd])
                + ih264_g_six_tap[1] * (pi2_row[x - wd] + pi2_row[x + 2 * wd])
                + ih264_g_six_tap[0] * (pi2_row[x - 2 * wd] + pi2_row[x + 3 * wd]);
            sum = (sum + 512) >> 10;
            pu1_out[x] = CLIP_U8(sum);
        }
        pi2_row += wd;
        pu1_out += dst_strd;
    }

    /* Pass 3: average with the horizontal half-sample value of the row
     * above or below, selected by y_offset. Intermediate rows are packed
     * (stride wd), so the pointer simply keeps advancing across rows. */
    pi2_hhpel = pi2_mid + (y_offset >> 1) * wd;
    pu1_out = pu1_dst;
    rows_left = ht;
    while(rows_left != 0)
    {
        cols_left = wd;
        while(cols_left != 0)
        {
            WORD16 i2_hpel;
            UWORD8 u1_hpel;

            /* round the pass 1 sum in place, then clip it to 8 bits */
            *pi2_hhpel = (*pi2_hhpel + 16) >> 5;
            i2_hpel = *pi2_hhpel;
            u1_hpel = CLIP_U8(i2_hpel);

            *pu1_out = (*pu1_out + u1_hpel + 1) >> 1;

            pu1_out++;
            pi2_hhpel++;
            cols_left--;
        }
        pu1_out += dst_strd - wd;
        rows_left--;
    }
}
/**
 *******************************************************************************
 *  function:ih264_inter_pred_luma_bilinear
 *
 * @brief
 *  This routine applies the bilinear filter to the predictors.
 *  The filtering operation is described in
 *  sec 8.4.2.2.1 titled "Luma sample interpolation process"
 *
 * @par Description:
 *  Each output sample is the rounded average, (a + b + 1) >> 1, of the
 *  co-located samples of the two input arrays.
 *
 * \note
 *  This function is called to obtain pixels lying at the following
 *  locations: (1/4,1), (3/4,1), (1,1/4), (1,3/4), (1/4,1/2), (3/4,1/2),
 *  (1/2,1/4), (1/2,3/4), (3/4,1/4), (1/4,3/4), (3/4,3/4) and (1/4,1/4).
 *
 * @param[in] pu1_src1:
 *  UWORD8 Pointer to the buffer containing the first input array.
 *
 * @param[in] pu1_src2:
 *  UWORD8 Pointer to the buffer containing the second input array.
 *
 * @param[out] pu1_dst
 *  UWORD8 pointer to the destination where the output of bilinear filter is stored.
 *
 * @param[in] src_strd1
 *  Stride of the first input buffer
 *
 * @param[in] src_strd2
 *  Stride of the second input buffer
 *
 * @param[in] dst_strd
 *  integer destination stride of pu1_dst
 *
 * @param[in] ht
 *  integer height of the array
 *
 * @param[in] wd
 *  integer width of the array
 *
 * @returns
 *  None
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
void ih264_inter_pred_luma_bilinear(UWORD8 *pu1_src1,
                                    UWORD8 *pu1_src2,
                                    UWORD8 *pu1_dst,
                                    WORD32 src_strd1,
                                    WORD32 src_strd2,
                                    WORD32 dst_strd,
                                    WORD32 ht,
                                    WORD32 wd)
{
    WORD32 y;

    for(y = 0; y < ht; y++)
    {
        WORD32 x = 0;

        while(x < wd)
        {
            WORD16 i2_avg = (pu1_src1[x] + pu1_src2[x] + 1) >> 1;

            pu1_dst[x] = CLIP_U8(i2_avg);
            x++;
        }

        pu1_src1 += src_strd1;
        pu1_src2 += src_strd2;
        pu1_dst += dst_strd;
    }
}
/**
 *******************************************************************************
 *
 * @brief
 *  Inter prediction chroma filter
 *
 * @par Description:
 *  Applies filtering to chroma samples as mentioned in
 *  sec 8.4.2.2.2 titled "chroma sample interpolation process".
 *  Each output byte is a weighted sum of four source bytes: the one at the
 *  same position, the one two bytes to its right, and the same two in the
 *  next row. The weights are products of (8 - dx) or dx with (8 - dy) or dy;
 *  the sum is rounded with (sum + 32) >> 6.
 *
 * @param[in] pu1_src
 *  UWORD8 pointer to the source containing alternate U and V samples
 *
 * @param[out] pu1_dst
 *  UWORD8 pointer to the destination
 *
 * @param[in] src_strd
 *  integer source stride
 *
 * @param[in] dst_strd
 *  integer destination stride
 *
 * @param[in] dx
 *  dx value where the sample is to be produced (refer sec 8.4.2.2.2)
 *
 * @param[in] dy
 *  dy value where the sample is to be produced (refer sec 8.4.2.2.2)
 *
 * @param[in] ht
 *  integer height of the array
 *
 * @param[in] wd
 *  integer width of the array; 2 * wd bytes are produced per row
 *
 * @returns
 *  None
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
void ih264_inter_pred_chroma(UWORD8 *pu1_src,
                             UWORD8 *pu1_dst,
                             WORD32 src_strd,
                             WORD32 dst_strd,
                             WORD32 dx,
                             WORD32 dy,
                             WORD32 ht,
                             WORD32 wd)
{
    /* weights of the four neighbours; they are the same for every sample */
    const WORD32 wt_tl = (8 - dx) * (8 - dy);
    const WORD32 wt_tr = dx * (8 - dy);
    const WORD32 wt_bl = (8 - dx) * dy;
    const WORD32 wt_br = dx * dy;
    const WORD32 row_bytes = 2 * wd;
    WORD32 y, x;

    for(y = 0; y < ht; y++)
    {
        UWORD8 *pu1_below = pu1_src + src_strd;

        for(x = 0; x < row_bytes; x++)
        {
            WORD16 i2_acc;

            /* the horizontal neighbour of the same plane is 2 bytes away
             * because U and V samples alternate */
            i2_acc = wt_tl * pu1_src[x]
                   + wt_tr * pu1_src[x + 2]
                   + wt_bl * pu1_below[x]
                   + wt_br * pu1_below[x + 2];
            i2_acc = (i2_acc + 32) >> 6;
            pu1_dst[x] = CLIP_U8(i2_acc);
        }

        pu1_src += src_strd;
        pu1_dst += dst_strd;
    }
}