blob: 4943c5c4580ada45531921acb6f09f33a2bee7e3 [file] [log] [blame]
/******************************************************************************
*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/*!
******************************************************************************
* \file ihevce_enc_loop_pass.c
*
* \brief
* This file contains Encoder normative loop pass related functions
*
* \date
* 18/09/2012
*
* \author
* Ittiam
*
*
* List of Functions
*
*
******************************************************************************
*/
/*****************************************************************************/
/* File Includes */
/*****************************************************************************/
/* System include files */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <stdarg.h>
#include <math.h>
#include <limits.h>
/* User include files */
#include "ihevc_typedefs.h"
#include "itt_video_api.h"
#include "ihevce_api.h"
#include "rc_cntrl_param.h"
#include "rc_frame_info_collector.h"
#include "rc_look_ahead_params.h"
#include "ihevc_defs.h"
#include "ihevc_macros.h"
#include "ihevc_debug.h"
#include "ihevc_structs.h"
#include "ihevc_platform_macros.h"
#include "ihevc_deblk.h"
#include "ihevc_itrans_recon.h"
#include "ihevc_chroma_itrans_recon.h"
#include "ihevc_chroma_intra_pred.h"
#include "ihevc_intra_pred.h"
#include "ihevc_inter_pred.h"
#include "ihevc_mem_fns.h"
#include "ihevc_padding.h"
#include "ihevc_weighted_pred.h"
#include "ihevc_sao.h"
#include "ihevc_resi_trans.h"
#include "ihevc_quant_iquant_ssd.h"
#include "ihevc_cabac_tables.h"
#include "ihevc_common_tables.h"
#include "ihevc_quant_tables.h"
#include "ihevce_defs.h"
#include "ihevce_hle_interface.h"
#include "ihevce_lap_enc_structs.h"
#include "ihevce_multi_thrd_structs.h"
#include "ihevce_multi_thrd_funcs.h"
#include "ihevce_me_common_defs.h"
#include "ihevce_had_satd.h"
#include "ihevce_error_codes.h"
#include "ihevce_bitstream.h"
#include "ihevce_cabac.h"
#include "ihevce_rdoq_macros.h"
#include "ihevce_function_selector.h"
#include "ihevce_enc_structs.h"
#include "ihevce_entropy_structs.h"
#include "ihevce_cmn_utils_instr_set_router.h"
#include "ihevce_ipe_instr_set_router.h"
#include "ihevce_decomp_pre_intra_structs.h"
#include "ihevce_decomp_pre_intra_pass.h"
#include "ihevce_enc_loop_structs.h"
#include "ihevce_nbr_avail.h"
#include "ihevce_enc_loop_utils.h"
#include "ihevce_sub_pic_rc.h"
#include "ihevce_global_tables.h"
#include "ihevce_bs_compute_ctb.h"
#include "ihevce_cabac_rdo.h"
#include "ihevce_deblk.h"
#include "ihevce_frame_process.h"
#include "ihevce_rc_enc_structs.h"
#include "hme_datatype.h"
#include "hme_interface.h"
#include "hme_common_defs.h"
#include "hme_defs.h"
#include "ihevce_me_instr_set_router.h"
#include "ihevce_enc_subpel_gen.h"
#include "ihevce_inter_pred.h"
#include "ihevce_mv_pred.h"
#include "ihevce_mv_pred_merge.h"
#include "ihevce_enc_loop_inter_mode_sifter.h"
#include "ihevce_enc_cu_recursion.h"
#include "ihevce_enc_loop_pass.h"
#include "ihevce_common_utils.h"
#include "ihevce_dep_mngr_interface.h"
#include "ihevce_sao.h"
#include "ihevce_tile_interface.h"
#include "ihevce_profile.h"
#include "cast_types.h"
#include "osal.h"
#include "osal_defaults.h"
/*****************************************************************************/
/* Globals */
/*****************************************************************************/
extern PART_ID_T ge_part_type_to_part_id[MAX_PART_TYPES][MAX_NUM_PARTS];
extern UWORD8 gau1_num_parts_in_part_type[MAX_PART_TYPES];
/*****************************************************************************/
/* Constant Macros */
/*****************************************************************************/
#define UPDATE_QP_AT_CTB 6
#define INTRAPRED_SIMD_LEFT_PADDING 16
#define INTRAPRED_SIMD_RIGHT_PADDING 8
/*****************************************************************************/
/* Function Definitions */
/*****************************************************************************/
/*!
******************************************************************************
* \if Function name : ihevce_enc_loop_ctb_left_copy \endif
*
* \brief
*    Saves the right-most column of the current CTB into the "left" buffers
*    held in the enc loop context.
*
*    Three things are copied:
*      - the last luma recon pixel of every CTB row into pv_left_luma_data
*      - the last chroma recon byte pair of every chroma row (chroma is
*        pixel interleaved) into pv_left_chrm_data
*      - the last 4x4 neighbour entry of every 4x4 row of as_ctb_nbr_arr
*        into as_left_col_nbr
*
*    ihevce_cu_mode_decide reads these buffers as the left neighbour data
*    of CUs that sit on the left CTB boundary.
*
* \param[in,out] ps_ctxt : pointer to enc_loop module; the left buffers in
*                          it are overwritten
* \param[in] ps_cu_prms  : pointer to cu params (recon pointers, strides
*                          and CTB size are read from it)
*
* \date
*    18/09/2012
*
* \author
*    Ittiam
*
* \return
*    None
*
******************************************************************************
*/
void ihevce_enc_loop_ctb_left_copy(ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_prms_t *ps_cu_prms)
{
    const WORD32 i4_ctb_size = ps_cu_prms->i4_ctb_size;
    /* number of 4x4 units along one side of the CTB */
    const WORD32 i4_num_4x4_in_row = i4_ctb_size >> 2;
    /* chroma_array_type 2 (4:2:2) has one chroma row per luma row, */
    /* every other format handled here has half as many             */
    const WORD32 i4_num_chroma_rows =
        (2 == ps_ctxt->u1_chroma_array_type) ? i4_ctb_size : (i4_ctb_size >> 1);
    WORD32 i4_row;

    /* ---------------- luma : last pixel of every row ---------------- */
    {
        UWORD8 *pu1_luma_dst = (UWORD8 *)ps_ctxt->pv_left_luma_data;
        const UWORD8 *pu1_luma_last_col = ps_cu_prms->pu1_luma_recon + i4_ctb_size - 1;
        const WORD32 i4_luma_strd = ps_cu_prms->i4_luma_recon_stride;

        for(i4_row = 0; i4_row < i4_ctb_size; i4_row++)
        {
            pu1_luma_dst[i4_row] = pu1_luma_last_col[i4_luma_strd * i4_row];
        }
    }

    /* ------------- chroma : last byte pair of every row ------------- */
    {
        UWORD8 *pu1_chroma_dst = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
        /* the last two bytes of a chroma row hold the final interleaved pair */
        const UWORD8 *pu1_chroma_last_pair = ps_cu_prms->pu1_chrm_recon + i4_ctb_size - 2;
        const WORD32 i4_chroma_strd = ps_cu_prms->i4_chrm_recon_stride;

        for(i4_row = 0; i4_row < i4_num_chroma_rows; i4_row++)
        {
            const WORD32 i4_src_off = i4_chroma_strd * i4_row;

            pu1_chroma_dst[(2 * i4_row)] = pu1_chroma_last_pair[i4_src_off];
            pu1_chroma_dst[(2 * i4_row) + 1] = pu1_chroma_last_pair[i4_src_off + 1];
        }
    }

    /* -------- 4x4 neighbour info : last entry of every 4x4 row ------- */
    {
        nbr_4x4_t *ps_left_col_dst = &ps_ctxt->as_left_col_nbr[0];
        const nbr_4x4_t *ps_nbr_last_col = &ps_ctxt->as_ctb_nbr_arr[i4_num_4x4_in_row - 1];

        /* the CTB neighbour array is laid out with a stride of one 4x4 row */
        for(i4_row = 0; i4_row < i4_num_4x4_in_row; i4_row++)
        {
            ps_left_col_dst[i4_row] = ps_nbr_last_col[i4_num_4x4_in_row * i4_row];
        }
    }
}
/*!
******************************************************************************
* \if Function name : ihevce_mark_all_modes_to_evaluate \endif
*
* \brief
*    Sets the evaluation mark of every candidate slot in the cu analyse
*    structure.
*
*    All MAX_INTER_CU_CANDIDATES inter candidates are always marked. The
*    intra marks (2Nx2N with TU equal to CU, 2Nx2N with TU equal to CU/2 and
*    NxN for every part) are set only when the CU carries at least one
*    intra RD-opt candidate.
*
* \param[in] pv_ctxt            : pointer to enc_loop module (not used)
* \param[in,out] ps_cu_analyse  : pointer to cu analyse whose eval marks
*                                 are updated
*
* \return
*    None
*
* \author
*    Ittiam
*
*****************************************************************************
*/
void ihevce_mark_all_modes_to_evaluate(void *pv_ctxt, cu_analyse_t *ps_cu_analyse)
{
    UWORD8 u1_cand_idx;
    WORD32 i4_part_idx;

    /* the module context is accepted only to keep the interface uniform */
    (void)pv_ctxt;

    /* every inter candidate slot is flagged for evaluation */
    u1_cand_idx = 0;
    while(u1_cand_idx < MAX_INTER_CU_CANDIDATES)
    {
        ps_cu_analyse->as_cu_inter_cand[u1_cand_idx].b1_eval_mark = 1;
        u1_cand_idx++;
    }

    /* nothing more to do when the CU has no intra RD-opt candidates */
    if(0 == ps_cu_analyse->u1_num_intra_rdopt_cands)
    {
        return;
    }

    /* NxN marks : one list per PU part */
    for(i4_part_idx = 0; i4_part_idx < NUM_PU_PARTS; i4_part_idx++)
    {
        for(u1_cand_idx = 0; u1_cand_idx < MAX_INTRA_CU_CANDIDATES + 1; u1_cand_idx++)
        {
            ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[i4_part_idx][u1_cand_idx] = 1;
        }
    }

    /* 2Nx2N marks : TU equal to CU and TU equal to CU/2 lists */
    for(u1_cand_idx = 0; u1_cand_idx < MAX_INTRA_CU_CANDIDATES + 1; u1_cand_idx++)
    {
        ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[u1_cand_idx] = 1;
        ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[u1_cand_idx] = 1;
    }
}
/*!
******************************************************************************
* \if Function name : ihevce_cu_mode_decide \endif
*
* \brief
* Coding Unit mode decide function. Performs RD opt and decides the best mode
*
* \param[in] ps_ctxt : pointer to enc_loop module
* \param[in] ps_cu_prms : pointer to coding unit params (position, buffer pointers)
* \param[in] ps_cu_analyse : pointer to cu analyse
* \param[out] ps_final_mode_state : pointer to cu final mode state
* \param[out] pu1_ecd_data : pointer to store coeff data for ECD
* \param[out] ps_col_pu : colocated pu buffer pointer
* \param[out] pu1_col_pu_map : colocated pu map buffer pointer
* \param[in] col_start_pu_idx : pu index start value
*
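* \return
* LWORD64 value (NOTE(review): presumably the least RD-opt cost of the CU; confirm against the return statement)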
*
*
* \author
* Ittiam
*
*****************************************************************************
*/
LWORD64 ihevce_cu_mode_decide(
ihevce_enc_loop_ctxt_t *ps_ctxt,
enc_loop_cu_prms_t *ps_cu_prms,
cu_analyse_t *ps_cu_analyse,
final_mode_state_t *ps_final_mode_state,
UWORD8 *pu1_ecd_data,
pu_col_mv_t *ps_col_pu,
UWORD8 *pu1_col_pu_map,
WORD32 col_start_pu_idx)
{
enc_loop_chrm_cu_buf_prms_t s_chrm_cu_buf_prms;
cu_nbr_prms_t s_cu_nbr_prms;
inter_cu_mode_info_t s_inter_cu_mode_info;
cu_inter_cand_t *ps_best_inter_cand = NULL;
UWORD8 *pu1_cu_top;
UWORD8 *pu1_cu_top_left;
UWORD8 *pu1_cu_left;
UWORD8 *pu1_final_recon = NULL;
UWORD8 *pu1_curr_src = NULL;
void *pv_curr_src = NULL;
void *pv_cu_left = NULL;
void *pv_cu_top = NULL;
void *pv_cu_top_left = NULL;
WORD32 cu_left_stride = 0;
WORD32 ctr;
WORD32 rd_opt_best_idx;
LWORD64 rd_opt_least_cost;
WORD32 rd_opt_curr_idx;
WORD32 num_4x4_in_ctb;
WORD32 nbr_4x4_left_strd = 0;
nbr_4x4_t *ps_topleft_nbr_4x4;
nbr_4x4_t *ps_left_nbr_4x4 = NULL;
nbr_4x4_t *ps_top_nbr_4x4 = NULL;
nbr_4x4_t *ps_curr_nbr_4x4;
WORD32 enable_intra_eval_flag;
WORD32 i4_best_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1;
WORD32 curr_cu_pos_in_row;
WORD32 cu_top_right_offset;
WORD32 cu_top_right_dep_pos;
WORD32 i4_ctb_x_off, i4_ctb_y_off;
UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
(void)ps_final_mode_state;
/* default init */
rd_opt_least_cost = MAX_COST_64;
ps_ctxt->as_cu_prms[0].i8_best_rdopt_cost = MAX_COST_64;
ps_ctxt->as_cu_prms[1].i8_best_rdopt_cost = MAX_COST_64;
/* Zero cbf tool is enabled by default for all presets */
ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
rd_opt_best_idx = 1;
rd_opt_curr_idx = 0;
enable_intra_eval_flag = 1;
/* CU params in enc ctxt*/
ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
ps_ctxt->ps_enc_out_ctxt->u1_cu_size = ps_cu_analyse->u1_cu_size;
num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0];
ps_curr_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
ps_curr_nbr_4x4 += ((ps_cu_analyse->b3_cu_pos_y << 1) * num_4x4_in_ctb);
/* CB and Cr are pixel interleaved */
s_chrm_cu_buf_prms.i4_chrm_recon_stride = ps_cu_prms->i4_chrm_recon_stride;
s_chrm_cu_buf_prms.i4_chrm_src_stride = ps_cu_prms->i4_chrm_src_stride;
if(!ps_ctxt->u1_is_input_data_hbd)
{
/* --------------------------------------- */
/* ----- Luma Pointers Derivation -------- */
/* --------------------------------------- */
/* based on CU position derive the pointers */
pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
pu1_curr_src = ps_cu_prms->pu1_luma_src + (ps_cu_analyse->b3_cu_pos_x << 3);
pu1_final_recon += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride);
pu1_curr_src += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_src_stride);
pv_curr_src = pu1_curr_src;
/* CU left */
if(0 == ps_cu_analyse->b3_cu_pos_x)
{
/* CTB boundary */
pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_luma_data;
pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << 3);
cu_left_stride = 1;
ps_left_nbr_4x4 = &ps_ctxt->as_left_col_nbr[0];
ps_left_nbr_4x4 += ps_cu_analyse->b3_cu_pos_y << 1;
nbr_4x4_left_strd = 1;
}
else
{
/* inside CTB */
pu1_cu_left = pu1_final_recon - 1;
cu_left_stride = ps_cu_prms->i4_luma_recon_stride;
ps_left_nbr_4x4 = ps_curr_nbr_4x4 - 1;
nbr_4x4_left_strd = num_4x4_in_ctb;
}
pv_cu_left = pu1_cu_left;
/* CU top */
if(0 == ps_cu_analyse->b3_cu_pos_y)
{
/* CTB boundary */
pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_luma;
pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
ps_top_nbr_4x4 = ps_ctxt->ps_top_row_nbr;
ps_top_nbr_4x4 += (ps_cu_prms->i4_ctb_pos * (ps_cu_prms->i4_ctb_size >> 2));
ps_top_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
}
else
{
/* inside CTB */
pu1_cu_top = pu1_final_recon - ps_cu_prms->i4_luma_recon_stride;
ps_top_nbr_4x4 = ps_curr_nbr_4x4 - num_4x4_in_ctb;
}
pv_cu_top = pu1_cu_top;
/* CU top left */
if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
{
/* left ctb boundary but not first row */
pu1_cu_top_left = pu1_cu_left - 1; /* stride is 1 */
ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - 1; /* stride is 1 */
}
else
{
/* rest all cases topleft is top -1 */
pu1_cu_top_left = pu1_cu_top - 1;
ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1;
}
pv_cu_top_left = pu1_cu_top_left;
/* Store the CU nbr information in the ctxt for final reconstruction fun. */
s_cu_nbr_prms.nbr_4x4_left_strd = nbr_4x4_left_strd;
s_cu_nbr_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
s_cu_nbr_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
s_cu_nbr_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
s_cu_nbr_prms.pu1_cu_left = pu1_cu_left;
s_cu_nbr_prms.pu1_cu_top = pu1_cu_top;
s_cu_nbr_prms.pu1_cu_top_left = pu1_cu_top_left;
s_cu_nbr_prms.cu_left_stride = cu_left_stride;
/* ------------------------------------------------------------ */
/* -- Initialize the number of neigbour skip cu count for rdo --*/
/* ------------------------------------------------------------ */
{
nbr_avail_flags_t s_nbr;
WORD32 i4_num_nbr_skip_cus = 0;
/* get the neighbour availability flags for current cu */
ihevce_get_nbr_intra(
&s_nbr,
ps_ctxt->pu1_ctb_nbr_map,
ps_ctxt->i4_nbr_map_strd,
(ps_cu_analyse->b3_cu_pos_x << 1),
(ps_cu_analyse->b3_cu_pos_y << 1),
(ps_cu_analyse->u1_cu_size >> 2));
if(s_nbr.u1_top_avail)
{
i4_num_nbr_skip_cus += ps_top_nbr_4x4->b1_skip_flag;
}
if(s_nbr.u1_left_avail)
{
i4_num_nbr_skip_cus += ps_left_nbr_4x4->b1_skip_flag;
}
ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0].i4_num_nbr_skip_cus =
i4_num_nbr_skip_cus;
ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1].i4_num_nbr_skip_cus =
i4_num_nbr_skip_cus;
}
/* --------------------------------------- */
/* --- Chroma Pointers Derivation -------- */
/* --------------------------------------- */
/* based on CU position derive the pointers */
s_chrm_cu_buf_prms.pu1_final_recon =
ps_cu_prms->pu1_chrm_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
s_chrm_cu_buf_prms.pu1_curr_src =
ps_cu_prms->pu1_chrm_src + (ps_cu_analyse->b3_cu_pos_x << 3);
s_chrm_cu_buf_prms.pu1_final_recon +=
((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_recon_stride);
s_chrm_cu_buf_prms.pu1_curr_src +=
((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_src_stride);
/* CU left */
if(0 == ps_cu_analyse->b3_cu_pos_x)
{
/* CTB boundary */
s_chrm_cu_buf_prms.pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
s_chrm_cu_buf_prms.pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 3));
s_chrm_cu_buf_prms.i4_cu_left_stride = 2;
}
else
{
/* inside CTB */
s_chrm_cu_buf_prms.pu1_cu_left = s_chrm_cu_buf_prms.pu1_final_recon - 2;
s_chrm_cu_buf_prms.i4_cu_left_stride = ps_cu_prms->i4_chrm_recon_stride;
}
/* CU top */
if(0 == ps_cu_analyse->b3_cu_pos_y)
{
/* CTB boundary */
s_chrm_cu_buf_prms.pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_chroma;
s_chrm_cu_buf_prms.pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
s_chrm_cu_buf_prms.pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
}
else
{
/* inside CTB */
s_chrm_cu_buf_prms.pu1_cu_top =
s_chrm_cu_buf_prms.pu1_final_recon - ps_cu_prms->i4_chrm_recon_stride;
}
/* CU top left */
if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
{
/* left ctb boundary but not first row */
s_chrm_cu_buf_prms.pu1_cu_top_left =
s_chrm_cu_buf_prms.pu1_cu_left - 2; /* stride is 1 (2 pixels) */
}
else
{
/* rest all cases topleft is top -2 */
s_chrm_cu_buf_prms.pu1_cu_top_left = s_chrm_cu_buf_prms.pu1_cu_top - 2;
}
}
/* Set Variables for Dep. Checking and Setting */
i4_ctb_x_off = (ps_cu_prms->i4_ctb_pos << 6);
i4_ctb_y_off = ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y;
ps_ctxt->i4_satd_buf_idx = rd_opt_curr_idx;
/* Set the pred pointer count for ME/intra to 0 to start */
ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count = 0;
ASSERT(
(ps_cu_analyse->u1_num_inter_cands > 0) || (ps_cu_analyse->u1_num_intra_rdopt_cands > 0));
ASSERT(ps_cu_analyse->u1_num_inter_cands <= MAX_INTER_CU_CANDIDATES);
s_inter_cu_mode_info.u1_num_inter_cands = 0;
s_inter_cu_mode_info.u1_idx_of_worst_cost_in_cost_array = 0;
s_inter_cu_mode_info.u1_idx_of_worst_cost_in_pred_buf_array = 0;
ps_ctxt->s_cu_inter_merge_skip.u1_num_merge_cands = 0;
ps_ctxt->s_cu_inter_merge_skip.u1_num_skip_cands = 0;
ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type0_cands = 0;
ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type1_cands = 0;
ps_ctxt->s_pred_buf_data.i4_pred_stride = ps_cu_analyse->u1_cu_size;
if(0 != ps_cu_analyse->u1_num_inter_cands)
{
ihevce_inter_cand_sifter_prms_t s_prms;
UWORD8 u1_enable_top_row_sync;
if(ps_ctxt->u1_disable_intra_eval)
{
u1_enable_top_row_sync = !DISABLE_TOP_SYNC;
}
else
{
u1_enable_top_row_sync = 1;
}
if((!ps_ctxt->u1_use_top_at_ctb_boundary) && u1_enable_top_row_sync)
{
/* Wait till top data is ready */
/* Currently checking till top right CU */
curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
if(i4_ctb_y_off == 0)
{
/* No wait for 1st row */
cu_top_right_offset = -(MAX_CTB_SIZE);
{
ihevce_tile_params_t *ps_col_tile_params =
((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
ps_ctxt->i4_tile_col_idx);
/* No wait for 1st row */
cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
}
cu_top_right_dep_pos = 0;
}
else
{
cu_top_right_offset = (ps_cu_analyse->u1_cu_size) + 4;
cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
}
if(0 == ps_cu_analyse->b3_cu_pos_y)
{
ihevce_dmgr_chk_row_row_sync(
ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
curr_cu_pos_in_row,
cu_top_right_offset,
cu_top_right_dep_pos,
ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
ps_ctxt->thrd_id);
}
}
s_prms.i4_ctb_nbr_map_stride = ps_ctxt->i4_nbr_map_strd;
s_prms.i4_max_num_inter_rdopt_cands = ps_ctxt->i4_max_num_inter_rdopt_cands;
s_prms.i4_nbr_4x4_left_strd = nbr_4x4_left_strd;
s_prms.i4_src_strd = ps_cu_prms->i4_luma_src_stride;
s_prms.ps_cu_inter_merge_skip = &ps_ctxt->s_cu_inter_merge_skip;
s_prms.aps_cu_nbr_buf[0] = &ps_ctxt->as_cu_nbr[ps_ctxt->i4_satd_buf_idx][0];
s_prms.aps_cu_nbr_buf[1] = &ps_ctxt->as_cu_nbr[!ps_ctxt->i4_satd_buf_idx][0];
s_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
s_prms.ps_mc_ctxt = &ps_ctxt->s_mc_ctxt;
s_prms.ps_me_cands = ps_cu_analyse->as_cu_inter_cand;
s_prms.ps_mixed_modes_datastore = &ps_ctxt->s_mixed_mode_inter_cu;
s_prms.ps_mv_pred_ctxt = &ps_ctxt->s_mv_pred_ctxt;
s_prms.ps_pred_buf_data = &ps_ctxt->s_pred_buf_data;
s_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
s_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
s_prms.pu1_ctb_nbr_map = ps_ctxt->pu1_ctb_nbr_map;
s_prms.pv_src = pv_curr_src;
s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 3;
s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 3;
s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
s_prms.u1_max_merge_candidates = ps_ctxt->i4_max_merge_candidates;
s_prms.u1_num_me_cands = ps_cu_analyse->u1_num_inter_cands;
s_prms.u1_use_satd_for_merge_eval = ps_ctxt->i4_use_satd_for_merge_eval;
s_prms.u1_quality_preset = ps_ctxt->i4_quality_preset;
s_prms.i1_slice_type = ps_ctxt->i1_slice_type;
s_prms.ps_cu_me_intra_pred_prms = &ps_ctxt->s_cu_me_intra_pred_prms;
s_prms.u1_is_hbd = (ps_ctxt->u1_bit_depth > 8);
s_prms.ps_inter_cu_mode_info = &s_inter_cu_mode_info;
s_prms.pai4_mv_cost = ps_cu_analyse->ai4_mv_cost;
s_prms.i4_lambda_qf = ps_ctxt->i4_sad_lamda;
s_prms.u1_use_merge_cand_from_top_row =
(u1_enable_top_row_sync || (s_prms.u1_cu_pos_y > 0));
s_prms.u1_merge_idx_cabac_model =
ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[IHEVC_CAB_MERGE_IDX_EXT];
#if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
s_prms.pai4_me_err_metric = ps_cu_analyse->ai4_err_metric;
s_prms.u1_reuse_me_sad = 1;
#else
s_prms.u1_reuse_me_sad = 0;
#endif
if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_type != PSLICE)
{
if(ps_ctxt->i4_temporal_layer == 1)
{
s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_BREF;
}
else
{
s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME;
}
}
else
{
s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_P;
}
s_prms.u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
if(s_prms.u1_is_cu_noisy)
{
s_prms.i4_lambda_qf =
((float)s_prms.i4_lambda_qf) * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f;
}
s_prms.pf_luma_inter_pred_pu = ihevce_luma_inter_pred_pu;
s_prms.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
s_prms.pf_evalsad_pt_npu_mxn_8bit = (FT_SAD_EVALUATOR *)ps_ctxt->pv_evalsad_pt_npu_mxn_8bit;
ihevce_inter_cand_sifter(&s_prms);
}
if(u1_is_422)
{
UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX - 1];
UWORD8 u1_num_bufs_allocated;
u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX - 1);
ASSERT(u1_num_bufs_allocated == (NUM_CU_ME_INTRA_PRED_IDX - 1));
for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
ctr++)
{
{
ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
(UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
}
ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
}
{
ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
(UWORD8 *)ps_ctxt->pv_422_chroma_intra_pred_buf;
}
ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
}
else
{
UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX];
UWORD8 u1_num_bufs_allocated;
u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX);
ASSERT(u1_num_bufs_allocated == NUM_CU_ME_INTRA_PRED_IDX);
for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
ctr++)
{
{
ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
(UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
}
ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
}
}
ihevce_mark_all_modes_to_evaluate(ps_ctxt, ps_cu_analyse);
ps_ctxt->as_cu_prms[0].s_recon_datastore.u1_is_lumaRecon_available = 0;
ps_ctxt->as_cu_prms[1].s_recon_datastore.u1_is_lumaRecon_available = 0;
ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
/* --------------------------------------- */
/* ------ Inter RD OPT stage ------------- */
/* --------------------------------------- */
if(0 != s_inter_cu_mode_info.u1_num_inter_cands)
{
UWORD8 u1_ssd_bit_info_ctr = 0;
/* -- run a loop over all Inter rd opt cands ------ */
for(ctr = 0; ctr < s_inter_cu_mode_info.u1_num_inter_cands; ctr++)
{
cu_inter_cand_t *ps_inter_cand;
LWORD64 rd_opt_cost = 0;
ps_inter_cand = s_inter_cu_mode_info.aps_cu_data[ctr];
if((ps_inter_cand->b1_skip_flag) || (ps_inter_cand->as_inter_pu[0].b1_merge_flag) ||
(ps_inter_cand->b3_part_size && ps_inter_cand->as_inter_pu[1].b1_merge_flag))
{
ps_inter_cand->b1_eval_mark = 1;
}
/****************************************************************/
/* This check is only valid for derived instances. */
/* check if this mode needs to be evaluated or not. */
/* if it is a skip candidate, go ahead and evaluate it even if */
/* it has not been marked while sorting. */
/****************************************************************/
if((0 == ps_inter_cand->b1_eval_mark) && (0 == ps_inter_cand->b1_skip_flag))
{
continue;
}
/* RDOPT related copies and settings */
ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
/* RDOPT copy States : Prev Cu best to current init */
COPY_CABAC_STATES(
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
&ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
IHEVC_CAB_CTXT_END * sizeof(UWORD8));
/* MVP ,MVD calc and Motion compensation */
rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)(
ps_ctxt,
ps_inter_cand,
ps_cu_analyse->u1_cu_size,
ps_cu_analyse->b3_cu_pos_x,
ps_cu_analyse->b3_cu_pos_y,
ps_left_nbr_4x4,
ps_top_nbr_4x4,
ps_topleft_nbr_4x4,
nbr_4x4_left_strd,
rd_opt_curr_idx);
#if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
if((ps_ctxt->u1_bit_depth == 8) && (!ps_inter_cand->b1_skip_flag))
{
ihevce_determine_tu_tree_distribution(
ps_inter_cand,
(me_func_selector_t *)ps_ctxt->pv_err_func_selector,
ps_ctxt->ai2_scratch,
(UWORD8 *)pv_curr_src,
ps_cu_prms->i4_luma_src_stride,
ps_ctxt->i4_satd_lamda,
LAMBDA_Q_SHIFT,
ps_cu_analyse->u1_cu_size,
ps_ctxt->u1_max_tr_depth);
}
#endif
#if DISABLE_ZERO_ZBF_IN_INTER
ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
#else
ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
#endif
/* Recon loop with different TUs based on partition type*/
rd_opt_cost += ((pf_inter_rdopt_cu_ntu)ps_ctxt->pv_inter_rdopt_cu_ntu)(
ps_ctxt,
ps_cu_prms,
pv_curr_src,
ps_cu_analyse->u1_cu_size,
ps_cu_analyse->b3_cu_pos_x,
ps_cu_analyse->b3_cu_pos_y,
rd_opt_curr_idx,
&s_chrm_cu_buf_prms,
ps_inter_cand,
ps_cu_analyse,
!ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
: ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
(double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
100.0);
#if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
{
ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
}
#endif
/* based on the rd opt cost choose the best and current index */
if(rd_opt_cost < rd_opt_least_cost)
{
/* swap the best and current indx */
rd_opt_best_idx = !rd_opt_best_idx;
rd_opt_curr_idx = !rd_opt_curr_idx;
ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
rd_opt_least_cost = rd_opt_cost;
i4_best_cu_qp = ps_ctxt->i4_cu_qp;
/* Store the best Inter cand. for final_recon function */
ps_best_inter_cand = ps_inter_cand;
}
/* set the neighbour map to 0 */
ihevce_set_nbr_map(
ps_ctxt->pu1_ctb_nbr_map,
ps_ctxt->i4_nbr_map_strd,
(ps_cu_analyse->b3_cu_pos_x << 1),
(ps_cu_analyse->b3_cu_pos_y << 1),
(ps_cu_analyse->u1_cu_size >> 2),
0);
} /* end of loop for all the Inter RD OPT cand */
}
/* --------------------------------------- */
/* ---- Conditional Eval of Intra -------- */
/* --------------------------------------- */
{
enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
/* check if inter candidates are valid */
if(0 != ps_cu_analyse->u1_num_inter_cands)
{
/* if skip or no residual inter candidates has won then */
/* evaluation of intra candidates is disabled */
if((1 == ps_enc_loop_bestprms->u1_skip_flag) ||
(0 == ps_enc_loop_bestprms->u1_is_cu_coded))
{
enable_intra_eval_flag = 0;
}
}
/* Disable Intra Gating for HIGH QUALITY PRESET */
#if !ENABLE_INTRA_GATING_FOR_HQ
if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset)
{
enable_intra_eval_flag = 1;
#if DISABLE_LARGE_INTRA_PQ
if((IHEVCE_QUALITY_P0 == ps_ctxt->i4_quality_preset) && (ps_cu_prms->u1_is_cu_noisy) &&
(ps_ctxt->i1_slice_type != ISLICE) && (0 != s_inter_cu_mode_info.u1_num_inter_cands))
{
if(ps_cu_analyse->u1_cu_size > 16)
{
/* Disable 32x32 / 64x64 Intra in PQ P and B pics */
enable_intra_eval_flag = 0;
}
else if(ps_cu_analyse->u1_cu_size == 16)
{
/* Disable tu equal to cu mode in 16x16 Intra in PQ P and B pics */
ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
}
}
#endif
}
#endif
}
/* --------------------------------------- */
/* ------ Intra RD OPT stage ------------- */
/* --------------------------------------- */
/* -- run a loop over all Intra rd opt cands ------ */
if((0 != ps_cu_analyse->u1_num_intra_rdopt_cands) && (1 == enable_intra_eval_flag))
{
LWORD64 rd_opt_cost;
WORD32 end_flag = 0;
WORD32 cu_eval_done = 0;
WORD32 subcu_eval_done = 0;
WORD32 subpu_eval_done = 0;
WORD32 max_trans_size;
WORD32 sync_wait_stride;
max_trans_size = MIN(MAX_TU_SIZE, (ps_cu_analyse->u1_cu_size));
sync_wait_stride = (ps_cu_analyse->u1_cu_size) + max_trans_size;
if(!ps_ctxt->u1_use_top_at_ctb_boundary)
{
/* Wait till top data is ready */
/* Currently checking till top right CU */
curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
if(i4_ctb_y_off == 0)
{
/* No wait for 1st row */
cu_top_right_offset = -(MAX_CTB_SIZE);
{
ihevce_tile_params_t *ps_col_tile_params =
((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
ps_ctxt->i4_tile_col_idx);
/* No wait for 1st row */
cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
}
cu_top_right_dep_pos = 0;
}
else
{
cu_top_right_offset = sync_wait_stride;
cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
}
if(0 == ps_cu_analyse->b3_cu_pos_y)
{
ihevce_dmgr_chk_row_row_sync(
ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
curr_cu_pos_in_row,
cu_top_right_offset,
cu_top_right_dep_pos,
ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
ps_ctxt->thrd_id);
}
}
ctr = 0;
/* Zero cbf tool is disabled for intra CUs */
#if ENABLE_ZERO_CBF_IN_INTRA
ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
#else
ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
#endif
/* Intra Mode gating based on MPM cand list and encoder quality preset */
if((ps_ctxt->i1_slice_type != ISLICE) && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
{
ihevce_mpm_idx_based_filter_RDOPT_cand(
ps_ctxt,
ps_cu_analyse,
ps_left_nbr_4x4,
ps_top_nbr_4x4,
&ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0],
&ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[0]);
ihevce_mpm_idx_based_filter_RDOPT_cand(
ps_ctxt,
ps_cu_analyse,
ps_left_nbr_4x4,
ps_top_nbr_4x4,
&ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0],
&ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[0]);
}
/* Call Chroma SATD function for curr_func_mode in HIGH QUALITY mode */
if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd)
{
/* For cu_size = 64, there won't be any TU_EQ_CU case */
if(64 != ps_cu_analyse->u1_cu_size)
{
/* RDOPT copy States : Prev Cu best to current init */
COPY_CABAC_STATES(
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
&ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
IHEVC_CAB_CTXT_END);
/* RDOPT related copies and settings */
ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
/* Calc. best SATD mode for TU_EQ_CU case */
((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
ps_ctxt,
&s_chrm_cu_buf_prms,
ps_cu_analyse,
rd_opt_curr_idx,
TU_EQ_CU,
!ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
: ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
(double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
100.0,
ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
#if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
{
ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
}
#endif
}
/* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and
TU_EQ_CU_DIV2 case */
if((ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] !=
255) &&
(8 != ps_cu_analyse->u1_cu_size))
{
/* RDOPT copy States : Prev Cu best to current init */
COPY_CABAC_STATES(
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
&ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
IHEVC_CAB_CTXT_END);
/* RDOPT related copies and settings */
ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
/* Calc. best SATD mode for TU_EQ_CU_DIV2 case */
((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
ps_ctxt,
&s_chrm_cu_buf_prms,
ps_cu_analyse,
rd_opt_curr_idx,
TU_EQ_CU_DIV2,
!ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
: ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
(double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
100.0,
ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
#if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
{
ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
}
#endif
}
}
while(0 == end_flag)
{
UWORD8 *pu1_mode = NULL;
WORD32 curr_func_mode = 0;
void *pv_pred;
ASSERT(ctr < 36);
/* TU equal to CU size evaluation of different modes */
if(0 == cu_eval_done)
{
/* check if the all the modes have been evaluated */
if(255 == ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr])
{
cu_eval_done = 1;
ctr = 0;
}
else if(
(1 == ctr) &&
((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
(ps_ctxt->i1_slice_type != ISLICE))
{
ctr = 0;
cu_eval_done = 1;
subcu_eval_done = 1;
subpu_eval_done = 1;
}
else
{
if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr])
{
ctr++;
continue;
}
pu1_mode =
&ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr];
ctr++;
curr_func_mode = TU_EQ_CU;
}
}
/* Sub CU (NXN) mode evaluation of different pred modes */
if((0 == subpu_eval_done) && (1 == cu_eval_done))
{
/*For NxN modes evaluation all candidates for all PU parts are evaluated */
/*inside the ihevce_intra_rdopt_cu_ntu function, so the subpu_eval_done is set to 1 */
{
pu1_mode = &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][ctr];
curr_func_mode = TU_EQ_SUBCU;
/* check if the any modes have to be evaluated */
if(255 == *pu1_mode)
{
subpu_eval_done = 1;
ctr = 0;
}
else if(ctr != 0) /* If the modes have to be evaluated, then terminate, as all modes are already evaluated */
{
subpu_eval_done = 1;
ctr = 0;
}
else
{
ctr++;
}
}
}
/* TU size equal to CU div2 mode evaluation of different pred modes */
if((0 == subcu_eval_done) && (1 == subpu_eval_done) && (1 == cu_eval_done))
{
/* check if the all the modes have been evaluated */
if(255 ==
ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr])
{
subcu_eval_done = 1;
}
else if(
(1 == ctr) &&
((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
(ps_ctxt->i1_slice_type != ISLICE) && (ps_cu_analyse->u1_cu_size == 64))
{
subcu_eval_done = 1;
}
else
{
if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr])
{
ctr++;
continue;
}
pu1_mode = &ps_cu_analyse->s_cu_intra_cand
.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr];
ctr++;
curr_func_mode = TU_EQ_CU_DIV2;
}
}
/* check if all CU option have been evalueted */
if((1 == cu_eval_done) && (1 == subcu_eval_done) && (1 == subpu_eval_done))
{
break;
}
/* RDOPT related copies and settings */
ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
/* Assign ME/Intra pred buf. to the current intra cand. since we
are storing pred data for final_reon function */
{
pv_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_curr_idx];
}
/* RDOPT copy States : Prev Cu best to current init */
COPY_CABAC_STATES(
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
&ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
IHEVC_CAB_CTXT_END);
/* call the function which performs the normative Intra encode */
rd_opt_cost = ((pf_intra_rdopt_cu_ntu)ps_ctxt->pv_intra_rdopt_cu_ntu)(
ps_ctxt,
ps_cu_prms,
pv_pred,
ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_curr_idx],
&s_chrm_cu_buf_prms,
pu1_mode,
ps_cu_analyse,
pv_curr_src,
pv_cu_left,
pv_cu_top,
pv_cu_top_left,
ps_left_nbr_4x4,
ps_top_nbr_4x4,
nbr_4x4_left_strd,
cu_left_stride,
rd_opt_curr_idx,
curr_func_mode,
!ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
: ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
(double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
100.0);
#if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
{
ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
}
#endif
/* based on the rd opt cost choose the best and current index */
if(rd_opt_cost < rd_opt_least_cost)
{
/* swap the best and current indx */
rd_opt_best_idx = !rd_opt_best_idx;
rd_opt_curr_idx = !rd_opt_curr_idx;
i4_best_cu_qp = ps_ctxt->i4_cu_qp;
rd_opt_least_cost = rd_opt_cost;
ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
}
if((TU_EQ_SUBCU == curr_func_mode) &&
(ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_intra_flag) &&
(ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P2) && !FORCE_INTRA_TU_DEPTH_TO_0)
{
UWORD8 au1_tu_eq_cu_div2_modes[4];
UWORD8 au1_freq_of_mode[4];
if(ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_part_mode == SIZE_2Nx2N)
{
ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
255; //ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode[0];
ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
255;
}
else
{
WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D(
ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode,
au1_tu_eq_cu_div2_modes,
au1_freq_of_mode,
4);
if(2 == i4_num_clusters)
{
if(au1_freq_of_mode[0] == 3)
{
ps_cu_analyse->s_cu_intra_cand
.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
au1_tu_eq_cu_div2_modes[0];
ps_cu_analyse->s_cu_intra_cand
.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
}
else if(au1_freq_of_mode[1] == 3)
{
ps_cu_analyse->s_cu_intra_cand
.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
au1_tu_eq_cu_div2_modes[1];
ps_cu_analyse->s_cu_intra_cand
.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
}
else
{
ps_cu_analyse->s_cu_intra_cand
.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
au1_tu_eq_cu_div2_modes[0];
ps_cu_analyse->s_cu_intra_cand
.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
au1_tu_eq_cu_div2_modes[1];
ps_cu_analyse->s_cu_intra_cand
.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[2] = 255;
}
}
}
}
/* set the neighbour map to 0 */
ihevce_set_nbr_map(
ps_ctxt->pu1_ctb_nbr_map,
ps_ctxt->i4_nbr_map_strd,
(ps_cu_analyse->b3_cu_pos_x << 1),
(ps_cu_analyse->b3_cu_pos_y << 1),
(ps_cu_analyse->u1_cu_size >> 2),
0);
}
} /* end of Intra RD OPT cand evaluation */
ASSERT(i4_best_cu_qp > (ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1));
ps_ctxt->i4_cu_qp = i4_best_cu_qp;
ps_cu_analyse->i1_cu_qp = i4_best_cu_qp;
/* --------------------------------------- */
/* --------Final mode Recon ---------- */
/* --------------------------------------- */
{
enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
void *pv_final_pred = NULL;
WORD32 final_pred_strd = 0;
void *pv_final_pred_chrm = NULL;
WORD32 final_pred_strd_chrm = 0;
WORD32 packed_pred_mode;
#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
{
pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
}
#else
pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
#endif
ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
packed_pred_mode =
ps_enc_loop_bestprms->u1_intra_flag + (ps_enc_loop_bestprms->u1_skip_flag) * 2;
if(!ps_ctxt->u1_is_input_data_hbd)
{
if(ps_enc_loop_bestprms->u1_intra_flag)
{
pv_final_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_best_idx];
final_pred_strd =
ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_best_idx];
}
else
{
pv_final_pred = ps_best_inter_cand->pu1_pred_data;
final_pred_strd = ps_best_inter_cand->i4_pred_data_stride;
}
pv_final_pred_chrm =
ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
rd_opt_best_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
(u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
final_pred_strd_chrm =
ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
}
ihevce_set_eval_flags(ps_ctxt, ps_enc_loop_bestprms);
{
final_mode_process_prms_t s_prms;
void *pv_cu_luma_recon;
void *pv_cu_chroma_recon;
WORD32 luma_stride, chroma_stride;
if(!ps_ctxt->u1_is_input_data_hbd)
{
#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
{
pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
luma_stride = ps_cu_analyse->u1_cu_size;
chroma_stride = ps_cu_analyse->u1_cu_size;
}
else
{
/* based on CU position derive the luma pointers */
pv_cu_luma_recon = pu1_final_recon;
/* based on CU position derive the chroma pointers */
pv_cu_chroma_recon = s_chrm_cu_buf_prms.pu1_final_recon;
luma_stride = ps_cu_prms->i4_luma_recon_stride;
chroma_stride = ps_cu_prms->i4_chrm_recon_stride;
}
#else
pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
luma_stride = ps_cu_analyse->u1_cu_size;
chroma_stride = ps_cu_analyse->u1_cu_size;
#endif
s_prms.ps_cu_nbr_prms = &s_cu_nbr_prms;
s_prms.ps_best_inter_cand = ps_best_inter_cand;
s_prms.ps_chrm_cu_buf_prms = &s_chrm_cu_buf_prms;
s_prms.packed_pred_mode = packed_pred_mode;
s_prms.rd_opt_best_idx = rd_opt_best_idx;
s_prms.pv_src = pu1_curr_src;
s_prms.src_strd = ps_cu_prms->i4_luma_src_stride;
s_prms.pv_pred = pv_final_pred;
s_prms.pred_strd = final_pred_strd;
s_prms.pv_pred_chrm = pv_final_pred_chrm;
s_prms.pred_chrm_strd = final_pred_strd_chrm;
s_prms.pu1_final_ecd_data = pu1_ecd_data;
s_prms.pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
s_prms.csbf_strd = ps_ctxt->i4_cu_csbf_strd;
s_prms.pv_luma_recon = pv_cu_luma_recon;
s_prms.recon_luma_strd = luma_stride;
s_prms.pv_chrm_recon = pv_cu_chroma_recon;
s_prms.recon_chrma_strd = chroma_stride;
s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
s_prms.i1_cu_qp = ps_cu_analyse->i1_cu_qp;
s_prms.u1_will_cabac_state_change = 1;
s_prms.u1_recompute_sbh_and_rdoq = 0;
s_prms.u1_is_first_pass = 1;
}
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
s_prms.u1_is_cu_noisy = !ps_enc_loop_bestprms->u1_intra_flag
? ps_cu_prms->u1_is_cu_noisy
: ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY;
#endif
((pf_final_rdopt_mode_prcs)ps_ctxt->pv_final_rdopt_mode_prcs)(ps_ctxt, &s_prms);
#if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
{
ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
}
#endif
}
}
/* --------------------------------------- */
/* --------Populate CU out prms ---------- */
/* --------------------------------------- */
{
enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
UWORD8 *pu1_pu_map;
ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
/* Corner case : If Part is 2Nx2N and Merge has all TU with zero cbf */
/* then it has to be coded as skip CU */
if((SIZE_2Nx2N == ps_enc_loop_bestprms->u1_part_mode) &&
(1 == ps_enc_loop_bestprms->as_pu_enc_loop[0].b1_merge_flag) &&
(0 == ps_enc_loop_bestprms->u1_skip_flag) && (0 == ps_enc_loop_bestprms->u1_is_cu_coded))
{
ps_enc_loop_bestprms->u1_skip_flag = 1;
}
/* update number PUs in CU */
ps_cu_prms->i4_num_pus_in_cu = ps_enc_loop_bestprms->u2_num_pus_in_cu;
/* ---- populate the colocated pu map index --- */
for(ctr = 0; ctr < ps_enc_loop_bestprms->u2_num_pus_in_cu; ctr++)
{
WORD32 i;
WORD32 vert_ht;
WORD32 horz_wd;
if(ps_enc_loop_bestprms->u1_intra_flag)
{
ps_enc_loop_bestprms->as_col_pu_enc_loop[ctr].b1_intra_flag = 1;
vert_ht = ps_cu_analyse->u1_cu_size >> 2;
horz_wd = ps_cu_analyse->u1_cu_size >> 2;
}
else
{
vert_ht = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_ht + 1) << 2) >> 2);
horz_wd = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_wd + 1) << 2) >> 2);
}
pu1_pu_map = pu1_col_pu_map + ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_x;
pu1_pu_map += (ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_y * num_4x4_in_ctb);
for(i = 0; i < vert_ht; i++)
{
memset(pu1_pu_map, col_start_pu_idx, horz_wd);
pu1_pu_map += num_4x4_in_ctb;
}
/* increment the index */
col_start_pu_idx++;
}
/* ---- copy the colocated PUs to frm pu ----- */
memcpy(
ps_col_pu,
&ps_enc_loop_bestprms->as_col_pu_enc_loop[0],
ps_enc_loop_bestprms->u2_num_pus_in_cu * sizeof(pu_col_mv_t));
/*---populate qp for 4x4 nbr array based on skip and cbf zero flag---*/
{
entropy_context_t *ps_entropy_ctxt;
WORD32 diff_cu_qp_delta_depth, log2_ctb_size;
WORD32 log2_min_cu_qp_delta_size;
UWORD32 block_addr_align;
ps_entropy_ctxt = ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt;
log2_ctb_size = ps_entropy_ctxt->i1_log2_ctb_size;
diff_cu_qp_delta_depth = ps_entropy_ctxt->ps_pps->i1_diff_cu_qp_delta_depth;
log2_min_cu_qp_delta_size = log2_ctb_size - diff_cu_qp_delta_depth;
block_addr_align = 15 << (log2_min_cu_qp_delta_size - 3);
ps_entropy_ctxt->i4_qg_pos_x = ps_cu_analyse->b3_cu_pos_x & block_addr_align;
ps_entropy_ctxt->i4_qg_pos_y = ps_cu_analyse->b3_cu_pos_y & block_addr_align;
/*Update the Qp value used. It will not have a valid value iff
current CU is (skipped/no_cbf). In that case the Qp needed for
deblocking is calculated from top/left/previous coded CU*/
ps_ctxt->ps_enc_out_ctxt->i1_cu_qp = ps_cu_analyse->i1_cu_qp;
if(ps_entropy_ctxt->i4_qg_pos_x == ps_cu_analyse->b3_cu_pos_x &&
ps_entropy_ctxt->i4_qg_pos_y == ps_cu_analyse->b3_cu_pos_y)
{
ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 1;
}
else
{
ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 0;
}
}
/* -- at the end of CU set the neighbour map to 1 -- */
ihevce_set_nbr_map(
ps_ctxt->pu1_ctb_nbr_map,
ps_ctxt->i4_nbr_map_strd,
(ps_cu_analyse->b3_cu_pos_x << 1),
(ps_cu_analyse->b3_cu_pos_y << 1),
(ps_cu_analyse->u1_cu_size >> 2),
1);
/* -- at the end of CU update best cabac rdopt states -- */
/* -- and also set the top row skip flags ------------- */
ihevce_entropy_update_best_cu_states(
&ps_ctxt->s_rdopt_entropy_ctxt,
ps_cu_analyse->b3_cu_pos_x,
ps_cu_analyse->b3_cu_pos_y,
ps_cu_analyse->u1_cu_size,
0,
rd_opt_best_idx);
}
/* Store Output struct */
#if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
{
{
memcpy(
&ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
&ps_ctxt->as_cu_prms[rd_opt_best_idx],
sizeof(enc_loop_cu_final_prms_t));
}
memcpy(
&ps_ctxt->as_cu_recur_nbr[0],
&ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
(ps_cu_analyse->u1_cu_size >> 2));
ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
}
#else
if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2)
{
ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0];
if(ps_ctxt->u1_disable_intra_eval && ps_ctxt->i4_deblk_pad_hpel_cur_pic)
{
/* Wait till top data is ready */
/* Currently checking till top right CU */
curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
if(i4_ctb_y_off == 0)
{
/* No wait for 1st row */
cu_top_right_offset = -(MAX_CTB_SIZE);
{
ihevce_tile_params_t *ps_col_tile_params =
((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
ps_ctxt->i4_tile_col_idx);
/* No wait for 1st row */
cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
}
cu_top_right_dep_pos = 0;
}
else
{
cu_top_right_offset = (ps_cu_analyse->u1_cu_size);
cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
}
if(0 == ps_cu_analyse->b3_cu_pos_y)
{
ihevce_dmgr_chk_row_row_sync(
ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
curr_cu_pos_in_row,
cu_top_right_offset,
cu_top_right_dep_pos,
ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
ps_ctxt->thrd_id);
}
}
}
else
{
{
memcpy(
&ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
&ps_ctxt->as_cu_prms[rd_opt_best_idx],
sizeof(enc_loop_cu_final_prms_t));
}
memcpy(
&ps_ctxt->as_cu_recur_nbr[0],
&ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
(ps_cu_analyse->u1_cu_size >> 2));
ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
}
#endif
ps_ctxt->s_pred_buf_data.u4_is_buf_in_use &=
~((1 << (ps_ctxt->i4_max_num_inter_rdopt_cands + 4)) - 1);
return rd_opt_least_cost;
}
/*!
******************************************************************************
* \if Function name : ihevce_enc_loop_process_row \endif
*
* \brief
* Row level enc_loop pass function
*
* \param[in] ps_ctxt : pointer to enc_loop module context
* \param[in] ps_curr_src_bufs : pointer to input yuv buffer (row buffer)
* \param[out] ps_curr_recon_bufs : pointer to recon picture structure (row buffer)
* \param[in] ps_ctb_in : pointer to CTB structure (output of ME/IPE) (row buffer)
* \param[out] ps_ctb_out : pointer to CTB output structure (row buffer)
* \param[out] ps_cu_out : pointer to CU output structure (row buffer)
* \param[out] ps_tu_out : pointer to TU output structure (row buffer)
* \param[out] pi2_frm_coeffs : pointer to coeff output (row buffer)
* \param[in] i4_poc : current poc. Needed to send recon in dist-client mode
*
* \return
* None
*
* Note : Currently the frame level calculations assume that the
* frame width of the input/recon is an exact multiple of ctbsize
*
* \author
* Ittiam
*
*****************************************************************************
*/
void ihevce_enc_loop_process_row(
ihevce_enc_loop_ctxt_t *ps_ctxt,
iv_enc_yuv_buf_t *ps_curr_src_bufs,
iv_enc_yuv_buf_t *ps_curr_recon_bufs,
iv_enc_yuv_buf_src_t *ps_curr_recon_bufs_src,
UWORD8 **ppu1_y_subpel_planes,
ctb_analyse_t *ps_ctb_in,
ctb_enc_loop_out_t *ps_ctb_out,
ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse,
cur_ctb_cu_tree_t *ps_row_cu_tree,
cu_enc_loop_out_t *ps_row_cu,
tu_enc_loop_out_t *ps_row_tu,
pu_t *ps_row_pu,
pu_col_mv_t *ps_row_col_pu,
UWORD16 *pu2_num_pu_map,
UWORD8 *pu1_row_pu_map,
UWORD8 *pu1_row_ecd_data,
UWORD32 *pu4_pu_offsets,
frm_ctb_ctxt_t *ps_frm_ctb_prms,
WORD32 vert_ctr,
recon_pic_buf_t *ps_frm_recon,
void *pv_dep_mngr_encloop_dep_me,
pad_interp_recon_frm_t *ps_pad_interp_recon,
WORD32 i4_pass,
multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
ihevce_tile_params_t *ps_tile_params)
{
enc_loop_cu_prms_t s_cu_prms;
ctb_enc_loop_out_t *ps_ctb_out_dblk;
WORD32 ctb_ctr, ctb_start, ctb_end;
WORD32 col_pu_map_idx;
WORD32 num_ctbs_horz_pic;
WORD32 ctb_size;
WORD32 last_ctb_row_flag;
WORD32 last_ctb_col_flag;
WORD32 last_hz_ctb_wd;
WORD32 last_vt_ctb_ht;
void *pv_dep_mngr_enc_loop_dblk;
void *pv_dep_mngr_enc_loop_cu_top_right;
WORD32 dblk_offset, dblk_check_dep_pos;
WORD32 aux_offset, aux_check_dep_pos;
void *pv_dep_mngr_me_dep_encloop;
ctb_enc_loop_out_t *ps_ctb_out_sao;
/*Structure to store deblocking parameters at CTB-row level*/
deblk_ctbrow_prms_t s_deblk_ctb_row_params;
UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2);
pv_dep_mngr_me_dep_encloop = (void *)ps_frm_recon->pv_dep_mngr_recon;
num_ctbs_horz_pic = ps_frm_ctb_prms->i4_num_ctbs_horz;
ctb_size = ps_frm_ctb_prms->i4_ctb_size;
/* Store the num_ctb_horz in sao context*/
ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_horz = ps_frm_ctb_prms->i4_num_ctbs_horz;
ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_vert = ps_frm_ctb_prms->i4_num_ctbs_vert;
/* Get the EncLoop Deblock Dep Mngr */
pv_dep_mngr_enc_loop_dblk = ps_ctxt->pv_dep_mngr_enc_loop_dblk;
/* Get the EncLoop Top-Right CU Dep Mngr */
pv_dep_mngr_enc_loop_cu_top_right = ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right;
/* Set Variables for Dep. Checking and Setting */
aux_check_dep_pos = vert_ctr;
aux_offset = 2; /* Should be there for 0th row also */
if(vert_ctr > 0)
{
dblk_check_dep_pos = vert_ctr - 1;
dblk_offset = 2;
}
else
{
/* First row should run without waiting */
dblk_check_dep_pos = 0;
dblk_offset = -(ps_tile_params->i4_first_sample_x + 1);
}
/* check if the current row processed in last CTb row */
last_ctb_row_flag = (vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1));
/* Valid Width (pixels) in the last CTB in every row (padding cases) */
last_hz_ctb_wd = ps_frm_ctb_prms->i4_cu_aligned_pic_wd - ((num_ctbs_horz_pic - 1) * ctb_size);
/* Valid Height (pixels) in the last CTB row (padding cases) */
last_vt_ctb_ht = ps_frm_ctb_prms->i4_cu_aligned_pic_ht -
((ps_frm_ctb_prms->i4_num_ctbs_vert - 1) * ctb_size);
/* reset the states copied flag */
ps_ctxt->u1_cabac_states_next_row_copied_flag = 0;
ps_ctxt->u1_cabac_states_first_cu_copied_flag = 0;
/* populate the cu prms which are common for entire ctb row */
s_cu_prms.i4_luma_src_stride = ps_curr_src_bufs->i4_y_strd;
s_cu_prms.i4_chrm_src_stride = ps_curr_src_bufs->i4_uv_strd;
s_cu_prms.i4_luma_recon_stride = ps_curr_recon_bufs->i4_y_strd;
s_cu_prms.i4_chrm_recon_stride = ps_curr_recon_bufs->i4_uv_strd;
s_cu_prms.i4_ctb_size = ctb_size;
ps_ctxt->i4_is_first_cu_qg_coded = 0;
/* Initialize the number of PUs for the first CTB to 0 */
*pu2_num_pu_map = 0;
/*Getting the address of BS and Qp arrays and other info*/
memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
{
WORD32 num_ctbs_horz_tile;
/* Update the pointers which are accessed not by using ctb_ctr
to the tile start here! */
ps_ctb_in += ps_tile_params->i4_first_ctb_x;
ps_ctb_out += ps_tile_params->i4_first_ctb_x;
ps_row_cu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_cus_in_ctb);
ps_row_tu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_tus_in_ctb);
ps_row_pu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
pu1_row_pu_map += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
pu1_row_ecd_data +=
(ps_tile_params->i4_first_ctb_x *
((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_num_tus_in_ctb << 1)
: ((ps_frm_ctb_prms->i4_num_tus_in_ctb * 3) >> 1)) *
MAX_SCAN_COEFFS_BYTES_4x4);
/* Update the pointers to the tile start */
s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
(ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one vertical edge per 8x8 block
s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
(ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one horizontal edge per 8x8 block
s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
num_ctbs_horz_tile = ps_tile_params->i4_curr_tile_wd_in_ctb_unit;
ctb_start = ps_tile_params->i4_first_ctb_x;
ctb_end = ps_tile_params->i4_first_ctb_x + num_ctbs_horz_tile;
}
ps_ctb_out_dblk = ps_ctb_out;
ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_ctxt->i4_frame_qp;
/* --------- Loop over all the CTBs in a row --------------- */
for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
{
cu_final_update_prms s_cu_update_prms;
cur_ctb_cu_tree_t *ps_cu_tree_analyse;
me_ctb_data_t *ps_cu_me_data;
ipe_l0_ctb_analyse_for_me_t *ps_ctb_ipe_analyse;
cu_enc_loop_out_t *ps_cu_final;
pu_col_mv_t *ps_ctb_col_pu;
WORD32 cur_ctb_ht, cur_ctb_wd;
WORD32 last_cu_pos_in_ctb;
WORD32 last_cu_size;
WORD32 num_pus_in_ctb;
UWORD8 u1_is_ctb_noisy;
ps_ctb_col_pu = ps_row_col_pu + ctb_ctr * ps_frm_ctb_prms->i4_num_pus_in_ctb;
if(ctb_ctr)
{
ps_ctxt->i4_prev_QP = ps_ctxt->i4_last_cu_qp_from_prev_ctb;
}
/*If Sup pic rc is enabled*/
if(ps_ctxt->i4_sub_pic_level_rc)
{
ihevce_sub_pic_rc_scale_query((void *)ps_multi_thrd_ctxt, (void *)ps_ctxt);
}
/* check if the current row processed in last CTb row */
last_ctb_col_flag = (ctb_ctr == (num_ctbs_horz_pic - 1));
if(1 == last_ctb_col_flag)
{
cur_ctb_wd = last_hz_ctb_wd;
}
else
{
cur_ctb_wd = ctb_size;
}
/* If it's the last CTB, get the actual ht of CTB */
if(1 == last_ctb_row_flag)
{
cur_ctb_ht = last_vt_ctb_ht;
}
else
{
cur_ctb_ht = ctb_size;
}
ps_ctxt->u4_cur_ctb_ht = cur_ctb_ht;
ps_ctxt->u4_cur_ctb_wd = cur_ctb_wd;
/* Wait till reference frame recon is available */
/* ------------ Wait till current data is ready from ME -------------- */
/*only for ref instance and Non I pics */
if((ps_ctxt->i4_bitrate_instance_num == 0) &&
((ISLICE != ps_ctxt->i1_slice_type) || L0ME_IN_OPENLOOP_MODE))
{
if(ctb_ctr < (num_ctbs_horz_pic))
{
ihevce_dmgr_chk_row_row_sync(
pv_dep_mngr_encloop_dep_me,
ctb_ctr,
1,
vert_ctr,
ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
ps_ctxt->thrd_id);
}
}
/* store the cu pointer for current ctb out */
ps_ctb_out->ps_enc_cu = ps_row_cu;
ps_cu_final = ps_row_cu;
/* Get the base point of CU recursion tree */
if(ISLICE != ps_ctxt->i1_slice_type)
{
ps_cu_tree_analyse = ps_ctb_in->ps_cu_tree;
ASSERT(ps_ctb_in->ps_cu_tree == (ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE)));
}
else
{
/* Initialize ptr to current CTB */
ps_cu_tree_analyse = ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE);
}
/* Get the ME data pointer for 16x16 block data in ctb */
ps_cu_me_data = ps_ctb_in->ps_me_ctb_data;
u1_is_ctb_noisy = ps_ctb_in->s_ctb_noise_params.i4_noise_present;
s_cu_prms.u1_is_cu_noisy = u1_is_ctb_noisy;
s_cu_prms.pu1_is_8x8Blk_noisy = ps_ctb_in->s_ctb_noise_params.au1_is_8x8Blk_noisy;
/* store the ctb level prms in cu prms */
s_cu_prms.i4_ctb_pos = ctb_ctr;
s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
{
s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
}
s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
/* Initialize ptr to current CTB */
ps_ctb_ipe_analyse = ps_row_ipe_analyse + ctb_ctr; // * ctb_size;
/* reset the map idx for current ctb */
col_pu_map_idx = 0;
num_pus_in_ctb = 0;
/* reset the map buffer to 0*/
memset(
&ps_ctxt->au1_nbr_ctb_map[0][0],
0,
(MAX_PU_IN_CTB_ROW + 1 + 8) * (MAX_PU_IN_CTB_ROW + 1 + 8));
/* set the CTB neighbour availability flags */
ihevce_set_ctb_nbr(
&ps_ctb_out->s_ctb_nbr_avail_flags,
ps_ctxt->pu1_ctb_nbr_map,
ps_ctxt->i4_nbr_map_strd,
ctb_ctr,
vert_ctr,
ps_frm_ctb_prms);
/* -------- update the cur CTB offsets for inter prediction-------- */
ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = ctb_ctr * ctb_size;
ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = vert_ctr * ctb_size;
/* -------- update the cur CTB offsets for MV prediction-------- */
ps_ctxt->s_mv_pred_ctxt.i4_ctb_x = ctb_ctr;
ps_ctxt->s_mv_pred_ctxt.i4_ctb_y = vert_ctr;
/* -------------- Boundary Strength Initialization ----------- */
if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
{
ihevce_bs_init_ctb(&ps_ctxt->s_deblk_bs_prms, ps_frm_ctb_prms, ctb_ctr, vert_ctr);
}
/* -------- update cur CTB offsets for entropy rdopt context------- */
ihevce_entropy_rdo_ctb_init(&ps_ctxt->s_rdopt_entropy_ctxt, ctb_ctr, vert_ctr);
/* --------- CU Recursion --------------- */
{
#if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
WORD32 i4_max_tree_depth = 4;
#endif
WORD32 i4_tree_depth = 0;
/* Init no. of CU in CTB to 0*/
ps_ctb_out->u1_num_cus_in_ctb = 0;
#if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
if(ps_ctxt->i4_bitrate_instance_num == 0)
{
WORD32 i4_max_tree_depth = 4;
WORD32 i;
for(i = 0; i < i4_max_tree_depth; i++)
{
COPY_CABAC_STATES(
&ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
&ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
IHEVC_CAB_CTXT_END * sizeof(UWORD8));
}
}
#else
if(ps_ctxt->i4_bitrate_instance_num == 0)
{
if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
{
WORD32 i4_max_tree_depth = 4;
WORD32 i;
for(i = 0; i < i4_max_tree_depth; i++)
{
COPY_CABAC_STATES(
&ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
&ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
IHEVC_CAB_CTXT_END * sizeof(UWORD8));
}
}
}
#endif
if(ps_ctxt->i4_bitrate_instance_num == 0)
{
/* FOR I- PIC populate the curr_ctb accordingly */
if(ISLICE == ps_ctxt->i1_slice_type)
{
ps_ctb_ipe_analyse->ps_cu_tree_root = ps_cu_tree_analyse;
ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
ihevce_populate_cu_tree(
ps_ctb_ipe_analyse,
ps_cu_tree_analyse,
0,
(IHEVCE_QUALITY_CONFIG_T)ps_ctxt->i4_quality_preset,
POS_NA,
POS_NA,
POS_NA);
}
}
ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
ps_ctxt->ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
if(ps_ctxt->i4_use_ctb_level_lamda)
{
ihevce_compute_cu_level_QP(
ps_ctxt, -1, ps_ctb_ipe_analyse->i4_64x64_act_factor[3][1], 0);
}
s_cu_update_prms.ppu1_row_ecd_data = &pu1_row_ecd_data;
s_cu_update_prms.pi4_last_cu_pos_in_ctb = &last_cu_pos_in_ctb;
s_cu_update_prms.pi4_last_cu_size = &last_cu_size;
s_cu_update_prms.pi4_num_pus_in_ctb = &num_pus_in_ctb;
s_cu_update_prms.pps_cu_final = &ps_cu_final;
s_cu_update_prms.pps_row_pu = &ps_row_pu;
s_cu_update_prms.pps_row_tu = &ps_row_tu;
s_cu_update_prms.pu1_num_cus_in_ctb_out = &ps_ctb_out->u1_num_cus_in_ctb;
// source satd computation
/* compute the source 8x8 SATD for the current CTB */
/* populate pui4_source_satd in some structure and pass it inside */
if(ps_ctxt->u1_enable_psyRDOPT)
{
/* declare local variables */
WORD32 i;
WORD32 ctb_size;
WORD32 num_comp_had_blocks;
UWORD8 *pu1_l0_block;
WORD32 block_ht;
WORD32 block_wd;
WORD32 ht_offset;
WORD32 wd_offset;
WORD32 num_horz_blocks;
WORD32 had_block_size;
WORD32 total_had_block_size;
WORD16 pi2_residue_had_zscan[64];
UWORD8 ai1_zeros_buffer[64];
WORD32 index_satd;
WORD32 is_hbd;
/* initialize the variables */
block_ht = cur_ctb_ht;
block_wd = cur_ctb_wd;
is_hbd = ps_ctxt->u1_is_input_data_hbd;
had_block_size = 8;
total_had_block_size = had_block_size * had_block_size;
for(i = 0; i < total_had_block_size; i++)
{
ai1_zeros_buffer[i] = 0;
}
ctb_size = block_ht * block_wd; //ctb_width * ctb_height;
num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
num_horz_blocks = block_wd / had_block_size; //ctb_width / had_block_size;
ht_offset = -had_block_size;
wd_offset = -had_block_size;
index_satd = 0;
/*Loop over all 8x8 blocsk in the CTB*/
for(i = 0; i < num_comp_had_blocks; i++)
{
if(i % num_horz_blocks == 0)
{
wd_offset = -had_block_size;
ht_offset += had_block_size;
}
wd_offset += had_block_size;
if(!is_hbd)
{
/* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
pu1_l0_block = s_cu_prms.pu1_luma_src +
ps_curr_src_bufs->i4_y_strd * ht_offset + wd_offset;
ps_ctxt->ai4_source_satd_8x8[index_satd] =
ps_ctxt->s_cmn_opt_func.pf_AC_HAD_8x8_8bit(
pu1_l0_block,
ps_curr_src_bufs->i4_y_strd,
ai1_zeros_buffer,
had_block_size,
pi2_residue_had_zscan,
had_block_size);
}
index_satd++;
}
}
if(ps_ctxt->u1_enable_psyRDOPT)
{
/* declare local variables */
WORD32 i;
WORD32 ctb_size;
WORD32 num_comp_had_blocks;
UWORD8 *pu1_l0_block;
UWORD8 *pu1_l0_block_prev = NULL;
WORD32 block_ht;
WORD32 block_wd;
WORD32 ht_offset;
WORD32 wd_offset;
WORD32 num_horz_blocks;
WORD32 had_block_size;
WORD16 pi2_residue_had[64];
UWORD8 ai1_zeros_buffer[64];
WORD32 index_satd = 0;
WORD32 is_hbd;
is_hbd = ps_ctxt->u1_is_input_data_hbd; // 8 bit
/* initialize the variables */
/* change this based ont he bit depth */
// ps_ctxt->u1_chroma_array_type
if(ps_ctxt->u1_chroma_array_type == 1)
{
block_ht = cur_ctb_ht / 2;
block_wd = cur_ctb_wd / 2;
}
else
{
block_ht = cur_ctb_ht;
block_wd = cur_ctb_wd / 2;
}
had_block_size = 4;
memset(ai1_zeros_buffer, 0, 64 * sizeof(UWORD8));
ctb_size = block_ht * block_wd; //ctb_width * ctb_height;
num_comp_had_blocks = 2 * ctb_size / (had_block_size * had_block_size);
num_horz_blocks = 2 * block_wd / had_block_size; //ctb_width / had_block_size;
ht_offset = -had_block_size;
wd_offset = -had_block_size;
if(!is_hbd)
{
/* loop over for every 4x4 blocks in the CU for Cb */
for(i = 0; i < num_comp_had_blocks; i++)
{
if(i % num_horz_blocks == 0)
{
wd_offset = -had_block_size;
ht_offset += had_block_size;
}
wd_offset += had_block_size;
/* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
if(i % 2 != 0)
{
if(!is_hbd)
{
pu1_l0_block = pu1_l0_block_prev + 1;
}
}
else
{
if(!is_hbd)
{
pu1_l0_block = s_cu_prms.pu1_chrm_src +
s_cu_prms.i4_chrm_src_stride * ht_offset + wd_offset;
pu1_l0_block_prev = pu1_l0_block;
}
}
if(had_block_size == 4)
{
if(!is_hbd)
{
ps_ctxt->ai4_source_chroma_satd[index_satd] =
ps_ctxt->s_cmn_opt_func.pf_chroma_AC_HAD_4x4_8bit(
pu1_l0_block,
s_cu_prms.i4_chrm_src_stride,
ai1_zeros_buffer,
had_block_size,
pi2_residue_had,
had_block_size);
}
index_satd++;
} // block size of 4x4
} // for all blocks
} // is hbd check
}
ihevce_cu_recurse_decide(
ps_ctxt,
&s_cu_prms,
ps_cu_tree_analyse,
ps_cu_tree_analyse,
ps_ctb_ipe_analyse,
ps_cu_me_data,
&ps_ctb_col_pu,
&s_cu_update_prms,
pu1_row_pu_map,
&col_pu_map_idx,
i4_tree_depth,
ctb_ctr << 6,
vert_ctr << 6,
cur_ctb_ht);
if(ps_ctxt->i1_slice_type != ISLICE)
{
ASSERT(
(cur_ctb_wd * cur_ctb_ht) <=
ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree_analyse));
}
/*If Sub pic rc is enabled*/
if(1 == ps_ctxt->i4_sub_pic_level_rc)
{
/*In a row, after the required CTB is reached, send data and query scale from Bit Control thread */
ihevce_sub_pic_rc_in_data(
(void *)ps_multi_thrd_ctxt,
(void *)ps_ctxt,
(void *)ps_ctb_ipe_analyse,
(void *)ps_frm_ctb_prms);
}
ps_ctxt->ps_enc_out_ctxt->u1_cu_size = 128;
} /* End of CU recursion block */
#if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
{
ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
do
{
ihevce_update_final_cu_results(
ps_ctxt,
ps_enc_out_ctxt,
ps_cu_prms,
NULL, /* &ps_ctb_col_pu */
NULL, /* &col_pu_map_idx */
&s_cu_update_prms,
ctb_ctr,
vert_ctr);
ps_enc_out_ctxt++;
ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
} while(ps_enc_out_ctxt->u1_cu_size != 128);
}
#else
if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
{
ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
do
{
ihevce_update_final_cu_results(
ps_ctxt,
ps_enc_out_ctxt,
ps_cu_prms,
NULL, /* &ps_ctb_col_pu */
NULL, /* &col_pu_map_idx */
&s_cu_update_prms,
ctb_ctr,
vert_ctr);
ps_enc_out_ctxt++;
ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
} while(ps_enc_out_ctxt->u1_cu_size != 128);
}
#endif
/* --- ctb level copy of data to left buffers--*/
((pf_enc_loop_ctb_left_copy)ps_ctxt->pv_enc_loop_ctb_left_copy)(ps_ctxt, &s_cu_prms);
if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
{
/* For the Unaligned CTB, make the invalid edge boundary strength 0 */
ihevce_bs_clear_invalid(
&ps_ctxt->s_deblk_bs_prms,
last_ctb_row_flag,
(ctb_ctr == (num_ctbs_horz_pic - 1)),
last_hz_ctb_wd,
last_vt_ctb_ht);
/* -----------------Read boundary strengths for current CTB------------- */
if((0 == ps_ctxt->i4_deblock_type) && (ps_ctxt->i4_deblk_pad_hpel_cur_pic))
{
/*Storing boundary strengths of current CTB*/
UWORD32 *pu4_bs_horz = &ps_ctxt->s_deblk_bs_prms.au4_horz_bs[0];
UWORD32 *pu4_bs_vert = &ps_ctxt->s_deblk_bs_prms.au4_vert_bs[0];
memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_vert, pu4_bs_vert, (ctb_size * 4) / 8);
memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_horz, pu4_bs_horz, (ctb_size * 4) / 8);
}
//Increment for storing next CTB info
s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
(ctb_size >> 3); //one vertical edge per 8x8 block
s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
(ctb_size >> 3); //one horizontal edge per 8x8 block
}
/* -------------- ctb level updates ----------------- */
ps_row_cu += ps_ctb_out->u1_num_cus_in_ctb;
pu1_row_pu_map += (ctb_size >> 2) * (ctb_size >> 2);
/* first ctb offset will be populated by the caller */
if(0 != ctb_ctr)
{
pu4_pu_offsets[ctb_ctr] = pu4_pu_offsets[ctb_ctr - 1] + num_pus_in_ctb;
}
pu2_num_pu_map[ctb_ctr] = num_pus_in_ctb;
ASSERT(ps_ctb_out->u1_num_cus_in_ctb != 0);
ps_ctb_in++;
ps_ctb_out++;
}
/* ---------- Encloop end of row updates ----------------- */
/* at the end of row processing cu pixel counter is set to */
/* (num ctb * ctbsize) + ctb size */
/* this is to set the dependency for right most cu of last */
/* ctb's top right data dependency */
/* this even takes care of entropy dependency for */
/* incomplete ctb as well */
ihevce_dmgr_set_row_row_sync(
pv_dep_mngr_enc_loop_cu_top_right,
(ctb_ctr * ctb_size + ctb_size),
vert_ctr,
ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
ps_ctxt->s_sao_ctxt_t.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
/* Restore structure.
Getting the address of stored-BS and Qp-map and other info */
memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
{
/* Update the pointers to the tile start */
s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
(ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one vertical edge per 8x8 block
s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
(ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one horizontal edge per 8x8 block
s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
}
#if PROFILE_ENC_REG_DATA
s_profile.u8_enc_reg_data[vert_ctr] = 0;
#endif
/* -- Loop over all the CTBs in a row for Deblocking and Subpel gen --- */
if(!ps_ctxt->u1_is_input_data_hbd)
{
WORD32 last_col_pic, last_col_tile;
for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
{
/* store the ctb level prms in cu prms */
s_cu_prms.i4_ctb_pos = ctb_ctr;
s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
/* If last ctb in the horizontal row */
if(ctb_ctr == (num_ctbs_horz_pic - 1))
{
last_col_pic = 1;
}
else
{
last_col_pic = 0;
}
/* If last ctb in the tile row */
if(ctb_ctr == (ctb_end - 1))
{
last_col_tile = 1;
}
else
{
last_col_tile = 0;
}
if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
{
/* Wait till top neighbour CTB has done its deblocking*/
if(ctb_ctr < (ctb_end)-1)
{
ihevce_dmgr_chk_row_row_sync(
pv_dep_mngr_enc_loop_dblk,
ctb_ctr,
dblk_offset,
dblk_check_dep_pos,
ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
ps_ctxt->thrd_id);
}
if((0 == ps_ctxt->i4_deblock_type))
{
/* Populate Qp-map */
if(ctb_start == ctb_ctr)
{
ihevce_deblk_populate_qp_map(
ps_ctxt,
&s_deblk_ctb_row_params,
ps_ctb_out_dblk,
vert_ctr,
ps_frm_ctb_prms,
ps_tile_params);
}
ps_ctxt->s_deblk_prms.i4_ctb_size = ctb_size;
/* recon pointers and stride */
ps_ctxt->s_deblk_prms.pu1_ctb_y = s_cu_prms.pu1_luma_recon;
ps_ctxt->s_deblk_prms.pu1_ctb_uv = s_cu_prms.pu1_chrm_recon;
ps_ctxt->s_deblk_prms.i4_luma_pic_stride = s_cu_prms.i4_luma_recon_stride;
ps_ctxt->s_deblk_prms.i4_chroma_pic_stride = s_cu_prms.i4_chrm_recon_stride;
ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge = (0 == vert_ctr) ? 0 : 1;
{
ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge =
(ps_tile_params->i4_first_ctb_y == vert_ctr) ? 0 : 1;
}
ps_ctxt->s_deblk_prms.i4_deblock_left_ctb_edge = (ctb_start == ctb_ctr) ? 0 : 1;
//or according to slice boundary. Support yet to be added !!!!
ihevce_deblk_ctb(
&ps_ctxt->s_deblk_prms, last_col_tile, &s_deblk_ctb_row_params);
//Increment for storing next CTB info
s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
(ctb_size >> 3); //one vertical edge per 8x8 block
s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
(ctb_size >> 3); //one horizontal edge per 8x8 block
s_deblk_ctb_row_params.pi1_ctb_row_qp +=
(ctb_size >> 2); //one qp per 4x4 block.
} //end of if((0 == ps_ctxt->i4_deblock_type)
} // end of if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
/* Apply SAO over the previous CTB-row */
if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
{
sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
if((vert_ctr > ps_tile_params->i4_first_ctb_y) &&
(ctb_ctr > ctb_start)) //if((vert_ctr > 0) && (ctb_ctr > 0))
{
/* Call the sao function to do sao for the current ctb*/
/* Register the curr ctb's x pos in sao context*/
ps_sao_ctxt->i4_ctb_x = ctb_ctr - 1;
/* Register the curr ctb's y pos in sao context*/
ps_sao_ctxt->i4_ctb_y = vert_ctr - 1;
ps_ctb_out_sao = ps_sao_ctxt->ps_ctb_out +
(vert_ctr - 1) * ps_frm_ctb_prms->i4_num_ctbs_horz +
(ctb_ctr - 1);
ps_sao_ctxt->ps_sao = &ps_ctb_out_sao->s_sao;
ps_sao_ctxt->i4_sao_blk_wd = ctb_size;
ps_sao_ctxt->i4_sao_blk_ht = ctb_size;
ps_sao_ctxt->i4_is_last_ctb_row = 0;
ps_sao_ctxt->i4_is_last_ctb_col = 0;
/* Calculate the recon buf pointer and stride for the current ctb */
ps_sao_ctxt->pu1_cur_luma_recon_buf =
ps_sao_ctxt->pu1_frm_luma_recon_buf +
(ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
(ps_sao_ctxt->i4_ctb_x * ctb_size);
ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
ps_sao_ctxt->pu1_cur_chroma_recon_buf =
ps_sao_ctxt->pu1_frm_chroma_recon_buf +
(ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
(ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
(ps_sao_ctxt->i4_ctb_x * ctb_size);
ps_sao_ctxt->i4_cur_chroma_recon_stride =
ps_sao_ctxt->i4_frm_chroma_recon_stride;
ps_sao_ctxt->pu1_cur_luma_src_buf =
ps_sao_ctxt->pu1_frm_luma_src_buf +
(ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
(ps_sao_ctxt->i4_ctb_x * ctb_size);
ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
ps_sao_ctxt->pu1_cur_chroma_src_buf =
ps_sao_ctxt->pu1_frm_chroma_src_buf +
(ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
(ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
(ps_sao_ctxt->i4_ctb_x * ctb_size);
ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
/* Calculate the pointer to buff to store the (x,y)th sao
* for the top merge of (x,y+1)th ctb
*/
ps_sao_ctxt->ps_top_ctb_sao =
&ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
[ps_sao_ctxt->i4_ctb_x +
(ps_sao_ctxt->i4_ctb_y) *
ps_frm_ctb_prms->i4_num_ctbs_horz +
(ps_ctxt->i4_bitrate_instance_num *
ps_sao_ctxt->i4_num_ctb_units)];
/* Calculate the pointer to buff to store the top pixels of curr ctb*/
ps_sao_ctxt->pu1_curr_sao_src_top_luma =
ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
(ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
ps_sao_ctxt->i4_ctb_x * ctb_size +
ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
ps_sao_ctxt->i4_top_chroma_buf_size);
/* Calculate the pointer to buff to store the top pixels of curr ctb*/
ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
(ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
ps_sao_ctxt->i4_ctb_x * ctb_size +
ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
ps_sao_ctxt->i4_top_chroma_buf_size);
{
UWORD32 u4_ctb_sao_bits;
ihevce_sao_analyse(
&ps_ctxt->s_sao_ctxt_t,
ps_ctb_out_sao,
&u4_ctb_sao_bits,
ps_tile_params);
ps_ctxt
->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
[ps_ctxt->i4_bitrate_instance_num]
->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
ps_ctxt
->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
[ps_ctxt->i4_bitrate_instance_num]
->u4_frame_rdopt_bits += u4_ctb_sao_bits;
}
if(ps_ctxt->i4_deblk_pad_hpel_cur_pic &
0x1) /** Subpel generation not done for non-ref picture **/
{
/* Padding and Subpel Plane Generation */
ihevce_pad_interp_recon_ctb(
ps_pad_interp_recon,
ctb_ctr - 1,
vert_ctr - 1,
ps_ctxt->i4_quality_preset,
ps_frm_ctb_prms,
ps_ctxt->ai2_scratch,
ps_ctxt->i4_bitrate_instance_num,
ps_ctxt->ps_func_selector);
}
}
/* Call the sao function again for the last ctb of the previous row*/
if(((ctb_ctr + 1) == (ctb_end)) &&
(vert_ctr >
ps_tile_params
->i4_first_ctb_y)) //( ((ctb_ctr+1) == ps_frm_ctb_prms->i4_num_ctbs_horz) && (vert_ctr > 0) )
{
/* Register the curr ctb's x pos in sao context*/
ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr;
/* Register the curr ctb's y pos in sao context*/
ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr - 1;
ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out +
(vert_ctr - 1) * ps_frm_ctb_prms->i4_num_ctbs_horz + (ctb_ctr);
ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao;
ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd =
ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
ps_tile_params->i4_curr_tile_width);
ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht = ps_ctxt->s_sao_ctxt_t.i4_ctb_size;
ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 0;
ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 1;
/* Calculate the recon buf pointer and stride for the current ctb */
ps_sao_ctxt->pu1_cur_luma_recon_buf =
ps_sao_ctxt->pu1_frm_luma_recon_buf +
(ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
(ps_sao_ctxt->i4_ctb_x * ctb_size);
ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
ps_sao_ctxt->pu1_cur_chroma_recon_buf =
ps_sao_ctxt->pu1_frm_chroma_recon_buf +
(ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
(ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
(ps_sao_ctxt->i4_ctb_x * ctb_size);
ps_sao_ctxt->i4_cur_chroma_recon_stride =
ps_sao_ctxt->i4_frm_chroma_recon_stride;
ps_sao_ctxt->pu1_cur_luma_src_buf =
ps_sao_ctxt->pu1_frm_luma_src_buf +
(ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
(ps_sao_ctxt->i4_ctb_x * ctb_size);
ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
ps_sao_ctxt->pu1_cur_chroma_src_buf =
ps_sao_ctxt->pu1_frm_chroma_src_buf +
(ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
(ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
(ps_sao_ctxt->i4_ctb_x * ctb_size);
ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
/* Calculate the pointer to buff to store the (x,y)th sao
* for the top merge of (x,y+1)th ctb
*/
ps_sao_ctxt->ps_top_ctb_sao =
&ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
[ps_sao_ctxt->i4_ctb_x +
(ps_sao_ctxt->i4_ctb_y) *
ps_frm_ctb_prms->i4_num_ctbs_horz +
(ps_ctxt->i4_bitrate_instance_num *
ps_sao_ctxt->i4_num_ctb_units)];
/* Calculate the pointer to buff to store the top pixels of curr ctb*/
ps_sao_ctxt->pu1_curr_sao_src_top_luma =
ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
(ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
ps_sao_ctxt->i4_ctb_x * ctb_size +
ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
ps_sao_ctxt->i4_top_chroma_buf_size);
/* Calculate the pointer to buff to store the top pixels of curr ctb*/
ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
(ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
ps_sao_ctxt->i4_ctb_x * ctb_size +
ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
ps_sao_ctxt->i4_top_chroma_buf_size);
{
UWORD32 u4_ctb_sao_bits;
ihevce_sao_analyse(
&ps_ctxt->s_sao_ctxt_t,
ps_ctb_out_sao,
&u4_ctb_sao_bits,
ps_tile_params);
ps_ctxt
->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
[ps_ctxt->i4_bitrate_instance_num]
->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
ps_ctxt
->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
[ps_ctxt->i4_bitrate_instance_num]
->u4_frame_rdopt_bits += u4_ctb_sao_bits;
}
if(ps_ctxt->i4_deblk_pad_hpel_cur_pic &
0x1) /** Subpel generation not done for non-ref picture **/
{
/* Padding and Subpel Plane Generation */
ihevce_pad_interp_recon_ctb(
ps_pad_interp_recon,
ctb_ctr,
vert_ctr - 1,
ps_ctxt->i4_quality_preset,
ps_frm_ctb_prms,
ps_ctxt->ai2_scratch,
ps_ctxt->i4_bitrate_instance_num,
ps_ctxt->ps_func_selector);
}
}
}
else //SAO Disabled
{
if(1 == ps_ctxt->i4_deblk_pad_hpel_cur_pic)
{
/* Padding and Subpel Plane Generation */
ihevce_pad_interp_recon_ctb(
ps_pad_interp_recon,
ctb_ctr,
vert_ctr,
ps_ctxt->i4_quality_preset,
ps_frm_ctb_prms,
ps_ctxt->ai2_scratch,
ps_ctxt->i4_bitrate_instance_num,
ps_ctxt->ps_func_selector);
}
}
/* update the number of ctbs deblocked for this row */
ihevce_dmgr_set_row_row_sync(
pv_dep_mngr_enc_loop_dblk,
(ctb_ctr + 1),
vert_ctr,
ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
} //end of loop over CTBs in current CTB-row
{
if(!ps_ctxt->i4_bitrate_instance_num)
{
if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
{
/* If SAO is on, then signal completion of previous CTB row */
if(0 != vert_ctr)
{
{
WORD32 post_ctb_ctr;
for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
{
ihevce_dmgr_map_set_sync(
pv_dep_mngr_me_dep_encloop,
post_ctb_ctr,
(vert_ctr - 1),
MAP_CTB_COMPLETE);
}
}
}
}
else
{
{
WORD32 post_ctb_ctr;
for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
{
ihevce_dmgr_map_set_sync(
pv_dep_mngr_me_dep_encloop,
post_ctb_ctr,
vert_ctr,
MAP_CTB_COMPLETE);
}
}
}
}
}
/* Call the sao function again for the last ctb row of frame */
if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
{
sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
{
if((vert_ctr == (ps_tile_params->i4_first_ctb_y +
ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1)) &&
(ctb_ctr >
ctb_start)) //((vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1)) && (ctb_ctr > 0))
{
/* Register the curr ctb's x pos in sao context*/
ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr - 1;
/* Register the curr ctb's y pos in sao context*/
ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr;
ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out +
(vert_ctr)*ps_frm_ctb_prms->i4_num_ctbs_horz + (ctb_ctr - 1);
ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao;
ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd = ps_ctxt->s_sao_ctxt_t.i4_ctb_size;
ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 0;
ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht =
ctb_size - ((ps_tile_params->i4_curr_tile_ht_in_ctb_unit * ctb_size) -
ps_tile_params->i4_curr_tile_height);
ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 1;
/* Calculate the recon buf pointer and stride for the current ctb */
ps_sao_ctxt->pu1_cur_luma_recon_buf =
ps_sao_ctxt->pu1_frm_luma_recon_buf +
(ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
(ps_sao_ctxt->i4_ctb_x * ctb_size);
ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
ps_sao_ctxt->pu1_cur_chroma_recon_buf =
ps_sao_ctxt->pu1_frm_chroma_recon_buf +
(ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
(ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
(ps_sao_ctxt->i4_ctb_x * ctb_size);
ps_sao_ctxt->i4_cur_chroma_recon_stride =
ps_sao_ctxt->i4_frm_chroma_recon_stride;
ps_sao_ctxt->pu1_cur_luma_src_buf =
ps_sao_ctxt->pu1_frm_luma_src_buf +
(ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
(ps_sao_ctxt->i4_ctb_x * ctb_size);
ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
ps_sao_ctxt->pu1_cur_chroma_src_buf =
ps_sao_ctxt->pu1_frm_chroma_src_buf +
(ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
(ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
(ps_sao_ctxt->i4_ctb_x * ctb_size);
ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
/* Calculate the pointer to buff to store the (x,y)th sao
* for the top merge of (x,y+1)th ctb
*/
ps_sao_ctxt->ps_top_ctb_sao =
&ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
[ps_sao_ctxt->i4_ctb_x +
(ps_sao_ctxt->i4_ctb_y) *
ps_frm_ctb_prms->i4_num_ctbs_horz +
(ps_ctxt->i4_bitrate_instance_num *
ps_sao_ctxt->i4_num_ctb_units)];
/* Calculate the pointer to buff to store the top pixels of curr ctb*/
ps_sao_ctxt->pu1_curr_sao_src_top_luma =
ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
(ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
ps_sao_ctxt->i4_ctb_x * ctb_size +
ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
ps_sao_ctxt->i4_top_chroma_buf_size);
/* Calculate the pointer to buff to store the top pixels of curr ctb*/
ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
(ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
ps_sao_ctxt->i4_ctb_x * ctb_size +
ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
ps_sao_ctxt->i4_top_chroma_buf_size);
{
UWORD32 u4_ctb_sao_bits;
ihevce_sao_analyse(
&ps_ctxt->s_sao_ctxt_t,
ps_ctb_out_sao,
&u4_ctb_sao_bits,
ps_tile_params);
ps_ctxt
->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
[ps_ctxt->i4_bitrate_instance_num]
->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
ps_ctxt
->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
[ps_ctxt->i4_bitrate_instance_num]
->u4_frame_rdopt_bits += u4_ctb_sao_bits;
}
if(ps_ctxt->i4_deblk_pad_hpel_cur_pic &
0x1) /** Subpel generation not done for non-ref picture **/
{
/* Padding and Subpel Plane Generation */
ihevce_pad_interp_recon_ctb(
ps_pad_interp_recon,
ctb_ctr - 1,
vert_ctr,
ps_ctxt->i4_quality_preset,
ps_frm_ctb_prms,
ps_ctxt->ai2_scratch,
ps_ctxt->i4_bitrate_instance_num,
ps_ctxt->ps_func_selector);
}
}
/* Call the sao function again for the last ctb of the last ctb row of frame */
if((vert_ctr == (ps_tile_params->i4_first_ctb_y +
ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1)) &&
((ctb_ctr + 1) ==
(ctb_end))) //( ((ctb_ctr+1) == ps_frm_ctb_prms->i4_num_ctbs_horz))
{
/* Register the curr ctb's x pos in sao context*/
ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr;
/* Register the curr ctb's y pos in sao context*/
ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr;
ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out +
(vert_ctr)*ps_frm_ctb_prms->i4_num_ctbs_horz + (ctb_ctr);
ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao;
ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd =
ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
ps_tile_params->i4_curr_tile_width);
ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht =
ctb_size - ((ps_tile_params->i4_curr_tile_ht_in_ctb_unit * ctb_size) -
ps_tile_params->i4_curr_tile_height);
ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 1;
ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 1;
/* Calculate the recon buf pointer and stride for the current ctb */
ps_sao_ctxt->pu1_cur_luma_recon_buf =
ps_sao_ctxt->pu1_frm_luma_recon_buf +
(ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
(ps_sao_ctxt->i4_ctb_x * ctb_size);
ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
ps_sao_ctxt->pu1_cur_chroma_recon_buf =
ps_sao_ctxt->pu1_frm_chroma_recon_buf +
(ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
(ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
(ps_sao_ctxt->i4_ctb_x * ctb_size);
ps_sao_ctxt->i4_cur_chroma_recon_stride =
ps_sao_ctxt->i4_frm_chroma_recon_stride;
ps_sao_ctxt->pu1_cur_luma_src_buf =
ps_sao_ctxt->pu1_frm_luma_src_buf +
(ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
(ps_sao_ctxt->i4_ctb_x * ctb_size);
ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
ps_sao_ctxt->pu1_cur_chroma_src_buf =
ps_sao_ctxt->pu1_frm_chroma_src_buf +
(ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
(ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
(ps_sao_ctxt->i4_ctb_x * ctb_size);
ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
/* Calculate the pointer to buff to store the (x,y)th sao
* for the top merge of (x,y+1)th ctb
*/
ps_sao_ctxt->ps_top_ctb_sao =
&ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
[ps_sao_ctxt->i4_ctb_x +
ps_sao_ctxt->i4_ctb_y *
ps_frm_ctb_prms->i4_num_ctbs_horz +
(ps_ctxt->i4_bitrate_instance_num *
ps_sao_ctxt->i4_num_ctb_units)];
/* Calculate the pointer to buff to store the top pixels of curr ctb*/
ps_sao_ctxt->pu1_curr_sao_src_top_luma =
ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
(ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
ps_sao_ctxt->i4_ctb_x * ctb_size +
ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
ps_sao_ctxt->i4_top_chroma_buf_size);
/* Calculate the pointer to buff to store the top pixels of curr ctb*/
ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
(ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
ps_sao_ctxt->i4_ctb_x * ctb_size +
ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
ps_sao_ctxt->i4_top_chroma_buf_size);
{
UWORD32 u4_ctb_sao_bits;
ihevce_sao_analyse(
&ps_ctxt->s_sao_ctxt_t,
ps_ctb_out_sao,
&u4_ctb_sao_bits,
ps_tile_params);
ps_ctxt
->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
[ps_ctxt->i4_bitrate_instance_num]
->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
ps_ctxt
->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
[ps_ctxt->i4_bitrate_instance_num]
->u4_frame_rdopt_bits += u4_ctb_sao_bits;
}
if(ps_ctxt->i4_deblk_pad_hpel_cur_pic &
0x1) /** Subpel generation not done for non-ref picture **/
{
/* Padding and Subpel Plane Generation */
ihevce_pad_interp_recon_ctb(
ps_pad_interp_recon,
ctb_ctr,
vert_ctr,
ps_ctxt->i4_quality_preset,
ps_frm_ctb_prms,
ps_ctxt->ai2_scratch,
ps_ctxt->i4_bitrate_instance_num,
ps_ctxt->ps_func_selector);
}
}
} //end of loop over CTBs in current CTB-row
/* If SAO is on, then signal completion of the last CTB row of frame */
{
if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1))
{
if(!ps_ctxt->i4_bitrate_instance_num)
{
{
WORD32 post_ctb_ctr;
for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
{
ihevce_dmgr_map_set_sync(
pv_dep_mngr_me_dep_encloop,
post_ctb_ctr,
vert_ctr,
MAP_CTB_COMPLETE);
}
}
}
}
}
}
}
return;
}
/*!
******************************************************************************
* \if Function name : ihevce_enc_loop_pass \endif
*
* \brief
* Frame level enc_loop pass function