| /****************************************************************************** |
| * |
| * Copyright (C) 2018 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at: |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| ***************************************************************************** |
| * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
| */ |
| |
| /*! |
| ****************************************************************************** |
| * \file ihevce_enc_loop_utils.c |
| * |
| * \brief |
| * This file contains utility functions of Encode loop |
| * |
| * \date |
| * 18/09/2012 |
| * |
| * \author |
| * Ittiam |
| * |
| * |
| * List of Functions |
| * |
| * |
| ****************************************************************************** |
| */ |
| |
| /*****************************************************************************/ |
| /* File Includes */ |
| /*****************************************************************************/ |
| /* System include files */ |
| #include <stdio.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include <assert.h> |
| #include <stdarg.h> |
| #include <math.h> |
| #include <limits.h> |
| |
| /* User include files */ |
| #include "ihevc_typedefs.h" |
| #include "itt_video_api.h" |
| #include "ihevce_api.h" |
| |
| #include "rc_cntrl_param.h" |
| #include "rc_frame_info_collector.h" |
| #include "rc_look_ahead_params.h" |
| |
| #include "ihevc_defs.h" |
| #include "ihevc_macros.h" |
| #include "ihevc_debug.h" |
| #include "ihevc_structs.h" |
| #include "ihevc_platform_macros.h" |
| #include "ihevc_deblk.h" |
| #include "ihevc_itrans_recon.h" |
| #include "ihevc_chroma_itrans_recon.h" |
| #include "ihevc_chroma_intra_pred.h" |
| #include "ihevc_intra_pred.h" |
| #include "ihevc_inter_pred.h" |
| #include "ihevc_mem_fns.h" |
| #include "ihevc_padding.h" |
| #include "ihevc_weighted_pred.h" |
| #include "ihevc_sao.h" |
| #include "ihevc_resi_trans.h" |
| #include "ihevc_quant_iquant_ssd.h" |
| #include "ihevc_cabac_tables.h" |
| #include "ihevc_common_tables.h" |
| |
| #include "ihevce_defs.h" |
| #include "ihevce_hle_interface.h" |
| #include "ihevce_lap_enc_structs.h" |
| #include "ihevce_multi_thrd_structs.h" |
| #include "ihevce_multi_thrd_funcs.h" |
| #include "ihevce_me_common_defs.h" |
| #include "ihevce_had_satd.h" |
| #include "ihevce_error_codes.h" |
| #include "ihevce_bitstream.h" |
| #include "ihevce_cabac.h" |
| #include "ihevce_rdoq_macros.h" |
| #include "ihevce_function_selector.h" |
| #include "ihevce_enc_structs.h" |
| #include "ihevce_entropy_structs.h" |
| #include "ihevce_cmn_utils_instr_set_router.h" |
| #include "ihevce_ipe_instr_set_router.h" |
| #include "ihevce_decomp_pre_intra_structs.h" |
| #include "ihevce_decomp_pre_intra_pass.h" |
| #include "ihevce_enc_loop_structs.h" |
| #include "ihevce_nbr_avail.h" |
| #include "ihevce_enc_loop_utils.h" |
| #include "ihevce_sub_pic_rc.h" |
| #include "ihevce_global_tables.h" |
| #include "ihevce_bs_compute_ctb.h" |
| #include "ihevce_cabac_rdo.h" |
| #include "ihevce_deblk.h" |
| #include "ihevce_frame_process.h" |
| #include "ihevce_rc_enc_structs.h" |
| #include "hme_datatype.h" |
| #include "hme_interface.h" |
| #include "hme_common_defs.h" |
| #include "hme_defs.h" |
| #include "hme_common_utils.h" |
| #include "ihevce_me_instr_set_router.h" |
| #include "ihevce_enc_subpel_gen.h" |
| #include "ihevce_inter_pred.h" |
| #include "ihevce_mv_pred.h" |
| #include "ihevce_mv_pred_merge.h" |
| #include "ihevce_enc_loop_inter_mode_sifter.h" |
| #include "ihevce_enc_cu_recursion.h" |
| #include "ihevce_enc_loop_pass.h" |
| #include "ihevce_common_utils.h" |
| #include "ihevce_dep_mngr_interface.h" |
| #include "ihevce_sao.h" |
| #include "ihevce_tile_interface.h" |
| #include "ihevce_profile.h" |
| #include "ihevce_stasino_helpers.h" |
| #include "ihevce_tu_tree_selector.h" |
| |
| /*****************************************************************************/ |
| /* Globals */ |
| /*****************************************************************************/ |
| |
| extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2]; |
| extern const UWORD8 gu1_hevce_scan4x4[3][16]; |
| extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc[4][16]; |
| extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_tr4[16]; |
| extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_00[16]; |
| |
| /*****************************************************************************/ |
| /* Constant Macros */ |
| /*****************************************************************************/ |
| #define ENABLE_ZERO_CBF 1 /* NOTE(review): presumably switches on zero-CBF evaluation; not referenced in the visible part of this file - confirm where it is consumed */ |
| #define DISABLE_RDOQ_INTRA 0 /* NOTE(review): presumably disables RDOQ for intra blocks when non-zero; not referenced in the visible part of this file - confirm where it is consumed */ |
| |
| /*****************************************************************************/ |
| /* Function Definitions */ |
| /*****************************************************************************/ |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_tu_tree_update \endif |
| * |
| * \brief |
| * Recursively expands packed TU split / early-CBF flags into consecutive |
| * tu_prms_t entries, starting at the entry ps_tu_prms points to. |
| * |
| * Flag layout (same for tu_split_flag and tu_early_cbf): |
| * - bit 0 belongs to the current node |
| * - when the current node size is 32, the four children (TL, TR, BL, BR) |
| * occupy 5-bit fields starting at bits 16, 11, 6 and 1 |
| * - for any other node size of 16 or more, the four children occupy |
| * single bits 4, 3, 2 and 1 |
| * |
| * \param[in,out] ps_tu_prms : entry to fill next; on entry its u1_tu_size |
| * must hold the root TU size of the tree |
| * \param[in,out] pnum_tu_in_cu : running TU count, incremented per leaf written |
| * \param[in] depth : depth of the current node below the root |
| * \param[in] tu_split_flag : packed split flags for the current node |
| * \param[in] tu_early_cbf : packed early-CBF flags for the current node |
| * \param[in] i4_x_off : x offset of the current node |
| * \param[in] i4_y_off : y offset of the current node |
| * |
| * \return |
| * While *pnum_tu_in_cu is below MAX_TU_IN_CTB: pointer to the entry after the |
| * last one written, with its u1_tu_size seeded with the root size so that the |
| * next call can read it. Otherwise: pointer to the last entry written. |
| * |
| ****************************************************************************** |
| */ |
| void *ihevce_tu_tree_update( |
|     tu_prms_t *ps_tu_prms, |
|     WORD32 *pnum_tu_in_cu, |
|     WORD32 depth, |
|     WORD32 tu_split_flag, |
|     WORD32 tu_early_cbf, |
|     WORD32 i4_x_off, |
|     WORD32 i4_y_off) |
| { |
|     /* The root size travels in the entry the cursor currently points to */ |
|     const WORD32 i4_root_size = ps_tu_prms->u1_tu_size; |
|     const WORD32 i4_node_size = i4_root_size >> depth; |
|     const WORD32 i4_child_size = i4_root_size >> (depth + 1); |
|     const WORD32 i4_node_is_split = tu_split_flag & 0x1; |
|     WORD32 i4_child; |
| |
|     if((i4_node_size >= 16) && i4_node_is_split) |
|     { |
|         /* Children of a 32 node carry 5 bits each, all others 1 bit each */ |
|         const WORD32 i4_field_bits = (32 == i4_node_size) ? 5 : 1; |
|         const WORD32 i4_field_mask = (1 << i4_field_bits) - 1; |
| |
|         /* Visit the children in TL, TR, BL, BR order */ |
|         for(i4_child = 0; i4_child < 4; i4_child++) |
|         { |
|             /* Child 0 sits in the topmost field, child 3 right above bit 0 */ |
|             const WORD32 i4_shift = 1 + ((3 - i4_child) * i4_field_bits); |
| |
|             ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update( |
|                 ps_tu_prms, |
|                 pnum_tu_in_cu, |
|                 depth + 1, |
|                 (tu_split_flag >> i4_shift) & i4_field_mask, |
|                 (tu_early_cbf >> i4_shift) & i4_field_mask, |
|                 i4_x_off + ((i4_child & 1) * i4_child_size), |
|                 i4_y_off + ((i4_child >> 1) * i4_child_size)); |
|         } |
| |
|         return ps_tu_prms; |
|     } |
| |
|     if(i4_node_is_split) |
|     { |
|         /* Node below 16 with its split bit set (the 8x8 case per the */ |
|         /* original note): emit its four children directly */ |
|         (*pnum_tu_in_cu) += 4; |
| |
|         for(i4_child = 0; i4_child < 4; i4_child++) |
|         { |
|             tu_prms_t *ps_child = ps_tu_prms + i4_child; |
| |
|             ps_child->u1_tu_size = i4_child_size; |
|             ps_child->u1_x_off = i4_x_off + ((i4_child & 1) * i4_child_size); |
|             ps_child->u1_y_off = i4_y_off + ((i4_child >> 1) * i4_child_size); |
| |
|             /* Early CBF is not done for 4x4 transforms */ |
|             ps_child->i4_early_cbf = 1; |
|         } |
| |
|         /* Leave the cursor on the BR child */ |
|         ps_tu_prms += 3; |
|     } |
|     else |
|     { |
|         /* Unsplit node: a single TU of the current node size */ |
|         ps_tu_prms->u1_tu_size = i4_node_size; |
|         ps_tu_prms->u1_x_off = i4_x_off; |
|         ps_tu_prms->u1_y_off = i4_y_off; |
|         ps_tu_prms->i4_early_cbf = tu_early_cbf & 0x1; |
| |
|         (*pnum_tu_in_cu)++; |
|     } |
| |
|     /* Advance to the next free entry and hand the root size over to it, */ |
|     /* but only while the TU count stays below MAX_TU_IN_CTB */ |
|     if((*pnum_tu_in_cu) < MAX_TU_IN_CTB) |
|     { |
|         ps_tu_prms++; |
| |
|         ps_tu_prms->u1_tu_size = i4_root_size; |
|     } |
| |
|     return ps_tu_prms; |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_compute_quant_rel_param \endif |
| * |
| * \brief |
| * Refreshes the quantization related fields of the enc loop context for a |
| * new CU QP: chroma QP, QP/6 and QP%6 for luma and chroma (both including the |
| * bit depth offset of 6 per bit above 8), and the quantization rounding |
| * factors for inter and intra prediction modes |
| * |
| * \param[in,out] ps_ctxt : enc loop context; reads u1_chroma_array_type, |
| * i4_chroma_qp_offset, u1_bit_depth and i1_slice_type, |
| * writes the fields listed above |
| * \param[in] i1_cu_qp : luma QP of the current CU |
| * |
| * \return |
| * None |
| * |
| * \date |
| * 08/01/2013 |
| * |
| * \author |
| * Ittiam |
| * |
| ****************************************************************************** |
| */ |
| void ihevce_compute_quant_rel_param(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD8 i1_cu_qp) |
| { |
|     /* QP offset contributed by the bit depth: 6 per bit above 8 */ |
|     const WORD32 i4_qp_bd_offset = 6 * (ps_ctxt->u1_bit_depth - 8); |
| |
|     /* Chroma QP: clamped at 51 when u1_chroma_array_type is 2, otherwise */ |
|     /* looked up in the chroma QP scale table */ |
|     ps_ctxt->i4_chrm_cu_qp = |
|         (ps_ctxt->u1_chroma_array_type == 2) |
|             ? MIN(i1_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51) |
|             : gai1_ihevc_chroma_qp_scale[i1_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET]; |
| |
|     ps_ctxt->i4_cu_qp_div6 = (i1_cu_qp + i4_qp_bd_offset) / 6; |
|     ps_ctxt->i4_cu_qp_mod6 = (i1_cu_qp + i4_qp_bd_offset) % 6; |
|     ps_ctxt->i4_chrm_cu_qp_div6 = (ps_ctxt->i4_chrm_cu_qp + i4_qp_bd_offset) / 6; |
|     ps_ctxt->i4_chrm_cu_qp_mod6 = (ps_ctxt->i4_chrm_cu_qp + i4_qp_bd_offset) % 6; |
| |
| #define INTER_RND_QP_BY_6 |
| #ifdef INTER_RND_QP_BY_6 |
|     /* Inter rounding factor used without RDOQ: 1/6th of the quantizer */ |
|     /* scale, rounded to nearest */ |
|     ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = |
|         (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)6) + 0.5f); |
| #else |
|     /* Inter rounding factor used without RDOQ: 1/3rd of the quantizer */ |
|     /* scale, truncated by the integer division */ |
|     ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3; |
| #endif |
| |
|     if(ISLICE == ps_ctxt->i1_slice_type) |
|     { |
|         /* Intra rounding factor in I slices: 1/3rd of the quantizer scale */ |
|         ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = |
|             (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f); |
|     } |
|     else |
|     { |
|         /* Intra blocks in non-I slices share the inter rounding factor */ |
|         ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = |
|             ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER]; |
|     } |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_populate_cl_cu_lambda_prms \endif |
| * |
| * \brief |
| * Fills the per-QP lambda tables of the enc loop context for every QP in |
| * [i2_min_qp, i2_max_qp] of the RC quant context. Table entries are indexed |
| * by (QP + i1_qp_offset). |
| * |
| * For each QP the base lambda is 2^((QP + bit depth offset - 12) / 3) times |
| * the applicable modifier; the chroma lambda uses the chroma QP instead. |
| * |
| * i4_lambda_type selects the bit depth handling: |
| * 0 : no bit depth offset; type2 tables are copies of the primary tables |
| * 1 : offset of 6 per bit above 8; type2 tables are copies of the primary |
| * tables |
| * 2 : primary tables as for type 1; type2 tables and the chroma cost |
| * weighing factor are derived without the bit depth offset |
| * |
| * \param[in,out] ps_ctxt : enc loop context; lambda tables, lambda |
| * modifiers and temporal layer id are written |
| * \param[in] ps_frm_lamda : frame level luma / chroma lambda modifiers |
| * \param[in] i4_slice_type : slice type; BSLICE together with a non-zero |
| * temporal layer id enables the QP dependent |
| * modifier scaling |
| * \param[in] i4_temporal_lyr_id : temporal layer id of the current picture |
| * \param[in] i4_lambda_type : 0, 1 or 2 as described above; any other |
| * value hits ASSERT(0) and writes no table |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_populate_cl_cu_lambda_prms( |
|     ihevce_enc_loop_ctxt_t *ps_ctxt, |
|     frm_lambda_ctxt_t *ps_frm_lamda, |
|     WORD32 i4_slice_type, |
|     WORD32 i4_temporal_lyr_id, |
|     WORD32 i4_lambda_type) |
| { |
|     WORD32 i4_curr_cu_qp; |
| |
|     /* Type 0 derives the primary lambdas without the bit depth offset */ |
|     const WORD32 i4_qp_bdoffset = |
|         (0 == i4_lambda_type) ? 0 : (6 * (ps_ctxt->u1_bit_depth - 8)); |
| |
|     /* Populate lambda modifiers and layer id */ |
|     ps_ctxt->i4_lamda_modifier = ps_frm_lamda->lambda_modifier; |
|     ps_ctxt->i4_uv_lamda_modifier = ps_frm_lamda->lambda_uv_modifier; |
|     ps_ctxt->i4_temporal_layer_id = i4_temporal_lyr_id; |
| |
|     for(i4_curr_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp; |
|         i4_curr_cu_qp <= ps_ctxt->ps_rc_quant_ctxt->i2_max_qp; |
|         i4_curr_cu_qp++) |
|     { |
|         const WORD32 chroma_qp = (ps_ctxt->i4_chroma_format == IV_YUV_422SP_UV) |
|                                      ? MIN(i4_curr_cu_qp, 51) |
|                                      : gai1_ihevc_chroma_qp_scale[i4_curr_cu_qp + MAX_QP_BD_OFFSET]; |
| |
|         /* Table index of the current QP */ |
|         const WORD32 i4_curr_cu_qp_offset = |
|             i4_curr_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset; |
| |
|         double lambda_modifier; |
|         double lambda_uv_modifier; |
|         double lambda; |
|         double lambda_uv; |
| |
|         if((BSLICE == i4_slice_type) && (i4_temporal_lyr_id)) |
|         { |
|             /* Scale the frame modifiers by (QP - 12) / 6 limited to [2, 4] */ |
|             lambda_modifier = ps_frm_lamda->lambda_modifier * |
|                               CLIP3((((double)(i4_curr_cu_qp - 12)) / 6.0), 2.00, 4.00); |
|             lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier * |
|                                  CLIP3((((double)(chroma_qp - 12)) / 6.0), 2.00, 4.00); |
|         } |
|         else |
|         { |
|             lambda_modifier = ps_frm_lamda->lambda_modifier; |
|             lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier; |
|         } |
| |
|         if(ps_ctxt->i4_use_const_lamda_modifier) |
|         { |
|             /* NOTE(review): this test reads the slice type from the context */ |
|             /* while the B slice test above uses the i4_slice_type argument; */ |
|             /* confirm both always agree */ |
|             if(ISLICE == ps_ctxt->i1_slice_type) |
|             { |
|                 lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier; |
|                 lambda_uv_modifier = ps_ctxt->f_i_pic_lamda_modifier; |
|             } |
|             else |
|             { |
|                 lambda_modifier = CONST_LAMDA_MOD_VAL; |
|                 lambda_uv_modifier = CONST_LAMDA_MOD_VAL; |
|             } |
|         } |
| |
|         switch(i4_lambda_type) |
|         { |
|         case 0: |
|         case 1: |
|         case 2: |
|         { |
|             /* Primary lambdas */ |
|             lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0)); |
|             lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0)); |
| |
|             lambda *= lambda_modifier; |
|             lambda_uv *= lambda_uv_modifier; |
| |
|             ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] = |
|                 (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT)); |
| |
|             ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] = |
|                 (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT)); |
| |
|             /* SATD lambda carries the extra 1.9 factor unless the constant */ |
|             /* lambda modifier is in use */ |
|             ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] = |
|                 ps_ctxt->i4_use_const_lamda_modifier |
|                     ? (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)) |
|                     : (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT)); |
| |
|             ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] = |
|                 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)); |
| |
|             if(2 == i4_lambda_type) |
|             { |
|                 /* lambda corresponding to 8-bit, for metrics based on 8-bit */ |
|                 /* (Example 8bit SAD in encloop) */ |
|                 lambda = pow(2.0, (((double)(i4_curr_cu_qp - 12)) / 3.0)); |
|                 lambda_uv = pow(2.0, (((double)(chroma_qp - 12)) / 3.0)); |
| |
|                 lambda *= lambda_modifier; |
|                 lambda_uv *= lambda_uv_modifier; |
| |
|                 ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] = |
|                     (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT)); |
| |
|                 ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] = |
|                     (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT)); |
| |
|                 ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] = |
|                     ps_ctxt->i4_use_const_lamda_modifier |
|                         ? (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)) |
|                         : (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT)); |
| |
|                 ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] = |
|                     (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)); |
|             } |
|             else |
|             { |
|                 /* Types 0 and 1: type2 tables mirror the primary tables */ |
|                 ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] = |
|                     ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset]; |
| |
|                 ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] = |
|                     ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset]; |
| |
|                 ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] = |
|                     ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset]; |
| |
|                 ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] = |
|                     ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset]; |
|             } |
| |
|             /* Chroma cost weight is the luma to chroma lambda ratio of the */ |
|             /* lambda pair computed last: the 8-bit pair for type 2, the */ |
|             /* primary pair otherwise */ |
|             ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] = |
|                 (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT)); |
| |
|             break; |
|         } |
|         default: |
|         { |
|             /* Unsupported lambda type */ |
|             ASSERT(0); |
|             break; |
|         } |
|         } |
|     } |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_get_cl_cu_lambda_prms \endif |
| * |
| * \brief |
| * Loads the lambda values for one CU QP from the per-QP tables of the enc |
| * loop context into its "current" lambda fields. Luma entries are indexed |
| * by (QP + i1_qp_offset); the chroma SSD lambda and the chroma cost weighing |
| * factor are indexed by (chroma QP + i1_qp_offset). |
| * |
| * \param[in,out] ps_ctxt : enc loop context; reads the lambda tables and |
| * writes i8_cl_ssd_lambda_qf, |
| * i8_cl_ssd_lambda_chroma_qf, |
| * u4_chroma_cost_weighing_factor, i4_satd_lamda |
| * and i4_sad_lamda |
| * \param[in] i4_cur_cu_qp : luma QP whose lambdas are to be selected |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_get_cl_cu_lambda_prms(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 i4_cur_cu_qp) |
| { |
|     const WORD32 i4_tbl_bias = ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset; |
|     const WORD32 i4_luma_idx = i4_cur_cu_qp + i4_tbl_bias; |
|     const WORD32 i4_chroma_qp_in = i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset; |
|     WORD32 i4_chroma_qp; |
|     WORD32 i4_chroma_idx; |
| |
|     /* Chroma QP: clamped at 51 when u1_chroma_array_type is 2, otherwise */ |
|     /* taken from the chroma QP scale table */ |
|     if(2 == ps_ctxt->u1_chroma_array_type) |
|     { |
|         i4_chroma_qp = MIN(i4_chroma_qp_in, 51); |
|     } |
|     else |
|     { |
|         i4_chroma_qp = gai1_ihevc_chroma_qp_scale[i4_chroma_qp_in + MAX_QP_BD_OFFSET]; |
|     } |
|     i4_chroma_idx = i4_chroma_qp + i4_tbl_bias; |
| |
|     /* Closed loop SSD lambdas */ |
|     ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_luma_idx]; |
|     ps_ctxt->i8_cl_ssd_lambda_chroma_qf = |
|         ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_chroma_idx]; |
|     ps_ctxt->u4_chroma_cost_weighing_factor = |
|         ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_chroma_idx]; |
| |
|     /* SATD lambda comes from the primary table, SAD lambda from the type2 */ |
|     /* table */ |
|     ps_ctxt->i4_satd_lamda = ps_ctxt->i4_satd_lamda_array[i4_luma_idx]; |
|     ps_ctxt->i4_sad_lamda = ps_ctxt->i4_sad_type2_lamda_array[i4_luma_idx]; |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_update_pred_qp \endif |
| * |
| * \brief |
| * Computes the predicted QP for the CU at (cu_pos_x, cu_pos_y) and stores it |
| * in ps_ctxt->i4_pred_qp. At position (0, 0) the prediction is i4_prev_QP. |
| * Elsewhere it is the rounded average of the top and left neighbour QPs read |
| * from ai4_qp_qg (row stride 8), where a neighbour in row 0 / column 0 is |
| * replaced by i4_prev_QP. |
| * |
| * \param[in,out] ps_ctxt : enc loop context; reads i4_prev_QP and ai4_qp_qg, |
| * writes i4_pred_qp |
| * \param[in] cu_pos_x : CU column used to index ai4_qp_qg |
| * \param[in] cu_pos_y : CU row used to index ai4_qp_qg |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_update_pred_qp(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 cu_pos_x, WORD32 cu_pos_y) |
| { |
|     const WORD32 i4_prev_qp = ps_ctxt->i4_prev_QP; |
| |
|     if((0 == cu_pos_x) && (0 == cu_pos_y)) |
|     { |
|         /* CTB start: nothing inside the CTB to predict from */ |
|         ps_ctxt->i4_pred_qp = i4_prev_qp; |
|     } |
|     else |
|     { |
|         /* On a CTB boundary fall back to the previous QP, otherwise use */ |
|         /* the stored QP of the neighbour inside the CTB */ |
|         const WORD32 i4_top_qp = |
|             (0 == cu_pos_y) ? i4_prev_qp : ps_ctxt->ai4_qp_qg[(cu_pos_y - 1) * 8 + (cu_pos_x)]; |
|         const WORD32 i4_left_qp = |
|             (0 == cu_pos_x) ? i4_prev_qp : ps_ctxt->ai4_qp_qg[(cu_pos_y)*8 + (cu_pos_x - 1)]; |
| |
|         ps_ctxt->i4_pred_qp = (i4_left_qp + i4_top_qp + 1) >> 1; |
|     } |
| } |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_activity_scaled_qp \endif |
| * |
| * \brief |
| * Helper of ihevce_compute_cu_level_QP: converts the frame QP to qscale, |
| * optionally scales it by an activity factor, converts back to QP and clips |
| * the result. |
| * |
| * \param[in] ps_ctxt : enc loop context (RC quant tables are read) |
| * \param[in] i4_input_QP : frame level QP to start from |
| * \param[in] i4_activity : activity factor in QP_LEVEL_MOD_ACT_FACTOR |
| * fractional bits; used only when i4_modulate |
| * \param[in] i4_modulate : non-zero to apply the activity factor |
| * \param[in] i4_reduce_qp : 1 to lower a resulting QP above 1 by one |
| * \param[in] i4_min_qp_allowed : lower clip limit |
| * \param[in] i4_max_qp_allowed : upper clip limit |
| * |
| * \return |
| * The clipped QP |
| * |
| ***************************************************************************** |
| */ |
| static WORD32 ihevce_activity_scaled_qp( |
|     ihevce_enc_loop_ctxt_t *ps_ctxt, |
|     WORD32 i4_input_QP, |
|     WORD32 i4_activity, |
|     WORD32 i4_modulate, |
|     WORD32 i4_reduce_qp, |
|     WORD32 i4_min_qp_allowed, |
|     WORD32 i4_max_qp_allowed) |
| { |
|     /* Start in the qscale domain */ |
|     WORD32 i4_val = |
|         (ps_ctxt->ps_rc_quant_ctxt |
|              ->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]); |
| |
|     if(i4_modulate) |
|     { |
|         /* Scale by the activity factor with rounding */ |
|         ASSERT(i4_activity > 0); |
|         i4_val = ((i4_val * i4_activity) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >> |
|                  QP_LEVEL_MOD_ACT_FACTOR; |
|     } |
| |
|     /* To avoid access of uninitialised Qscale to qp conversion table */ |
|     if(i4_val > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale) |
|     { |
|         i4_val = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale; |
|     } |
|     else if(i4_val < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale) |
|     { |
|         i4_val = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale; |
|     } |
| |
|     /* Back to the QP domain */ |
|     i4_val = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[i4_val]; |
| |
|     if((1 == i4_reduce_qp) && (i4_val > 1)) |
|     { |
|         i4_val--; |
|     } |
| |
|     /* Clip to the allowed QP variation; the upper limit is tested first */ |
|     if(i4_val > i4_max_qp_allowed) |
|     { |
|         i4_val = i4_max_qp_allowed; |
|     } |
|     else if(i4_val < i4_min_qp_allowed) |
|     { |
|         i4_val = i4_min_qp_allowed; |
|     } |
| |
|     /* Clip to the user configured min and max QP */ |
|     if(i4_val > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp) |
|     { |
|         i4_val = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp; |
|     } |
|     else if(i4_val < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp) |
|     { |
|         i4_val = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp; |
|     } |
| |
|     return i4_val; |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_compute_cu_level_QP \endif |
| * |
| * \brief |
| * Derives the CU level QP and the CU level lambdas from the frame QP |
| * (i4_frame_mod_qp) and the given activity factors. Returns without any |
| * update for B slices at quality preset IHEVCE_QUALITY_P6. |
| * |
| * The allowed QP window is pred QP +25 / -26 (each widened by qp_offset / 2) |
| * with sub picture level RC, and frame QP +7 / -18 (each widened by |
| * qp_offset / 4) otherwise. |
| * |
| * \param[in,out] ps_ctxt : enc loop context; i4_cu_qp, the quant |
| * related fields and the current lambda |
| * fields are updated |
| * \param[in] i4_activity_for_qp : activity factor for the QP, or -1 to |
| * leave QP and quant fields untouched |
| * \param[in] i4_activity_for_lamda : activity factor for the lambda, or -1 |
| * to leave the lambdas untouched; |
| * replaced by i4_activity_for_qp when |
| * LAMDA_BASED_ON_QUANT is non-zero |
| * \param[in] i4_reduce_qp : 1 to lower a CU QP above 1 by one |
| * before clipping |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_compute_cu_level_QP( |
|     ihevce_enc_loop_ctxt_t *ps_ctxt, |
|     WORD32 i4_activity_for_qp, |
|     WORD32 i4_activity_for_lamda, |
|     WORD32 i4_reduce_qp) |
| { |
|     const WORD32 i4_input_QP = ps_ctxt->i4_frame_mod_qp; |
|     const WORD32 i4_rc_qp_offset = ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset; |
|     WORD32 i4_max_qp_allowed; |
|     WORD32 i4_min_qp_allowed; |
| |
|     if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) |
|     { |
|         return; |
|     } |
| |
|     if(ps_ctxt->i4_sub_pic_level_rc) |
|     { |
|         /* Window around the predicted QP: (-26 - offset/2) to (25 + offset/2) */ |
|         i4_max_qp_allowed = (ps_ctxt->i4_pred_qp + (25 + (i4_rc_qp_offset / 2))); |
|         i4_min_qp_allowed = (ps_ctxt->i4_pred_qp - (26 + (i4_rc_qp_offset / 2))); |
|     } |
|     else |
|     { |
|         /* Window around the frame QP: (-18 - offset/4) to (7 + offset/4) */ |
|         i4_max_qp_allowed = (i4_input_QP + (7 + (i4_rc_qp_offset / 4))); |
|         i4_min_qp_allowed = (i4_input_QP - (18 + (i4_rc_qp_offset / 4))); |
|     } |
| |
| #if LAMDA_BASED_ON_QUANT |
|     i4_activity_for_lamda = i4_activity_for_qp; |
| #endif |
| |
|     if(-1 != i4_activity_for_qp) |
|     { |
|         const WORD32 i4_new_cu_qp = ihevce_activity_scaled_qp( |
|             ps_ctxt, |
|             i4_input_QP, |
|             i4_activity_for_qp, |
|             ps_ctxt->i4_qp_mod, |
|             i4_reduce_qp, |
|             i4_min_qp_allowed, |
|             i4_max_qp_allowed); |
| |
|         /* cu qp must be populated in cu_analyse_t struct */ |
|         ps_ctxt->i4_cu_qp = i4_new_cu_qp; |
|         /* recompute quant related param at every cu level */ |
|         ihevce_compute_quant_rel_param(ps_ctxt, i4_new_cu_qp); |
|     } |
| |
|     /* Decoupling qp and lamda calculation */ |
|     if(-1 != i4_activity_for_lamda) |
|     { |
|         /* The lambda QP is activity scaled only when spatial modulation of */ |
|         /* lambda is compiled in, and it is never reduced */ |
|         const WORD32 i4_lamda_qp = ihevce_activity_scaled_qp( |
|             ps_ctxt, |
|             i4_input_QP, |
|             i4_activity_for_lamda, |
|             MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? ps_ctxt->i4_qp_mod : 0, |
|             0, |
|             i4_min_qp_allowed, |
|             i4_max_qp_allowed); |
| |
|         /* get frame level lambda params */ |
|         ihevce_get_cl_cu_lambda_prms( |
|             ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? i4_lamda_qp : ps_ctxt->i4_frame_qp); |
|     } |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_update_cu_level_qp_lamda \endif |
| * |
| * \brief |
| * Picks the activity factors of the CU according to CU size and transform |
| * size, recomputes the CU level QP (and, unless CTB level lambda is in use, |
| * the lambdas) and stores the resulting QP in ps_cu_analyse->i1_cu_qp. |
| * |
| * Activity factor row used for the QP: |
| * CU 64 / 32 : 0 for trans size 32, 1 for 16, 2 for 8 or 4 |
| * CU 16 : 0 for trans size 16, 1 for 8 or 4 |
| * CU 8 : 1 |
| * Activity factor row used for the lambda: 3 for CU 64, 0 otherwise. |
| * |
| * \param[in,out] ps_ctxt : enc loop context |
| * \param[in,out] ps_cu_analyse : CU analyse struct; u1_cu_size and |
| * i4_act_factor are read, i1_cu_qp is written |
| * \param[in] trans_size : transform size (4, 8, 16 or 32) |
| * \param[in] is_intra : second index into i4_act_factor |
| * |
| * \return |
| * None |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_update_cu_level_qp_lamda( |
|     ihevce_enc_loop_ctxt_t *ps_ctxt, cu_analyse_t *ps_cu_analyse, WORD32 trans_size, WORD32 is_intra) |
| { |
|     WORD32 i4_qp_act_row = 0; |
|     WORD32 i4_lamda_act_row = 0; |
|     WORD32 i4_lamda_activity; |
| |
|     switch(ps_cu_analyse->u1_cu_size) |
|     { |
|     case 64: |
|     case 32: |
|         ASSERT((trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4)); |
|         i4_qp_act_row = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4)); |
|         i4_lamda_act_row = (64 == ps_cu_analyse->u1_cu_size) ? 3 : 0; |
|         break; |
| |
|     case 16: |
|         ASSERT((trans_size == 16) || (trans_size == 8) || (trans_size == 4)); |
|         i4_qp_act_row = (trans_size == 8) || (trans_size == 4); |
|         break; |
| |
|     case 8: |
|         ASSERT((trans_size == 8) || (trans_size == 4)); |
|         i4_qp_act_row = 1; |
|         break; |
| |
|     default: |
|         /* Unsupported CU size */ |
|         ASSERT(0); |
|         break; |
|     } |
| |
|     /* With CTB level lambda the lambda update is skipped by passing -1 */ |
|     i4_lamda_activity = ps_ctxt->i4_use_ctb_level_lamda |
|                             ? -1 |
|                             : ps_cu_analyse->i4_act_factor[i4_lamda_act_row][is_intra]; |
| |
|     ihevce_compute_cu_level_QP( |
|         ps_ctxt, ps_cu_analyse->i4_act_factor[i4_qp_act_row][is_intra], i4_lamda_activity, 0); |
| |
|     ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp; |
| } |
| |
| /** |
| ******************************************************************************* |
| * \if Function name : ihevce_scan_coeffs \endif |
| * |
| * @brief * Computes the coeff buffer for a coded TU for entropy coding |
| * |
| * @par Description |
| * Computes the coeff buffer for a coded TU for entropy coding |
| * |
| * \param[in] pi2_quan_coeffs Quantized coefficient context |
| * |
| * \param[in] scan_idx Scan index specifying the scan order |
| * |
| * \param[in] trans_size Transform unit size |
| * |
| * \param[inout] pu1_out_data output coeff buffer for a coded TU for entropy coding |
| * |
| * \param[in] pu1_csbf_buf csb flag buffer |
| * |
| * @returns num_bytes |
| * Number of bytes written to pu1_out_data |
| * |
| * @remarks |
| * |
| * \author |
| * Ittiam |
| * |
| ******************************************************************************* |
| */ |
| |
| WORD32 ihevce_scan_coeffs( |
| WORD16 *pi2_quant_coeffs, |
| WORD32 *pi4_subBlock2csbfId_map, |
| WORD32 scan_idx, |
| WORD32 trans_size, |
| UWORD8 *pu1_out_data, |
| UWORD8 *pu1_csbf_buf, |
| WORD32 i4_csbf_stride) |
| { |
| WORD32 i, trans_unit_idx, num_gt1_flag; |
| UWORD16 u2_csbf0flags; |
| WORD32 num_bytes = 0; |
| UWORD8 *pu1_trans_table; |
| UWORD8 *pu1_csb_table; |
| WORD32 shift_value, mask_value; |
| UWORD16 u2_sig_coeff_abs_gt0_flags = 0, u2_sig_coeff_abs_gt1_flags = 0; |
| UWORD16 u2_sign_flags; |
| UWORD16 u2_abs_coeff_remaining[16]; |
| WORD32 blk_row, blk_col; |
| |
| UWORD8 *pu1_out_data_header; |
| UWORD16 *pu2_out_data_coeff; |
| |
| WORD32 x_pos, y_pos; |
| WORD32 quant_coeff; |
| |
| WORD32 num_gt0_flag; |
| (void)i4_csbf_stride; |
| pu1_out_data_header = pu1_out_data; |
| /* Need only last 3 bits, rest are reserved for debugging and making */ |
| /* WORD alignment */ |
| u2_csbf0flags = 0xBAD0; |
| |
| /* Select proper order for your transform unit and csb based on scan_idx*/ |
| /* and the trans_size */ |
| |
| /* scan order inside a csb */ |
| pu1_csb_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]); |
| /* GETRANGE will give the log_2 of trans_size to shift_value */ |
| GETRANGE(shift_value, trans_size); |
| shift_value = shift_value - 3; /* for finding. row no. from scan index */ |
| mask_value = (trans_size / 4) - 1; /*for finding the col. no. from scan index*/ |
| switch(trans_size) |
| { |
| case 32: |
| pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_8x8[scan_idx][0]); |
| break; |
| case 16: |
| pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]); |
| break; |
| case 8: |
| pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_2x2[scan_idx][0]); |
| break; |
| case 4: |
| pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_1x1[0]); |
| break; |
| default: |
| DBG_PRINTF("Invalid Trans Size\n"); |
| return -1; |
| break; |
| } |
| |
| /*go through each csb in the scan order for first non-zero coded sub-block*/ |
| for(trans_unit_idx = (trans_size * trans_size / 16) - 1; trans_unit_idx >= 0; trans_unit_idx--) |
| { |
| /* check for the first csb flag in our scan order */ |
| if(pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]]) |
| { |
| UWORD8 u1_last_x, u1_last_y; |
| /* row of csb */ |
| blk_row = pu1_trans_table[trans_unit_idx] >> shift_value; |
| /* col of csb */ |
| blk_col = pu1_trans_table[trans_unit_idx] & mask_value; |
| |
| /*check for the 1st non-0 values inside the csb in our scan order*/ |
| for(i = 15; i >= 0; i--) |
| { |
| x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4; |
| y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4; |
| |
| quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)]; |
| |
| if(quant_coeff != 0) |
| break; |
| } |
| |
| ASSERT(i >= 0); |
| |
| u1_last_x = x_pos; |
| u1_last_y = y_pos; |
| |
| /* storing last_x and last_y */ |
| *pu1_out_data_header = u1_last_x; |
| pu1_out_data_header++; |
| num_bytes++; |
| *pu1_out_data_header = u1_last_y; |
| pu1_out_data_header++; |
| num_bytes++; |
| |
| /* storing the scan order */ |
| *pu1_out_data_header = scan_idx; |
| pu1_out_data_header++; |
| num_bytes++; |
| /* storing last_sub_block pos. in scan order count */ |
| *pu1_out_data_header = trans_unit_idx; |
| pu1_out_data_header++; |
| num_bytes++; |
| |
| /*stored the first 4 bytes, now all are word16. So word16 pointer*/ |
| pu2_out_data_coeff = (UWORD16 *)pu1_out_data_header; |
| |
| /* u2_csbf0flags word */ |
| u2_csbf0flags = 0xBAD0 | 1; /*since right&bottom csbf is 0*/ |
| /* storing u2_csbf0flags word */ |
| *pu2_out_data_coeff = u2_csbf0flags; |
| pu2_out_data_coeff++; |
| num_bytes += 2; |
| |
| num_gt0_flag = 1; |
| num_gt1_flag = 0; |
| u2_sign_flags = 0; |
| |
| /* set the i th bit of u2_sig_coeff_abs_gt0_flags */ |
| u2_sig_coeff_abs_gt0_flags = u2_sig_coeff_abs_gt0_flags | (1 << i); |
| if(abs(quant_coeff) > 1) |
| { |
| /* set the i th bit of u2_sig_coeff_abs_gt1_flags */ |
| u2_sig_coeff_abs_gt1_flags = u2_sig_coeff_abs_gt1_flags | (1 << i); |
| /* update u2_abs_coeff_remaining */ |
| u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1; |
| |
| num_gt1_flag++; |
| } |
| |
| if(quant_coeff < 0) |
| { |
| /* set the i th bit of u2_sign_flags */ |
| u2_sign_flags = u2_sign_flags | (1 << i); |
| } |
| |
| /* Test remaining elements in our scan order */ |
| /* Can optimize further by CLZ macro */ |
| for(i = i - 1; i >= 0; i--) |
| { |
| x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4; |
| y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4; |
| |
| quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)]; |
| |
| if(quant_coeff != 0) |
| { |
| /* set the i th bit of u2_sig_coeff_abs_gt0_flags */ |
| u2_sig_coeff_abs_gt0_flags |= (1 << i); |
| |
| if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE)) |
| { |
| /* set the i th bit of u2_sig_coeff_abs_gt1_flags */ |
| u2_sig_coeff_abs_gt1_flags |= (1 << i); |
| |
| /* update u2_abs_coeff_remaining */ |
| u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1; |
| |
| num_gt1_flag++; /*n0. of Ones in sig_coeff_abs_gt1_flag*/ |
| } |
| |
| if(quant_coeff < 0) |
| { |
| /* set the i th bit of u2_sign_flags */ |
| u2_sign_flags |= (1 << i); |
| } |
| |
| num_gt0_flag++; |
| } |
| } |
| |
| /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */ |
| *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags; |
| pu2_out_data_coeff++; |
| num_bytes += 2; |
| /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */ |
| *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags; |
| pu2_out_data_coeff++; |
| num_bytes += 2; |
| /* storing u2_sign_flags 2 bytes */ |
| *pu2_out_data_coeff = u2_sign_flags; |
| pu2_out_data_coeff++; |
| num_bytes += 2; |
| |
| /* Store the u2_abs_coeff_remaining[] */ |
| for(i = 0; i < num_gt1_flag; i++) |
| { |
| /* storing u2_abs_coeff_remaining[i] 2 bytes */ |
| *pu2_out_data_coeff = u2_abs_coeff_remaining[i]; |
| pu2_out_data_coeff++; |
| num_bytes += 2; |
| } |
| |
| break; /*We just need this loop for finding 1st non-zero csb only*/ |
| } |
| } |
| |
| /* go through remaining csb in the scan order */ |
| for(trans_unit_idx = trans_unit_idx - 1; trans_unit_idx >= 0; trans_unit_idx--) |
| { |
| blk_row = pu1_trans_table[trans_unit_idx] >> shift_value; /*row of csb*/ |
| blk_col = pu1_trans_table[trans_unit_idx] & mask_value; /*col of csb*/ |
| |
| /* u2_csbf0flags word */ |
| u2_csbf0flags = 0xBAD0 | /* assuming csbf_buf has only 0 or 1 values */ |
| (pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]]); |
| |
| /********************************************************************/ |
| /* Minor hack: As per HEVC spec csbf in not signalled in stream for */ |
| /* block0, instead sig coeff map is directly signalled. This is */ |
| /* taken care by forcing csbf for block0 to be 1 even if it is 0 */ |
| /********************************************************************/ |
| if(0 == trans_unit_idx) |
| { |
| u2_csbf0flags |= 1; |
| } |
| |
| if((blk_col + 1 < trans_size / 4)) /* checking right boundary */ |
| { |
| if(pu1_csbf_buf[pi4_subBlock2csbfId_map[blk_row * trans_size / 4 + blk_col + 1]]) |
| { |
| /* set the 2nd bit of u2_csbf0flags for right csbf */ |
| u2_csbf0flags = u2_csbf0flags | (1 << 1); |
| } |
| } |
| if((blk_row + 1 < trans_size / 4)) /* checking bottom oundary */ |
| { |
| if(pu1_csbf_buf[pi4_subBlock2csbfId_map[(blk_row + 1) * trans_size / 4 + blk_col]]) |
| { |
| /* set the 3rd bit of u2_csbf0flags for bottom csbf */ |
| u2_csbf0flags = u2_csbf0flags | (1 << 2); |
| } |
| } |
| |
| /* storing u2_csbf0flags word */ |
| *pu2_out_data_coeff = u2_csbf0flags; |
| pu2_out_data_coeff++; |
| num_bytes += 2; |
| |
| /* check for the csb flag in our scan order */ |
| if(u2_csbf0flags & 0x1) |
| { |
| u2_sig_coeff_abs_gt0_flags = 0; |
| u2_sig_coeff_abs_gt1_flags = 0; |
| u2_sign_flags = 0; |
| |
| num_gt0_flag = 0; |
| num_gt1_flag = 0; |
| /* check for the non-0 values inside the csb in our scan order */ |
| /* Can optimize further by CLZ macro */ |
| for(i = 15; i >= 0; i--) |
| { |
| x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4; |
| y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4; |
| |
| quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)]; |
| |
| if(quant_coeff != 0) |
| { |
| /* set the i th bit of u2_sig_coeff_abs_gt0_flags */ |
| u2_sig_coeff_abs_gt0_flags |= (1 << i); |
| |
| if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE)) |
| { |
| /* set the i th bit of u2_sig_coeff_abs_gt1_flags */ |
| u2_sig_coeff_abs_gt1_flags |= (1 << i); |
| |
| /* update u2_abs_coeff_remaining */ |
| u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1; |
| |
| num_gt1_flag++; |
| } |
| |
| if(quant_coeff < 0) |
| { |
| /* set the i th bit of u2_sign_flags */ |
| u2_sign_flags = u2_sign_flags | (1 << i); |
| } |
| |
| num_gt0_flag++; |
| } |
| } |
| |
| /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */ |
| *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags; |
| pu2_out_data_coeff++; |
| num_bytes += 2; |
| |
| /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */ |
| *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags; |
| pu2_out_data_coeff++; |
| num_bytes += 2; |
| |
| /* storing u2_sign_flags 2 bytes */ |
| *pu2_out_data_coeff = u2_sign_flags; |
| pu2_out_data_coeff++; |
| num_bytes += 2; |
| |
| /* Store the u2_abs_coeff_remaining[] */ |
| for(i = 0; i < num_gt1_flag; i++) |
| { |
| /* storing u2_abs_coeff_remaining[i] 2 bytes */ |
| *pu2_out_data_coeff = u2_abs_coeff_remaining[i]; |
| pu2_out_data_coeff++; |
| num_bytes += 2; |
| } |
| } |
| } |
| |
| return num_bytes; /* Return the number of bytes written to out_data */ |
| } |
| |
| /** |
| ******************************************************************************* |
| * \if Function name : ihevce_populate_intra_pred_mode \endif |
| * |
| * \brief * populates intra pred modes,b2_mpm_idx,b1_prev_intra_luma_pred_flag & |
| * b5_rem_intra_pred_mode for a CU based on nieghbouring CUs, |
| * |
| * \par Description |
| * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode |
| * for a CU |
| * |
| * \param[in] top_intra_mode Top intra mode |
| * \param[in] left_intra_mode Left intra mode |
| * \param[in] available_top Top availability flag |
| * \param[in] available_left Left availability flag |
| * \param[in] cu_pos_y CU 'y' position |
| * \param[in] ps_cand_mode_list pointer to populate candidate list |
| * |
| * \returns none |
| * |
| * \author |
| * Ittiam |
| * |
| ******************************************************************************* |
| */ |
| |
| void ihevce_populate_intra_pred_mode( |
| WORD32 top_intra_mode, |
| WORD32 left_intra_mode, |
| WORD32 available_top, |
| WORD32 available_left, |
| WORD32 cu_pos_y, |
| WORD32 *ps_cand_mode_list) |
| { |
| /* local variables */ |
| WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top; |
| |
| /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */ |
| /* N = top */ |
| if(0 == available_top) |
| { |
| cand_intra_pred_mode_top = INTRA_DC; |
| } |
| /* for neighbour != INTRA, setting DC is done outside */ |
| else if(0 == cu_pos_y) /* It's on the CTB boundary */ |
| { |
| cand_intra_pred_mode_top = INTRA_DC; |
| } |
| else |
| { |
| cand_intra_pred_mode_top = top_intra_mode; |
| } |
| |
| /* N = left */ |
| if(0 == available_left) |
| { |
| cand_intra_pred_mode_left = INTRA_DC; |
| } |
| /* for neighbour != INTRA, setting DC is done outside */ |
| else |
| { |
| cand_intra_pred_mode_left = left_intra_mode; |
| } |
| |
| /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */ |
| if(cand_intra_pred_mode_left == cand_intra_pred_mode_top) |
| { |
| if(cand_intra_pred_mode_left < 2) |
| { |
| ps_cand_mode_list[0] = INTRA_PLANAR; |
| ps_cand_mode_list[1] = INTRA_DC; |
| ps_cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */ |
| } |
| else |
| { |
| ps_cand_mode_list[0] = cand_intra_pred_mode_left; |
| ps_cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32); |
| ps_cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32); |
| } |
| } |
| else |
| { |
| ps_cand_mode_list[0] = cand_intra_pred_mode_left; |
| ps_cand_mode_list[1] = cand_intra_pred_mode_top; |
| |
| if((cand_intra_pred_mode_left != INTRA_PLANAR) && |
| (cand_intra_pred_mode_top != INTRA_PLANAR)) |
| { |
| ps_cand_mode_list[2] = INTRA_PLANAR; |
| } |
| else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC)) |
| { |
| ps_cand_mode_list[2] = INTRA_DC; |
| } |
| else |
| { |
| ps_cand_mode_list[2] = INTRA_ANGULAR(26); |
| } |
| } |
| } |
| /** |
| ******************************************************************************* |
| * \if Function name : ihevce_intra_pred_mode_signaling \endif |
| * |
| * \brief * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & |
| * b5_rem_intra_pred_mode for a CU |
| * |
| * \par Description |
| * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode |
| * for a CU |
| * |
| * \param[in] ps_nbr_top Top neighbour context |
| * \param[in] ps_nbr_left Left neighbour context |
| * \param[in] available_top Top availability flag |
| * \param[in] available_left Left availability flag |
| * \param[in] cu_pos_y CU 'y' position |
| * \param[in] luma_intra_pred_mode_current the intra_pred_mode of current block |
| * \param[inout] ps_intra_pred_mode_current |
| * Pointer to structure having b1_prev_intra_luma_pred_flag, b2_mpm_idx and |
| * b5_rem_intra_pred_mode |
| * |
| * \returns none |
| * |
| * \author |
| * Ittiam |
| * |
| ******************************************************************************* |
| */ |
| |
| void ihevce_intra_pred_mode_signaling( |
| WORD32 top_intra_mode, |
| WORD32 left_intra_mode, |
| WORD32 available_top, |
| WORD32 available_left, |
| WORD32 cu_pos_y, |
| WORD32 luma_intra_pred_mode_current, |
| intra_prev_rem_flags_t *ps_intra_pred_mode_current) |
| { |
| /* local variables */ |
| WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top; |
| WORD32 cand_mode_list[3]; |
| |
| ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0; |
| ps_intra_pred_mode_current->b2_mpm_idx = 0; // for safety purpose |
| ps_intra_pred_mode_current->b5_rem_intra_pred_mode = 0; |
| |
| /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */ |
| /* N = top */ |
| if(0 == available_top) |
| { |
| cand_intra_pred_mode_top = INTRA_DC; |
| } |
| /* for neighbour != INTRA, setting DC is done outside */ |
| else if(0 == cu_pos_y) /* It's on the CTB boundary */ |
| { |
| cand_intra_pred_mode_top = INTRA_DC; |
| } |
| else |
| { |
| cand_intra_pred_mode_top = top_intra_mode; |
| } |
| |
| /* N = left */ |
| if(0 == available_left) |
| { |
| cand_intra_pred_mode_left = INTRA_DC; |
| } |
| /* for neighbour != INTRA, setting DC is done outside */ |
| else |
| { |
| cand_intra_pred_mode_left = left_intra_mode; |
| } |
| |
| /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */ |
| if(cand_intra_pred_mode_left == cand_intra_pred_mode_top) |
| { |
| if(cand_intra_pred_mode_left < 2) |
| { |
| cand_mode_list[0] = INTRA_PLANAR; |
| cand_mode_list[1] = INTRA_DC; |
| cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */ |
| } |
| else |
| { |
| cand_mode_list[0] = cand_intra_pred_mode_left; |
| cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32); |
| cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32); |
| } |
| } |
| else |
| { |
| cand_mode_list[0] = cand_intra_pred_mode_left; |
| cand_mode_list[1] = cand_intra_pred_mode_top; |
| |
| if((cand_intra_pred_mode_left != INTRA_PLANAR) && |
| (cand_intra_pred_mode_top != INTRA_PLANAR)) |
| { |
| cand_mode_list[2] = INTRA_PLANAR; |
| } |
| else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC)) |
| { |
| cand_mode_list[2] = INTRA_DC; |
| } |
| else |
| { |
| cand_mode_list[2] = INTRA_ANGULAR(26); |
| } |
| } |
| |
| /* Signal Generation */ |
| |
| /* Flag & mpm_index generation */ |
| if(cand_mode_list[0] == luma_intra_pred_mode_current) |
| { |
| ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1; |
| ps_intra_pred_mode_current->b2_mpm_idx = 0; |
| } |
| else if(cand_mode_list[1] == luma_intra_pred_mode_current) |
| { |
| ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1; |
| ps_intra_pred_mode_current->b2_mpm_idx = 1; |
| } |
| else if(cand_mode_list[2] == luma_intra_pred_mode_current) |
| { |
| ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1; |
| ps_intra_pred_mode_current->b2_mpm_idx = 2; |
| } |
| /* Flag & b5_rem_intra_pred_mode generation */ |
| else |
| { |
| WORD32 rem_mode; |
| |
| ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0; |
| |
| /* sorting cand_mode_list */ |
| if(cand_mode_list[0] > cand_mode_list[1]) |
| { |
| SWAP(cand_mode_list[0], cand_mode_list[1]); |
| } |
| if(cand_mode_list[0] > cand_mode_list[2]) |
| { |
| SWAP(cand_mode_list[0], cand_mode_list[2]); |
| } |
| if(cand_mode_list[1] > cand_mode_list[2]) |
| { |
| SWAP(cand_mode_list[1], cand_mode_list[2]); |
| } |
| |
| rem_mode = luma_intra_pred_mode_current; |
| |
| if((rem_mode) >= cand_mode_list[2]) |
| { |
| (rem_mode)--; |
| } |
| if((rem_mode) >= cand_mode_list[1]) |
| { |
| (rem_mode)--; |
| } |
| if((rem_mode) >= cand_mode_list[0]) |
| { |
| (rem_mode)--; |
| } |
| ps_intra_pred_mode_current->b5_rem_intra_pred_mode = rem_mode; |
| } |
| } |
| |
/**
*******************************************************************************
* \if Function name : ihevce_quant_rounding_factor_gen \endif
*
* \brief
*    Fills the 0->1 and 1->2 quantization rounding factor arrays of a
*    transform block from estimated CABAC bin costs
*
* \par Description
*    Bin costs are read from gau2_ihevce_cabac_bin_to_bits[], indexed with
*    (context state ^ bin). Every factor is produced by the
*    QUANT_ROUND_FACTOR() macro from a pair of bit estimates and the scaled
*    lambda. NOTE(review): the macro is not visible here; presumably it turns
*    the bit cost difference into a rounding offset - confirm against its
*    definition.
*
*    For a 4x4 block 16 consecutive entries are written to each output array.
*    For larger blocks i4_trans_size * i4_trans_size entries are written,
*    addressed with a row stride of i4_trans_size.
*
* \param[in]  i4_trans_size          Transform size
* \param[in]  is_luma                Non zero : luma context offsets are used,
*                                    zero : chroma context offsets are used
* \param[in]  ps_rdopt_entropy_ctxt  Provides the CABAC context states
* \param[out] pi4_quant_round_0_1    Rounding factors derived from the
*                                    sig coeff flag costs
* \param[out] pi4_quant_round_1_2    Rounding factors derived from the
*                                    abs greater than 1 flag costs
* \param[in]  i4_lamda_modifier      Lambda modifier; scaled by 2^(-8/3) and
*                                    converted to fixed point with
*                                    LAMDA_Q_SHIFT_FACT fractional bits
* \param[in]  i4_is_tu_level_quant_rounding
*                                    1 : use the context models of the current
*                                    CU entropy context, otherwise use the
*                                    init CABAC context states
*
* \returns none
*
*******************************************************************************
*/
void ihevce_quant_rounding_factor_gen(
    WORD32 i4_trans_size,
    WORD32 is_luma,
    rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
    WORD32 *pi4_quant_round_0_1,
    WORD32 *pi4_quant_round_1_2,
    double i4_lamda_modifier,
    UWORD8 i4_is_tu_level_quant_rounding)
{
    //WORD32 i4_scan_idx = ps_ctxt->i4_scan_idx;
    UWORD8 *pu1_ctxt_model;
    WORD32 scan_pos;
    WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag */
    WORD32 abs_gt1_base_ctxt;
    WORD32 log2_tr_size, i;
    /* NOTE(review): the bit estimates are 16 bit although named u4_ */
    UWORD16 u4_bits_estimated_r0, u4_bits_estimated_r1, u4_bits_estimated_r2;
    UWORD16 u4_bits_estimated_r1_temp;
    WORD32 j = 0; /* offset of the current sub-block inside the output array */
    WORD32 k = 0; /* sub-block row counter */
    WORD32 temp2;

    double i4_lamda_mod = i4_lamda_modifier * pow(2.0, (-8.0 / 3.0));
    LWORD64 lamda_mod = (LWORD64)(i4_lamda_mod * (1 << LAMDA_Q_SHIFT_FACT));
    /* transform size to log2transform size */
    GETRANGE(log2_tr_size, i4_trans_size);
    log2_tr_size -= 1;

    /* select the set of CABAC context states used for the estimates */
    if(1 == i4_is_tu_level_quant_rounding)
    {
        entropy_context_t *ps_cur_tu_entropy;
        cab_ctxt_t *ps_cabac;
        WORD32 curr_buf_idx = ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
        ps_cur_tu_entropy = &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[curr_buf_idx];

        ps_cabac = &ps_cur_tu_entropy->s_cabac_ctxt;

        pu1_ctxt_model = &ps_cabac->au1_ctxt_models[0];
    }
    else
    {
        pu1_ctxt_model = &ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states[0];
    }
    /* base context indices of the sig coeff flag and abs greater than 1 flag */
    if(is_luma)
    {
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;

        if(3 == log2_tr_size)
        {
            /* 8x8 transform size */
            /* Assuming diagonal scan idx for now */
            sig_coeff_base_ctxt += 9;
        }
        else if(3 < log2_tr_size)
        {
            /* larger transform sizes */
            sig_coeff_base_ctxt += 21;
        }
    }
    else
    {
        /* chroma context initializations */
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;

        if(3 == log2_tr_size)
        {
            /* 8x8 transform size */
            sig_coeff_base_ctxt += 9;
        }
        else if(3 < log2_tr_size)
        {
            /* larger transform sizes */
            sig_coeff_base_ctxt += 12;
        }
    }

    /* Transform size of 4x4 will have only a single CSB */
    /* derive the context inc as per section 9.3.3.1.4 */

    if(2 == log2_tr_size)
    {
        UWORD8 sig_ctxinc;
        WORD32 state_mps;
        WORD32 gt1_ctxt = 0;
        WORD32 ctxt_set = 0;
        WORD32 ctxt_idx = 0;

        /* context set based on luma subblock pos */

        /* Encode the abs level gt1 bins */
        /* Currently calculating trade off between mps(2) and mps(1)*/
        /* The estimation has to be further done for mps(11) and mps(111)*/
        /* ctxt_set = 0 as transform 4x4 has only one csb with DC */
        /* gt1_ctxt = 0 for the co-ef value to be 2 */

        ctxt_set = gt1_ctxt = 0;
        ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;

        state_mps = pu1_ctxt_model[ctxt_idx];

        /* cost of the abs greater than 1 bin coded as 1 and as 0 */
        u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];

        u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];

        /* a single 1->2 factor is used for all 16 positions */
        QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1_temp, lamda_mod);
        for(scan_pos = 0; scan_pos < 16; scan_pos++)
        {
            *(pi4_quant_round_1_2 + scan_pos) = temp2;
        }

        /* the 0->1 factor depends on the sig coeff context of each position */
        for(scan_pos = 0; scan_pos < 16; scan_pos++)
        {
            //UWORD8 nbr_csbf = 1;
            /* derive the x,y pos; raster position is used directly */
            UWORD8 y_pos_x_pos = scan_pos;  //gu1_hevce_scan4x4[i4_scan_idx][scan_pos];

            /* 4x4 transform size increment uses lookup */
            sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];

            /*Get the mps state based on ctxt modes */
            state_mps = pu1_ctxt_model[sig_ctxinc + sig_coeff_base_ctxt];

            /* Bits taken to encode sig co-ef flag as 0 */
            u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];

            /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
            /* (ROUND_Q12(1.0) presumably is that one sign bit in Q12 - confirm)             */
            u4_bits_estimated_r1 =
                (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));

            /* add the cost of the abs greater than 1 bin coded as 0 */
            u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;

            QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
            *(pi4_quant_round_0_1 + scan_pos) = temp2;
        }
    }
    else
    {
        UWORD8 *pu1_hevce_sigcoeff_ctxtinc;
        WORD32 is_nbr_csb_state_mps;

        WORD32 state_mps;
        WORD32 gt1_ctxt = 0;
        WORD32 ctxt_set = 0;
        WORD32 ctxt_idx;
        /*1to2 rounding factor is same for all sub blocks except for sub-block = 0*/
        /*Hence will write all the sub-block with i >=1 coeff, and then overwrite for i = 0*/

        /*ctxt_set = 0 DC subblock, the previous state did not have 2
        ctxt_set = 1 DC subblock, the previous state did have >= 2
        ctxt_set = 2 AC subblock, the previous state did not have 2
        ctxt_set = 3 AC subblock, the previous state did have >= 2*/
        i = 1;
        ctxt_set = (i && is_luma) ? 2 : 0;

        ctxt_set++;

        /*0th position indicates the probability of 2 */
        /*1th position indicates the probability of 1 */
        /*2th position indicates the probability of 11 */
        /*3th position indicates the probability of 111 */

        gt1_ctxt = 0;
        ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;

        state_mps = pu1_ctxt_model[ctxt_idx];

        u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];

        u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
        QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);

        /* fill the whole block (i4_trans_size * i4_trans_size entries) */
        for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4)); scan_pos++)
        {
            *(pi4_quant_round_1_2 + scan_pos) = temp2;
        }

        /* same computation for sub-block 0 (ctxt_set ends up as 1) */
        i = 0;
        ctxt_set = (i && is_luma) ? 2 : 0;
        ctxt_set++;

        /*0th position indicates the probability of 2 */
        /*1th position indicates the probability of 1 */
        /*2th position indicates the probability of 11 */
        /*3th position indicates the probability of 111 */

        gt1_ctxt = 0;
        ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;

        state_mps = pu1_ctxt_model[ctxt_idx];

        u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];

        u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
        QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);

        /* overwrite the top-left 4x4 sub-block (row stride i4_trans_size) */
        for(scan_pos = 0; scan_pos < 16; scan_pos++)
        {
            *(pi4_quant_round_1_2 + ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
        }

        /* Inspect the two coded sub-block flag contexts (base index and */
        /* base index + 1) of the selected colour component              */
        {
            WORD32 ctxt_idx;

            WORD32 nbr_csbf_0, nbr_csbf_1;
            WORD32 state_mps_0, state_mps_1;
            ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
            ctxt_idx += is_luma ? 0 : 2;

            /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
            /* if neighbour not available, ctxt idx = 0*/
            nbr_csbf_0 = 0;
            ctxt_idx += nbr_csbf_0 ? 1 : 0;
            state_mps_0 = pu1_ctxt_model[ctxt_idx];

            nbr_csbf_1 = 1;
            ctxt_idx += nbr_csbf_1 ? 1 : 0;
            state_mps_1 = pu1_ctxt_model[ctxt_idx];

            /* true when both states are odd; the LSB of a state presumably */
            /* holds the MPS value - confirm against the CABAC tables       */
            is_nbr_csb_state_mps = ((state_mps_0 % 2) == 1) && ((state_mps_1 % 2) == 1);
        }

        if(1 == is_nbr_csb_state_mps)
        {
            /* one pass per 4x4 sub-block, in raster order */
            for(i = 0; i < (i4_trans_size * i4_trans_size >> 4); i++)
            {
                UWORD8 sig_ctxinc;
                WORD32 state_mps;
                WORD32 gt1_ctxt = 0;
                WORD32 ctxt_set = 0;

                WORD32 ctxt_idx;

                /*Check if the cabac states had previous nbr available */

                /* context increment table chosen by sub-block position:   */
                /* [3] sub-block 0, [1] rest of the first sub-block row,   */
                /* [2] first sub-block of every other row, [0] all others  */
                if(i == 0)
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[3][0];
                else if(i < (i4_trans_size >> 2))
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[1][0];
                else if((i % (i4_trans_size >> 2)) == 0)
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[2][0];
                else
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];

                /* a new row of sub-blocks starts */
                if(((i % (i4_trans_size >> 2)) == 0) && (i != 0))
                    k++;

                /* offset of the top-left coefficient of sub-block i */
                j = ((i4_trans_size * 4) * k) + ((i % (i4_trans_size >> 2)) * 4);
                /*ctxt_set = 0 DC subblock, the previous state did not have 2
                ctxt_set = 1 DC subblock, the previous state did have >= 2
                ctxt_set = 2 AC subblock, the previous state did not have 2
                ctxt_set = 3 AC subblock, the previous state did have >= 2*/

                ctxt_set = (i && is_luma) ? 2 : 0;

                /* NOTE(review): the original comment said "gt1_ctxt = 1 for the */
                /* co-ef value to be 1", but the code uses gt1_ctxt = 0          */
                gt1_ctxt = 0;
                ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;

                state_mps = pu1_ctxt_model[ctxt_idx];

                /* cost of the abs greater than 1 bin coded as 0 */
                u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];

                for(scan_pos = 0; scan_pos < 16; scan_pos++)
                {
                    UWORD8 y_pos_x_pos;

                    if(scan_pos || i)
                    {
                        y_pos_x_pos = scan_pos;  // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
                        /* ctxt for AC coeff depends on curpos and neighbour csbf */
                        sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];

                        /* based on luma subblock pos */
                        sig_ctxinc += (i && is_luma) ? 3 : 0;

                        sig_ctxinc += sig_coeff_base_ctxt;
                    }
                    else
                    {
                        /*MAM : both scan pos and i 0 implies the DC coef of 1st block only */
                        /* DC coeff has fixed context for luma and chroma */
                        sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
                    }

                    /*Get the mps state based on ctxt modes */
                    state_mps = pu1_ctxt_model[sig_ctxinc];

                    /* Bits taken to encode sig co-ef flag as 0 */
                    u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];

                    /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
                    u4_bits_estimated_r1 =
                        (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));

                    /* add the cost of the abs greater than 1 bin coded as 0 */
                    u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
                    {
                        QUANT_ROUND_FACTOR(
                            temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
                        *(pi4_quant_round_0_1 +
                          ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size)) + j) = temp2;
                    }
                }
            }
        }
        else
        {
            /*If Both nbr csbfs are 0, then all the coef in sub-blocks will have same value except for 1st subblock,
            Hence will write the same value to all sub block, and overwrite for the 1st one */
            i = 1;
            {
                UWORD8 sig_ctxinc;
                UWORD8 y_pos_x_pos;
                WORD32 quant_rounding_0_1;

                pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc_00[0];

                /* only position 0 of the table is used for the common value */
                scan_pos = 0;
                y_pos_x_pos = scan_pos;  // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
                /* ctxt for AC coeff depends on curpos and neighbour csbf */
                sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];

                /* based on luma subblock pos */
                sig_ctxinc += (is_luma) ? 3 : 0;

                sig_ctxinc += sig_coeff_base_ctxt;

                /*Get the mps state based on ctxt modes */
                state_mps = pu1_ctxt_model[sig_ctxinc];

                /* Bits taken to encode sig co-ef flag as 0 */
                u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];

                /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
                u4_bits_estimated_r1 =
                    (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));

                /*ctxt_set = 0 DC subblock, the previous state did not have 2
                ctxt_set = 1 DC subblock, the previous state did have >= 2
                ctxt_set = 2 AC subblock, the previous state did not have 2
                ctxt_set = 3 AC subblock, the previous state did have >= 2*/

                ctxt_set = (i && is_luma) ? 2 : 0;

                /* NOTE(review): the original comment said "gt1_ctxt = 1 for the */
                /* co-ef value to be 1", but the code uses gt1_ctxt = 0          */
                gt1_ctxt = 0;
                ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;

                state_mps = pu1_ctxt_model[ctxt_idx];

                /* add the cost of the abs greater than 1 bin coded as 0 */
                u4_bits_estimated_r1 += gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];

                QUANT_ROUND_FACTOR(
                    quant_rounding_0_1, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);

                /* fill the whole block (i4_trans_size * i4_trans_size entries) */
                for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4));
                    scan_pos++)
                {
                    *(pi4_quant_round_0_1 + scan_pos) = quant_rounding_0_1;
                }
            }

            /*First Subblock*/
            i = 0;

            {
                UWORD8 sig_ctxinc;
                WORD32 state_mps;
                WORD32 gt1_ctxt = 0;
                WORD32 ctxt_set = 0;

                WORD32 ctxt_idx;

                /*Check if the cabac states had previous nbr available */

                {
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];

                    /*ctxt_set = 0 DC subblock, the previous state did not have 2
                    ctxt_set = 1 DC subblock, the previous state did have >= 2
                    ctxt_set = 2 AC subblock, the previous state did not have 2
                    ctxt_set = 3 AC subblock, the previous state did have >= 2*/
                    ctxt_set = (i && is_luma) ? 2 : 0;

                    /* NOTE(review): the original comment said "gt1_ctxt = 1 for the */
                    /* co-ef value to be 1", but the code uses gt1_ctxt = 0          */
                    gt1_ctxt = 0;
                    ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;

                    state_mps = pu1_ctxt_model[ctxt_idx];

                    /* cost of the abs greater than 1 bin coded as 0 */
                    u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];

                    /* overwrite the top-left 4x4 sub-block position by position */
                    for(scan_pos = 0; scan_pos < 16; scan_pos++)
                    {
                        UWORD8 y_pos_x_pos;

                        if(scan_pos)
                        {
                            y_pos_x_pos = scan_pos;  // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
                            /* ctxt for AC coeff depends on curpos and neighbour csbf */
                            sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];

                            /* based on luma subblock pos */
                            sig_ctxinc += (i && is_luma) ? 3 : 0;

                            sig_ctxinc += sig_coeff_base_ctxt;
                        }
                        else
                        {
                            /*MAM : both scan pos and i 0 implies the DC coef of 1st block only */
                            /* DC coeff has fixed context for luma and chroma */
                            sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
                        }

                        /*Get the mps state based on ctxt modes */
                        state_mps = pu1_ctxt_model[sig_ctxinc];

                        /* Bits taken to encode sig co-ef flag as 0 */
                        u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];

                        /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
                        u4_bits_estimated_r1 =
                            (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));

                        /* add the cost of the abs greater than 1 bin coded as 0 */
                        u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
                        {
                            QUANT_ROUND_FACTOR(
                                temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
                            *(pi4_quant_round_0_1 +
                              ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
                        }
                    }
                }
            }
        }
    }
    return;
}
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_t_q_iq_ssd_scan_fxn \endif |
| * |
| * \brief |
| * Transform unit level (Luma) enc_loop function |
| * |
| * \param[in] ps_ctxt enc_loop module ctxt pointer |
| * \param[in] pu1_pred pointer to predicted data buffer |
| * \param[in] pred_strd predicted buffer stride |
| * \param[in] pu1_src pointer to source data buffer |
| * \param[in] src_strd source buffer stride |
| * \param[in] pi2_deq_data pointer to store iq data |
| * \param[in] deq_data_strd iq data buffer stride |
| * \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod) |
| * \param[out] pu1_csbf_buf pointer to store the csbf for all 4x4 in a current |
| * block |
| * \param[out] csbf_strd csbf buffer stride |
| * \param[in] trans_size transform size (4, 8, 16,32) |
| * \param[in] packed_pred_mode 0:Inter 1:Intra 2:Skip |
| * \param[out] pi8_cost pointer to store the cost |
| * \param[out] pi4_coeff_off pointer to store the number of bytes produced in |
| * coeff buffer |
| * \param[out] pi4_tu_bits pointer to store the best TU bits required to encode |
| the current TU in RDopt Mode |
| * \param[out] pu4_blk_sad pointer to store the block sad for RC |
| * \param[out] pi4_zero_col pointer to store the zero_col info for the TU |
| * \param[out] pi4_zero_row pointer to store the zero_row info for the TU |
| * \param[in] i4_perform_rdoq Indicates if RDOQ should be performed or not |
| * \param[in] i4_perform_sbh Indicates if SBH should be performed or not |
| * |
| * \return |
| * CBF of the current block |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| |
| WORD32 ihevce_t_q_iq_ssd_scan_fxn( |
| ihevce_enc_loop_ctxt_t *ps_ctxt, |
| UWORD8 *pu1_pred, |
| WORD32 pred_strd, |
| UWORD8 *pu1_src, |
| WORD32 src_strd, |
| WORD16 *pi2_deq_data, |
| WORD32 deq_data_strd, |
| UWORD8 *pu1_recon, |
| WORD32 i4_recon_stride, |
| UWORD8 *pu1_ecd_data, |
| UWORD8 *pu1_csbf_buf, |
| WORD32 csbf_strd, |
| WORD32 trans_size, |
| WORD32 packed_pred_mode, |
| LWORD64 *pi8_cost, |
| WORD32 *pi4_coeff_off, |
| WORD32 *pi4_tu_bits, |
| UWORD32 *pu4_blk_sad, |
| WORD32 *pi4_zero_col, |
| WORD32 *pi4_zero_row, |
| UWORD8 *pu1_is_recon_available, |
| WORD32 i4_perform_rdoq, |
| WORD32 i4_perform_sbh, |
| #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| WORD32 i4_alpha_stim_multiplier, |
| UWORD8 u1_is_cu_noisy, |
| #endif |
| SSD_TYPE_T e_ssd_type, |
| WORD32 early_cbf) |
| { |
| WORD32 cbf = 0; |
| WORD32 trans_idx; |
| WORD32 quant_scale_mat_offset; |
| WORD32 *pi4_trans_scratch; |
| WORD16 *pi2_trans_values; |
| WORD16 *pi2_quant_coeffs; |
| WORD32 *pi4_subBlock2csbfId_map = NULL; |
| |
| #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3 |
| WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i; |
| #endif |
| |
| rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt; |
| |
| WORD32 i4_perform_zcbf = (ENABLE_INTER_ZCU_COST && (PRED_MODE_INTRA != packed_pred_mode)) || |
| (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE); |
| WORD32 i4_perform_coeff_level_rdoq = (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING); |
| WORD8 intra_flag = 0; |
| ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW); |
| |
| *pi4_tu_bits = 0; |
| *pi4_coeff_off = 0; |
| pu1_is_recon_available[0] = 0; |
| |
| if((PRED_MODE_SKIP == packed_pred_mode) || (0 == early_cbf)) |
| { |
| if(e_ssd_type != NULL_TYPE) |
| { |
| /* SSD cost is stored to the pointer */ |
| pi8_cost[0] = |
| |
| ps_ctxt->s_cmn_opt_func.pf_ssd_and_sad_calculator( |
| pu1_pred, pred_strd, pu1_src, src_strd, trans_size, pu4_blk_sad); |
| |
| #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
| { |
| pi8_cost[0] = ihevce_inject_stim_into_distortion( |
| pu1_src, |
| src_strd, |
| pu1_pred, |
| pred_strd, |
| pi8_cost[0], |
| !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS |
| : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
| (double)ALPHA_FOR_ZERO_CODING_DECISIONS) / |
| 100.0, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| NULL_PLANE); |
| } |
| #endif |
| |
| /* copy pred to recon for skip mode */ |
| if(SPATIAL_DOMAIN_SSD == e_ssd_type) |
| { |
| ps_ctxt->s_cmn_opt_func.pf_copy_2d( |
| pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size); |
| pu1_is_recon_available[0] = 1; |
| } |
| else |
| { |
| pu1_is_recon_available[0] = 0; |
| } |
| |
| #if ENABLE_INTER_ZCU_COST |
| ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0]; |
| #endif |
| } |
| else |
| { |
| pi8_cost[0] = UINT_MAX; |
| } |
| |
| /* cbf is returned as 0 */ |
| return (0); |
| } |
| |
| /* derive context variables */ |
| pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0]; |
| pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0]; |
| pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2); |
| |
| /* translate the transform size to index for 4x4 and 8x8 */ |
| trans_idx = trans_size >> 2; |
| |
| if(PRED_MODE_INTRA == packed_pred_mode) |
| { |
| quant_scale_mat_offset = 0; |
| intra_flag = 1; |
| #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3 |
| ai4_quant_rounding_factors[0][0] = |
| MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3); |
| |
| for(i = 0; i < trans_size * trans_size; i++) |
| { |
| ai4_quant_rounding_factors[1][i] = |
| MAX(ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3][i], |
| (1 << QUANT_ROUND_FACTOR_Q) / 3); |
| ai4_quant_rounding_factors[2][i] = |
| MAX(ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3][i], |
| (1 << QUANT_ROUND_FACTOR_Q) / 3); |
| } |
| #endif |
| } |
| else |
| { |
| quant_scale_mat_offset = NUM_TRANS_TYPES; |
| } |
| /* for intra 4x4 DST transform should be used */ |
| if((1 == trans_idx) && (1 == intra_flag)) |
| { |
| trans_idx = 0; |
| } |
| /* for 16x16 cases */ |
| else if(16 == trans_size) |
| { |
| trans_idx = 3; |
| } |
| /* for 32x32 cases */ |
| else if(32 == trans_size) |
| { |
| trans_idx = 4; |
| } |
| |
| switch(trans_size) |
| { |
| case 4: |
| { |
| pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU; |
| |
| break; |
| } |
| case 8: |
| { |
| pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU; |
| |
| break; |
| } |
| case 16: |
| { |
| pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU; |
| |
| break; |
| } |
| case 32: |
| { |
| pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU; |
| |
| break; |
| } |
| } |
| |
| /* Do not call the FT and Quant functions if early_cbf is 0 */ |
| if(1 == early_cbf) |
| { |
| /* ---------- call residue and transform block ------- */ |
| *pu4_blk_sad = ps_ctxt->apf_resd_trns[trans_idx]( |
| pu1_src, |
| pu1_pred, |
| pi4_trans_scratch, |
| pi2_trans_values, |
| src_strd, |
| pred_strd, |
| trans_size, |
| NULL_PLANE); |
| |
| cbf = ps_ctxt->apf_quant_iquant_ssd |
| [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2]( |
| pi2_trans_values, |
| ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset], |
| pi2_quant_coeffs, |
| pi2_deq_data, |
| trans_size, |
| ps_ctxt->i4_cu_qp_div6, |
| ps_ctxt->i4_cu_qp_mod6, |
| #if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3 |
| ps_ctxt->i4_quant_rnd_factor[intra_flag], |
| ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3], |
| ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3], |
| #else |
| intra_flag ? ai4_quant_rounding_factors[0][0] |
| : ps_ctxt->i4_quant_rnd_factor[intra_flag], |
| intra_flag ? ai4_quant_rounding_factors[1] |
| : ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3], |
| intra_flag ? ai4_quant_rounding_factors[2] |
| : ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3], |
| #endif |
| trans_size, |
| trans_size, |
| deq_data_strd, |
| pu1_csbf_buf, |
| csbf_strd, |
| pi4_zero_col, |
| pi4_zero_row, |
| ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset], |
| pi8_cost); |
| |
| if(e_ssd_type != FREQUENCY_DOMAIN_SSD) |
| { |
| pi8_cost[0] = UINT_MAX; |
| } |
| } |
| |
| if(0 != cbf) |
| { |
| if(i4_perform_sbh || i4_perform_rdoq) |
| { |
| ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd; |
| ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size; |
| ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map; |
| |
| ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_cu_qp_div6; |
| ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_cu_qp_mod6; |
| ps_rdoq_sbh_ctxt->i4_scan_idx = ps_ctxt->i4_scan_idx; |
| ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost; |
| ps_rdoq_sbh_ctxt->i4_trans_size = trans_size; |
| |
| ps_rdoq_sbh_ctxt->pi2_dequant_coeff = |
| ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset]; |
| ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data; |
| ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs; |
| ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values; |
| ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf; |
| |
| /* ------- call coeffs scan function ------- */ |
| if((!i4_perform_rdoq)) |
| { |
| ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt); |
| |
| pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost; |
| } |
| } |
| |
| *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs( |
| pi2_quant_coeffs, |
| pi4_subBlock2csbfId_map, |
| ps_ctxt->i4_scan_idx, |
| trans_size, |
| pu1_ecd_data, |
| pu1_csbf_buf, |
| csbf_strd); |
| } |
| *pi8_cost >>= ga_trans_shift[trans_idx]; |
| |
| #if RDOPT_ZERO_CBF_ENABLE |
| /* compare null cbf cost with encode tu rd-cost */ |
| if(cbf != 0) |
| { |
| WORD32 tu_bits; |
| LWORD64 tu_rd_cost; |
| |
| LWORD64 zero_cbf_cost = 0; |
| |
| /*Populating the feilds of rdoq_ctxt structure*/ |
| if(i4_perform_rdoq) |
| { |
| /* transform size to log2transform size */ |
| GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size); |
| ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1; |
| ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_qf; |
| ps_rdoq_sbh_ctxt->i4_is_luma = 1; |
| ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx]; |
| ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td = |
| (1 << ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td) / 2; |
| ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0; |
| ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col; |
| ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row; |
| } |
| else if(i4_perform_zcbf) |
| { |
| zero_cbf_cost = |
| |
| ps_ctxt->s_cmn_opt_func.pf_ssd_calculator( |
| pu1_src, pu1_pred, src_strd, pred_strd, trans_size, trans_size, NULL_PLANE); |
| } |
| |
| /************************************************************************/ |
| /* call the entropy rdo encode to get the bit estimate for current tu */ |
| /* note that tu includes only residual coding bits and does not include */ |
| /* tu split, cbf and qp delta encoding bits for a TU */ |
| /************************************************************************/ |
| if(i4_perform_rdoq) |
| { |
| tu_bits = ihevce_entropy_rdo_encode_tu_rdoq( |
| &ps_ctxt->s_rdopt_entropy_ctxt, |
| (pu1_ecd_data), |
| trans_size, |
| 1, |
| ps_rdoq_sbh_ctxt, |
| pi8_cost, |
| &zero_cbf_cost, |
| 0); |
| |
| if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0) |
| { |
| cbf = 0; |
| *pi4_coeff_off = 0; |
| } |
| |
| if((i4_perform_sbh) && (0 != cbf)) |
| { |
| ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost; |
| ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt); |
| *pi8_cost = ps_rdoq_sbh_ctxt->i8_ssd_cost; |
| } |
| |
| /*Add round value before normalizing*/ |
| *pi8_cost += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td; |
| *pi8_cost >>= ga_trans_shift[trans_idx]; |
| |
| if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1) |
| { |
| pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0]; |
| *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs( |
| pi2_quant_coeffs, |
| pi4_subBlock2csbfId_map, |
| ps_ctxt->i4_scan_idx, |
| trans_size, |
| pu1_ecd_data, |
| pu1_csbf_buf, |
| csbf_strd); |
| } |
| } |
| else |
| { |
| tu_bits = ihevce_entropy_rdo_encode_tu( |
| &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 1, i4_perform_sbh); |
| } |
| |
| *pi4_tu_bits = tu_bits; |
| |
| if(e_ssd_type == SPATIAL_DOMAIN_SSD) |
| { |
| *pi8_cost = ihevce_it_recon_ssd( |
| ps_ctxt, |
| pu1_src, |
| src_strd, |
| pu1_pred, |
| pred_strd, |
| pi2_deq_data, |
| deq_data_strd, |
| pu1_recon, |
| i4_recon_stride, |
| pu1_ecd_data, |
| trans_size, |
| packed_pred_mode, |
| cbf, |
| *pi4_zero_col, |
| *pi4_zero_row, |
| NULL_PLANE); |
| |
| pu1_is_recon_available[0] = 1; |
| } |
| |
| #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier) |
| { |
| pi8_cost[0] = ihevce_inject_stim_into_distortion( |
| pu1_src, |
| src_strd, |
| pu1_recon, |
| i4_recon_stride, |
| pi8_cost[0], |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| NULL_PLANE); |
| } |
| else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier) |
| { |
| pi8_cost[0] = ihevce_inject_stim_into_distortion( |
| pu1_src, |
| src_strd, |
| pu1_pred, |
| pred_strd, |
| pi8_cost[0], |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| NULL_PLANE); |
| } |
| #endif |
| |
| /* add the SSD cost to bits estimate given by ECD */ |
| tu_rd_cost = *pi8_cost + COMPUTE_RATE_COST_CLIP30( |
| tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); |
| |
| if(i4_perform_zcbf) |
| { |
| #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
| { |
| zero_cbf_cost = ihevce_inject_stim_into_distortion( |
| pu1_src, |
| src_strd, |
| pu1_pred, |
| pred_strd, |
| zero_cbf_cost, |
| !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS |
| : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
| (double)ALPHA_FOR_ZERO_CODING_DECISIONS) / |
| 100.0, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| NULL_PLANE); |
| } |
| #endif |
| |
| /* force the tu as zero cbf if zero_cbf_cost is lower */ |
| if(zero_cbf_cost < tu_rd_cost) |
| { |
| /* num bytes is set to 0 */ |
| *pi4_coeff_off = 0; |
| |
| /* cbf is returned as 0 */ |
| cbf = 0; |
| |
| /* cost is returned as 0 cbf cost */ |
| *pi8_cost = zero_cbf_cost; |
| |
| /* TU bits is set to 0 */ |
| *pi4_tu_bits = 0; |
| pu1_is_recon_available[0] = 0; |
| |
| if(SPATIAL_DOMAIN_SSD == e_ssd_type) |
| { |
| /* copy pred to recon for zcbf mode */ |
| |
| ps_ctxt->s_cmn_opt_func.pf_copy_2d( |
| pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size); |
| |
| pu1_is_recon_available[0] = 1; |
| } |
| } |
| /* accumulate cu not coded cost with zcbf cost */ |
| #if ENABLE_INTER_ZCU_COST |
| ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost; |
| #endif |
| } |
| } |
| else |
| { |
| /* cbf = 0, accumulate cu not coded cost */ |
| if(e_ssd_type == SPATIAL_DOMAIN_SSD) |
| { |
| *pi8_cost = ihevce_it_recon_ssd( |
| ps_ctxt, |
| pu1_src, |
| src_strd, |
| pu1_pred, |
| pred_strd, |
| pi2_deq_data, |
| deq_data_strd, |
| pu1_recon, |
| i4_recon_stride, |
| pu1_ecd_data, |
| trans_size, |
| packed_pred_mode, |
| cbf, |
| *pi4_zero_col, |
| *pi4_zero_row, |
| NULL_PLANE); |
| |
| pu1_is_recon_available[0] = 1; |
| } |
| |
| #if ENABLE_INTER_ZCU_COST |
| { |
| #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier) |
| { |
| pi8_cost[0] = ihevce_inject_stim_into_distortion( |
| pu1_src, |
| src_strd, |
| pu1_recon, |
| i4_recon_stride, |
| pi8_cost[0], |
| !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS |
| : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
| (double)ALPHA_FOR_ZERO_CODING_DECISIONS) / |
| 100.0, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| NULL_PLANE); |
| } |
| else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier) |
| { |
| pi8_cost[0] = ihevce_inject_stim_into_distortion( |
| pu1_src, |
| src_strd, |
| pu1_pred, |
| pred_strd, |
| pi8_cost[0], |
| !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS |
| : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
| (double)ALPHA_FOR_ZERO_CODING_DECISIONS) / |
| 100.0, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| NULL_PLANE); |
| } |
| #endif |
| |
| ps_ctxt->i8_cu_not_coded_cost += *pi8_cost; |
| } |
| #endif /* ENABLE_INTER_ZCU_COST */ |
| } |
| #endif |
| |
| return (cbf); |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_it_recon_fxn \endif |
| * |
| * \brief |
| * Transform unit level (Luma) IT Recon function |
| * |
| * \par Description |
| * Fills the recon block using the first matching rule below: |
| * 1. SKIP mode : recon is a copy of the prediction |
| * 2. cbf == 0 : recon is a copy of the prediction |
| * 3. first two bytes of pu1_ecd_data are zero and the transform index is |
| * not 0 : DC add through pf_itrans_recon_dc using pi2_deq_data[0] |
| * 4. otherwise : apf_it_recon[] entry selected by the transform index |
| * Transform index : 0 for intra 4x4, 3 for 16x16, 4 for 32x32, else |
| * (trans_size >> 2). |
| * |
| * \param[in] ps_ctxt enc_loop module ctxt pointer |
| * \param[in] pi2_deq_data pointer to iq data |
| * \param[in] deq_dat_strd iq data buffer stride |
| * \param[in] pu1_pred pointer to predicted data buffer |
| * \param[in] pred_strd predicted buffer stride |
| * \param[out] pu1_recon pointer to recon buffer (written by every path) |
| * \param[in] recon_strd recon buffer stride |
| * \param[in] pu1_ecd_data coeff buffer; only bytes 0 and 1 are read here |
| * \param[in] trans_size transform size (4, 8, 16,32) |
| * \param[in] packed_pred_mode 0:Inter 1:Intra 2:Skip |
| * \param[in] cbf CBF of the current block |
| * \param[in] zero_cols zero_cols of the current block |
| * \param[in] zero_rows zero_rows of the current block |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| |
| void ihevce_it_recon_fxn( |
|     ihevce_enc_loop_ctxt_t *ps_ctxt, |
|     WORD16 *pi2_deq_data, |
|     WORD32 deq_dat_strd, |
|     UWORD8 *pu1_pred, |
|     WORD32 pred_strd, |
|     UWORD8 *pu1_recon, |
|     WORD32 recon_strd, |
|     UWORD8 *pu1_ecd_data, |
|     WORD32 trans_size, |
|     WORD32 packed_pred_mode, |
|     WORD32 cbf, |
|     WORD32 zero_cols, |
|     WORD32 zero_rows) |
| { |
|     WORD32 xfrm_idx; |
|     WORD32 is_dc_only; |
| |
|     /* SKIP carries no residue : recon is just the prediction */ |
|     if(PRED_MODE_SKIP == packed_pred_mode) |
|     { |
|         ps_ctxt->s_cmn_opt_func.pf_2d_square_copy( |
|             pu1_recon, recon_strd, pu1_pred, pred_strd, trans_size, sizeof(UWORD8)); |
| |
|         return; |
|     } |
| |
|     /* map the transform size onto the function table index */ |
|     switch(trans_size) |
|     { |
|     case 16: |
|         xfrm_idx = 3; |
|         break; |
| |
|     case 32: |
|         xfrm_idx = 4; |
|         break; |
| |
|     default: |
|         xfrm_idx = trans_size >> 2; |
| |
|         /* intra 4x4 uses the DST entry at index 0 */ |
|         if((1 == xfrm_idx) && (PRED_MODE_INTRA == packed_pred_mode)) |
|         { |
|             xfrm_idx = 0; |
|         } |
|         break; |
|     } |
| |
|     /* lastx == 0 and lasty == 0, i.e. a single coefficient is present */ |
|     is_dc_only = (0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1]); |
| |
|     /* nothing coded : buffer copy of pred into recon */ |
|     if(0 == cbf) |
|     { |
|         ps_ctxt->s_cmn_opt_func.pf_2d_square_copy( |
|             pu1_recon, recon_strd, pu1_pred, pred_strd, trans_size, 1); |
| |
|         return; |
|     } |
| |
|     /* DC only shortcut; not taken for index 0 (intra 4x4) */ |
|     if(is_dc_only && (0 != xfrm_idx)) |
|     { |
|         ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc( |
|             pu1_pred, |
|             pred_strd, |
|             pu1_recon, |
|             recon_strd, |
|             trans_size, |
|             pi2_deq_data[0], |
|             NULL_PLANE /* luma */ |
|         ); |
| |
|         return; |
|     } |
| |
|     /* general case : inverse transform + recon for this index */ |
|     ps_ctxt->apf_it_recon[xfrm_idx]( |
|         pi2_deq_data, |
|         &ps_ctxt->ai2_scratch[0], |
|         pu1_pred, |
|         pu1_recon, |
|         deq_dat_strd, |
|         pred_strd, |
|         recon_strd, |
|         zero_cols, |
|         zero_rows); |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_chroma_it_recon_fxn \endif |
| * |
| * \brief |
| * Transform unit level (Chroma) IT Recon function |
| * |
| * \par Description |
| * Fills the recon of one chroma plane using the first matching rule below: |
| * 1. cbf == 0 : pf_chroma_interleave_2d_copy of pred into recon |
| * 2. first two bytes of pu1_ecd_data are zero : DC add through |
| * pf_itrans_recon_dc using pi2_deq_data[0] |
| * 3. otherwise : apf_chrm_it_recon[] entry for the transform size, with the |
| * pred and recon pointers advanced by e_chroma_plane |
| * A trans_size of 2 is promoted to 4 before anything else is done. |
| * |
| * \param[in] ps_ctxt enc_loop module ctxt pointer |
| * \param[in] pi2_deq_data pointer to iq data |
| * \param[in] deq_dat_strd iq data buffer stride |
| * \param[in] pu1_pred pointer to predicted data buffer |
| * \param[in] pred_strd predicted buffer stride |
| * \param[out] pu1_recon pointer to recon buffer |
| * \param[in] recon_strd recon buffer stride |
| * \param[in] pu1_ecd_data coeff buffer; only bytes 0 and 1 are read here |
| * \param[in] trans_size transform size (4, 8, 16) |
| * \param[in] cbf CBF of the current block |
| * \param[in] zero_cols zero_cols of the current block |
| * \param[in] zero_rows zero_rows of the current block |
| * \param[in] e_chroma_plane U_PLANE or V_PLANE (asserted) |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| |
| void ihevce_chroma_it_recon_fxn( |
|     ihevce_enc_loop_ctxt_t *ps_ctxt, |
|     WORD16 *pi2_deq_data, |
|     WORD32 deq_dat_strd, |
|     UWORD8 *pu1_pred, |
|     WORD32 pred_strd, |
|     UWORD8 *pu1_recon, |
|     WORD32 recon_strd, |
|     UWORD8 *pu1_ecd_data, |
|     WORD32 trans_size, |
|     WORD32 cbf, |
|     WORD32 zero_cols, |
|     WORD32 zero_rows, |
|     CHROMA_PLANE_ID_T e_chroma_plane) |
| { |
|     WORD32 chrm_fn_idx; |
| |
|     ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE)); |
| |
|     /* 2x2 transform is not allowed for chroma, 4x4 is used instead */ |
|     if(2 == trans_size) |
|     { |
|         trans_size = 4; |
|     } |
| |
|     /* chroma table index : (size >> 2) - 1, with 16x16 mapped to 3 - 1 */ |
|     chrm_fn_idx = ((16 == trans_size) ? 3 : (trans_size >> 2)) - 1; |
| |
|     /* nothing coded : buffer copy of pred into recon */ |
|     if(0 == cbf) |
|     { |
|         ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( |
|             pu1_pred, pred_strd, pu1_recon, recon_strd, trans_size, trans_size, e_chroma_plane); |
| |
|         return; |
|     } |
| |
|     /* first two bytes of the coeff buffer are zero : dc add */ |
|     if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1])) |
|     { |
|         ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc( |
|             pu1_pred, |
|             pred_strd, |
|             pu1_recon, |
|             recon_strd, |
|             trans_size, |
|             pi2_deq_data[0], |
|             e_chroma_plane /* chroma plane */ |
|         ); |
| |
|         return; |
|     } |
| |
|     /* general case : pred and recon pointers are offset by the plane id */ |
|     ps_ctxt->apf_chrm_it_recon[chrm_fn_idx]( |
|         pi2_deq_data, |
|         &ps_ctxt->ai2_scratch[0], |
|         pu1_pred + (WORD32)e_chroma_plane, |
|         pu1_recon + (WORD32)e_chroma_plane, |
|         deq_dat_strd, |
|         pred_strd, |
|         recon_strd, |
|         zero_cols, |
|         zero_rows); |
| } |
| |
| /** |
| ******************************************************************************* |
| * \if Function name : ihevce_mpm_idx_based_filter_RDOPT_cand \endif |
| * |
| * \brief * Filters the RDOPT candidates based on mpm_idx |
| * |
| * \par Description |
| * Builds the three entry candidate mode list for the CU with |
| * ihevce_populate_intra_pred_mode and then walks the 255 terminated luma mode |
| * list. Every candidate except the first whose mode is not in that three |
| * entry list gets its evaluation mark cleared to 0. Marks are never set here; |
| * entries that pass the filter keep whatever value the caller stored. |
| * |
| * \param[in] ps_ctxt : ptr to enc loop context |
| * \param[in] ps_cu_analyse : ptr to CU analyse structure |
| * \param[in] ps_left_nbr_4x4 left 4x4 neighbour pointer |
| * \param[in] ps_top_nbr_4x4 top 4x4 neighbour pointer |
| * \param[in] pu1_luma_mode luma mode list, terminated by 255 |
| * \param[in,out] pu1_eval_mark per candidate evaluation marks, indexed like |
| * pu1_luma_mode |
| * |
| * \returns none |
| * |
| * \author |
| * Ittiam |
| * |
| ******************************************************************************* |
| */ |
| |
| void ihevce_mpm_idx_based_filter_RDOPT_cand( |
|     ihevce_enc_loop_ctxt_t *ps_ctxt, |
|     cu_analyse_t *ps_cu_analyse, |
|     nbr_4x4_t *ps_left_nbr_4x4, |
|     nbr_4x4_t *ps_top_nbr_4x4, |
|     UWORD8 *pu1_luma_mode, |
|     UWORD8 *pu1_eval_mark) |
| { |
|     nbr_avail_flags_t s_avail; |
|     WORD32 ai4_mpm_modes[3]; |
|     WORD32 pos_x_4x4 = ps_cu_analyse->b3_cu_pos_x << 1; |
|     WORD32 pos_y_4x4 = ps_cu_analyse->b3_cu_pos_y << 1; |
|     WORD32 cu_size = ps_cu_analyse->u1_cu_size; |
|     WORD32 cand_idx; |
| |
|     /* neighbour availability; only s_avail is used, the returned flags are not */ |
|     (void)ihevce_get_nbr_intra( |
|         &s_avail, |
|         ps_ctxt->pu1_ctb_nbr_map, |
|         ps_ctxt->i4_nbr_map_strd, |
|         pos_x_4x4, |
|         pos_y_4x4, |
|         cu_size >> 2); |
| |
|     /* One mode list is populated for TU=CU and reused for TU=CU/2, since the |
|      * modes are the same in both cases |
|      */ |
|     ihevce_populate_intra_pred_mode( |
|         ps_top_nbr_4x4->b6_luma_intra_mode, |
|         ps_left_nbr_4x4->b6_luma_intra_mode, |
|         s_avail.u1_top_avail, |
|         s_avail.u1_left_avail, |
|         pos_y_4x4, |
|         &ai4_mpm_modes[0]); |
| |
|     /* empty candidate list : nothing to filter */ |
|     if(255 == pu1_luma_mode[0]) |
|     { |
|         return; |
|     } |
| |
|     /* The 1st candidate always goes through the RDOPT stage irrespective of |
|      * whether it is in the mode list, so the walk starts at index 1 |
|      */ |
|     for(cand_idx = 1; 255 != pu1_luma_mode[cand_idx]; cand_idx++) |
|     { |
|         WORD32 is_in_mpm_list = (pu1_luma_mode[cand_idx] == ai4_mpm_modes[0]) || |
|                                 (pu1_luma_mode[cand_idx] == ai4_mpm_modes[1]) || |
|                                 (pu1_luma_mode[cand_idx] == ai4_mpm_modes[2]); |
| |
|         /* candidate absent from the mode list : drop it from evaluation */ |
|         if(!is_in_mpm_list) |
|         { |
|             pu1_eval_mark[cand_idx] = 0; |
|         } |
|     } |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_intra_rdopt_cu_ntu \endif |
| * |
| * \brief |
| * Intra Coding unit function for RD opt mode |
| * |
| * \param[in] ps_ctxt enc_loop module ctxt pointer |
| * \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure |
| * \param[in] pu1_luma_mode : pointer to luma mode |
| * \param[in] ps_cu_analyse pointer to cu analyse pointer |
| * \param[in] pu1_src pointer to source data buffer |
| * \param[in] src_strd source buffer stride |
| * \param[in] pu1_cu_left pointer to left recon data buffer |
| * \param[in] pu1_cu_top pointer to top recon data buffer |
| * \param[in] pu1_cu_top_left pointer to top left recon data buffer |
| * \param[in] ps_left_nbr_4x4 : left 4x4 neighbour pointer |
| * \param[in] ps_top_nbr_4x4 : top 4x4 neighbour pointer |
| * \param[in] nbr_4x4_left_strd left nbr4x4 stride |
| * \param[in] cu_left_stride left recon buffer stride |
| * \param[in] curr_buf_idx RD opt buffer index for current usage |
| * \param[in] func_proc_mode : function processing mode @sa TU_SIZE_WRT_CU_T |
| * |
| * \return |
| * RDopt cost |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| LWORD64 ihevce_intra_rdopt_cu_ntu( |
| ihevce_enc_loop_ctxt_t *ps_ctxt, |
| enc_loop_cu_prms_t *ps_cu_prms, |
| void *pv_pred_org, |
| WORD32 pred_strd_org, |
| enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms, |
| UWORD8 *pu1_luma_mode, |
| cu_analyse_t *ps_cu_analyse, |
| void *pv_curr_src, |
| void *pv_cu_left, |
| void *pv_cu_top, |
| void *pv_cu_top_left, |
| nbr_4x4_t *ps_left_nbr_4x4, |
| nbr_4x4_t *ps_top_nbr_4x4, |
| WORD32 nbr_4x4_left_strd, |
| WORD32 cu_left_stride, |
| WORD32 curr_buf_idx, |
| WORD32 func_proc_mode, |
| WORD32 i4_alpha_stim_multiplier) |
| { |
| enc_loop_cu_final_prms_t *ps_final_prms; |
| nbr_avail_flags_t s_nbr; |
| nbr_4x4_t *ps_nbr_4x4; |
| nbr_4x4_t *ps_tmp_lt_4x4; |
| recon_datastore_t *ps_recon_datastore; |
| |
| ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr; |
| |
| UWORD32 *pu4_nbr_flags; |
| UWORD8 *pu1_intra_pred_mode; |
| WORD32 cu_pos_x; |
| WORD32 cu_pos_y; |
| WORD32 trans_size = 0; |
| UWORD8 *pu1_left; |
| UWORD8 *pu1_top; |
| UWORD8 *pu1_top_left; |
| UWORD8 *pu1_recon; |
| UWORD8 *pu1_csbf_buf; |
| UWORD8 *pu1_ecd_data; |
| WORD16 *pi2_deq_data; |
| WORD32 deq_data_strd; |
| LWORD64 total_rdopt_cost; |
| WORD32 ctr; |
| WORD32 left_strd; |
| WORD32 i4_recon_stride; |
| WORD32 csbf_strd; |
| WORD32 ecd_data_bytes_cons; |
| WORD32 num_4x4_in_tu; |
| WORD32 num_4x4_in_cu; |
| WORD32 chrm_present_flag; |
| WORD32 tx_size; |
| WORD32 cu_bits; |
| WORD32 num_cu_parts = 0; |
| WORD32 num_cands = 0; |
| WORD32 cu_pos_x_8pelunits; |
| WORD32 cu_pos_y_8pelunits; |
| WORD32 i4_perform_rdoq; |
| WORD32 i4_perform_sbh; |
| UWORD8 u1_compute_spatial_ssd; |
| UWORD8 u1_compute_recon; |
| UWORD8 au1_intra_nxn_rdopt_ctxt_models[2][IHEVC_CAB_CTXT_END]; |
| |
| UWORD16 u2_num_tus_in_cu = 0; |
| WORD32 is_sub_pu_in_hq = 0; |
| /* Get the RDOPT cost of the best CU mode for early_exit */ |
| LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost; |
| /* cabac context of prev intra luma pred flag */ |
| UWORD8 u1_prev_flag_cabac_ctxt = |
| ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_INTRA_LUMA_PRED_FLAG]; |
| WORD32 src_strd = ps_cu_prms->i4_luma_src_stride; |
| |
| UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY; |
| |
| total_rdopt_cost = 0; |
| ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx]; |
| ps_recon_datastore = &ps_final_prms->s_recon_datastore; |
| i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride; |
| csbf_strd = ps_ctxt->i4_cu_csbf_strd; |
| pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0]; |
| pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0]; |
| pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0]; |
| deq_data_strd = ps_cu_analyse->u1_cu_size; /* deq_data stride is cu size */ |
| ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0]; |
| ps_tmp_lt_4x4 = ps_left_nbr_4x4; |
| pu4_nbr_flags = &ps_final_prms->au4_nbr_flags[0]; |
| pu1_intra_pred_mode = &ps_final_prms->au1_intra_pred_mode[0]; |
| cu_pos_x = ps_cu_analyse->b3_cu_pos_x; |
| cu_pos_y = ps_cu_analyse->b3_cu_pos_y; |
| cu_pos_x_8pelunits = cu_pos_x; |
| cu_pos_y_8pelunits = cu_pos_y; |
| |
| /* reset cu not coded cost */ |
| ps_ctxt->i8_cu_not_coded_cost = 0; |
| |
| /* based on the Processng mode */ |
| if(TU_EQ_CU == func_proc_mode) |
| { |
| ps_final_prms->u1_part_mode = SIZE_2Nx2N; |
| trans_size = ps_cu_analyse->u1_cu_size; |
| num_cu_parts = 1; |
| num_cands = 1; |
| u2_num_tus_in_cu = 1; |
| } |
| else if(TU_EQ_CU_DIV2 == func_proc_mode) |
| { |
| ps_final_prms->u1_part_mode = SIZE_2Nx2N; |
| trans_size = ps_cu_analyse->u1_cu_size >> 1; |
| num_cu_parts = 4; |
| num_cands = 1; |
| u2_num_tus_in_cu = 4; |
| } |
| else if(TU_EQ_SUBCU == func_proc_mode) |
| { |
| ps_final_prms->u1_part_mode = SIZE_NxN; |
| trans_size = ps_cu_analyse->u1_cu_size >> 1; |
| num_cu_parts = 4; |
| /*In HQ for TU = SUBPU, all 35 modes used for RDOPT instead of 3 modes */ |
| if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset) |
| { |
| if(ps_ctxt->i1_slice_type != BSLICE) |
| { |
| num_cands = (4 * MAX_INTRA_CU_CANDIDATES) + 2; |
| } |
| else |
| { |
| num_cands = (2 * MAX_INTRA_CU_CANDIDATES); |
| } |
| } |
| else |
| { |
| num_cands = MAX_INTRA_CU_CANDIDATES; |
| } |
| u2_num_tus_in_cu = 4; |
| } |
| else |
| { |
| /* should not enter here */ |
| ASSERT(0); |
| } |
| |
| if(ps_ctxt->i1_cu_qp_delta_enable) |
| { |
| ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, trans_size, 1); |
| } |
| |
| if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT) |
| { |
| ps_ctxt->i8_cl_ssd_lambda_qf = |
| ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / |
| 100.0f); |
| ps_ctxt->i8_cl_ssd_lambda_chroma_qf = |
| ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf * |
| (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f); |
| } |
| |
| u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) && |
| (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) && |
| CONVERT_SSDS_TO_SPATIAL_DOMAIN; |
| |
| if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT) |
| { |
| u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) && |
| CONVERT_SSDS_TO_SPATIAL_DOMAIN; |
| } |
| |
| /* populate the neigbours */ |
| pu1_left = (UWORD8 *)pv_cu_left; |
| pu1_top = (UWORD8 *)pv_cu_top; |
| pu1_top_left = (UWORD8 *)pv_cu_top_left; |
| left_strd = cu_left_stride; |
| num_4x4_in_tu = (trans_size >> 2); |
| num_4x4_in_cu = (ps_cu_analyse->u1_cu_size >> 2); |
| chrm_present_flag = 1; |
| ecd_data_bytes_cons = 0; |
| cu_bits = 0; |
| |
| /* get the 4x4 level postion of current cu */ |
| cu_pos_x = cu_pos_x << 1; |
| cu_pos_y = cu_pos_y << 1; |
| |
| /* pouplate cu level params knowing that current is intra */ |
| ps_final_prms->u1_skip_flag = 0; |
| ps_final_prms->u1_intra_flag = PRED_MODE_INTRA; |
| ps_final_prms->u2_num_pus_in_cu = 1; |
| /*init the is_cu_coded flag*/ |
| ps_final_prms->u1_is_cu_coded = 0; |
| ps_final_prms->u4_cu_sad = 0; |
| |
| ps_final_prms->as_pu_enc_loop[0].b1_intra_flag = PRED_MODE_INTRA; |
| ps_final_prms->as_pu_enc_loop[0].b4_wd = (trans_size >> 1) - 1; |
| ps_final_prms->as_pu_enc_loop[0].b4_ht = (trans_size >> 1) - 1; |
| ps_final_prms->as_pu_enc_loop[0].b4_pos_x = cu_pos_x; |
| ps_final_prms->as_pu_enc_loop[0].b4_pos_y = cu_pos_y; |
| ps_final_prms->as_pu_enc_loop[0].b1_merge_flag = 0; |
| |
| ps_final_prms->as_col_pu_enc_loop[0].b1_intra_flag = 1; |
| |
| /*copy qp directly as intra cant be skip*/ |
| ps_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp; |
| ps_nbr_4x4->mv.s_l0_mv.i2_mvx = 0; |
| ps_nbr_4x4->mv.s_l0_mv.i2_mvy = 0; |
| ps_nbr_4x4->mv.s_l1_mv.i2_mvx = 0; |
| ps_nbr_4x4->mv.s_l1_mv.i2_mvy = 0; |
| ps_nbr_4x4->mv.i1_l0_ref_pic_buf_id = -1; |
| ps_nbr_4x4->mv.i1_l1_ref_pic_buf_id = -1; |
| ps_nbr_4x4->mv.i1_l0_ref_idx = -1; |
| ps_nbr_4x4->mv.i1_l1_ref_idx = -1; |
| |
| /* RDOPT copy States : TU init (best until prev TU) to current */ |
| memcpy( |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0], |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
| IHEVC_CAB_COEFFX_PREFIX); |
| |
| /* RDOPT copy States :update to init state if 0 cbf */ |
| memcpy( |
| &au1_intra_nxn_rdopt_ctxt_models[0][0], |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
| IHEVC_CAB_COEFFX_PREFIX); |
| memcpy( |
| &au1_intra_nxn_rdopt_ctxt_models[1][0], |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
| IHEVC_CAB_COEFFX_PREFIX); |
| |
| /* loop for all partitions in CU blocks */ |
| for(ctr = 0; ctr < num_cu_parts; ctr++) |
| { |
| UWORD8 *pu1_curr_mode; |
| WORD32 cand_ctr; |
| WORD32 nbr_flags; |
| |
| /* for NxN case to track the best mode */ |
| /* for other cases zeroth index will be used */ |
| intra_prev_rem_flags_t as_intra_prev_rem[2]; |
| LWORD64 ai8_cand_rdopt_cost[2]; |
| UWORD32 au4_tu_sad[2]; |
| WORD32 ai4_tu_bits[2]; |
| WORD32 ai4_cbf[2]; |
| WORD32 ai4_curr_bytes[2]; |
| WORD32 ai4_zero_col[2]; |
| WORD32 ai4_zero_row[2]; |
| /* To store the pred, coeff and dequant for TU_EQ_SUBCU case (since mul. |
| cand. are there) ping-pong buffer to store the best and current */ |
| UWORD8 au1_cur_pred_data[2][MIN_TU_SIZE * MIN_TU_SIZE]; |
| UWORD8 au1_intra_coeffs[2][MAX_SCAN_COEFFS_BYTES_4x4]; |
| WORD16 ai2_intra_deq_coeffs[2][MIN_TU_SIZE * MIN_TU_SIZE]; |
| /* Context models stored for RDopt store and restore purpose */ |
| |
| UWORD8 au1_recon_availability[2]; |
| |
| WORD32 best_cand_idx = 0; |
| LWORD64 best_cand_cost = MAX_COST_64; |
| /* counters to toggle b/w best and current */ |
| WORD32 best_intra_buf_idx = 1; |
| WORD32 curr_intra_buf_idx = 0; |
| |
| /* copy the mode pointer to be used in inner loop */ |
| pu1_curr_mode = pu1_luma_mode; |
| |
| /* get the neighbour availability flags */ |
| nbr_flags = ihevce_get_nbr_intra( |
| &s_nbr, |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| cu_pos_x, |
| cu_pos_y, |
| num_4x4_in_tu); |
| |
| /* copy the nbr flags for chroma reuse */ |
| if(4 != trans_size) |
| { |
| *pu4_nbr_flags = nbr_flags; |
| } |
| else if(1 == chrm_present_flag) |
| { |
| /* compute the avail flags assuming luma trans is 8x8 */ |
| /* get the neighbour availability flags */ |
| *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu( |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| cu_pos_x, |
| cu_pos_y, |
| (num_4x4_in_tu << 1), |
| (num_4x4_in_tu << 1)); |
| } |
| |
| u1_compute_recon = !u1_compute_spatial_ssd && ((num_cu_parts > 1) && (ctr < 3)); |
| |
| if(!ctr && (u1_compute_spatial_ssd || u1_compute_recon)) |
| { |
| ps_recon_datastore->u1_is_lumaRecon_available = 1; |
| } |
| else if(!ctr) |
| { |
| ps_recon_datastore->u1_is_lumaRecon_available = 0; |
| } |
| |
| ihevc_intra_pred_luma_ref_substitution_fptr = |
| ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr; |
| |
| /* call reference array substitution */ |
| ihevc_intra_pred_luma_ref_substitution_fptr( |
| pu1_top_left, |
| pu1_top, |
| pu1_left, |
| left_strd, |
| trans_size, |
| nbr_flags, |
| (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
| 1); |
| |
| /* Intra Mode gating based on MPM cand list and encoder quality preset */ |
| if((ps_ctxt->i1_slice_type != ISLICE) && (TU_EQ_SUBCU == func_proc_mode) && |
| (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3)) |
| { |
| ihevce_mpm_idx_based_filter_RDOPT_cand( |
| ps_ctxt, |
| ps_cu_analyse, |
| ps_left_nbr_4x4, |
| ps_top_nbr_4x4, |
| pu1_luma_mode, |
| &ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][0]); |
| } |
| |
| if((TU_EQ_SUBCU == func_proc_mode) && (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) && |
| (ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr] >= MAX_INTRA_CU_CANDIDATES)) |
| { |
| WORD32 ai4_mpm_mode_list[3]; |
| WORD32 i; |
| |
| WORD32 i4_curr_index = ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr]; |
| |
| ihevce_populate_intra_pred_mode( |
| ps_top_nbr_4x4->b6_luma_intra_mode, |
| ps_tmp_lt_4x4->b6_luma_intra_mode, |
| s_nbr.u1_top_avail, |
| s_nbr.u1_left_avail, |
| cu_pos_y, |
| &ai4_mpm_mode_list[0]); |
| |
| for(i = 0; i < 3; i++) |
| { |
| if(ps_cu_analyse->s_cu_intra_cand |
| .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] == 0) |
| { |
| ASSERT(ai4_mpm_mode_list[i] < 35); |
| |
| ps_cu_analyse->s_cu_intra_cand |
| .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] = 1; |
| pu1_luma_mode[i4_curr_index] = ai4_mpm_mode_list[i]; |
| ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr]++; |
| i4_curr_index++; |
| } |
| } |
| |
| pu1_luma_mode[i4_curr_index] = 255; |
| } |
| |
| /* loop over candidates for each partition */ |
| for(cand_ctr = 0; cand_ctr < num_cands; cand_ctr++) |
| { |
| WORD32 curr_pred_mode; |
| WORD32 bits = 0; |
| LWORD64 curr_cost; |
| WORD32 luma_pred_func_idx; |
| UWORD8 *pu1_curr_ecd_data; |
| WORD16 *pi2_curr_deq_data; |
| WORD32 curr_deq_data_strd; |
| WORD32 pred_strd; |
| UWORD8 *pu1_pred; |
| |
| /* if NXN case the recon and ecd data is stored in temp buffers */ |
| if(TU_EQ_SUBCU == func_proc_mode) |
| { |
| pu1_pred = &au1_cur_pred_data[curr_intra_buf_idx][0]; |
| pred_strd = trans_size; |
| pu1_curr_ecd_data = &au1_intra_coeffs[curr_intra_buf_idx][0]; |
| pi2_curr_deq_data = &ai2_intra_deq_coeffs[curr_intra_buf_idx][0]; |
| curr_deq_data_strd = trans_size; |
| |
| ASSERT(trans_size == MIN_TU_SIZE); |
| } |
| else |
| { |
| pu1_pred = (UWORD8 *)pv_pred_org; |
| pred_strd = pred_strd_org; |
| pu1_curr_ecd_data = pu1_ecd_data; |
| pi2_curr_deq_data = pi2_deq_data; |
| curr_deq_data_strd = deq_data_strd; |
| } |
| |
| pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[curr_intra_buf_idx]) + |
| (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride; |
| |
| if(is_sub_pu_in_hq == 1) |
| { |
| curr_pred_mode = cand_ctr; |
| } |
| else |
| { |
| curr_pred_mode = pu1_curr_mode[cand_ctr]; |
| } |
| |
| /* If the candidate mode is 255, then break */ |
| if(255 == curr_pred_mode) |
| { |
| break; |
| } |
| else if(250 == curr_pred_mode) |
| { |
| continue; |
| } |
| |
| /* check if this mode needs to be evaluated or not. For 2nx2n cases, this */ |
| /* function will be called once per candidate, so this check has been done */ |
| /* outside this function call. For NxN case, this function will be called */ |
| /* only once, and all the candidates will be evaluated here. */ |
| if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3) |
| { |
| if((TU_EQ_SUBCU == func_proc_mode) && |
| (0 == ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][cand_ctr])) |
| { |
| continue; |
| } |
| } |
| |
| /* call reference filtering */ |
| ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr( |
| (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
| trans_size, |
| (UWORD8 *)ps_ctxt->pv_ref_filt_out, |
| curr_pred_mode, |
| ps_ctxt->i1_strong_intra_smoothing_enable_flag); |
| |
| /* use the look up to get the function idx */ |
| luma_pred_func_idx = g_i4_ip_funcs[curr_pred_mode]; |
| |
| /* call the intra prediction function */ |
| ps_ctxt->apf_lum_ip[luma_pred_func_idx]( |
| (UWORD8 *)ps_ctxt->pv_ref_filt_out, |
| 1, |
| pu1_pred, |
| pred_strd, |
| trans_size, |
| curr_pred_mode); |
| |
| /* populate the coeffs scan idx */ |
| ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT; |
| |
| /* for luma 4x4 and 8x8 transforms based on intra pred mode scan is choosen*/ |
| if(trans_size < 16) |
| { |
| /* for modes from 22 upto 30 horizontal scan is used */ |
| if((curr_pred_mode > 21) && (curr_pred_mode < 31)) |
| { |
| ps_ctxt->i4_scan_idx = SCAN_HORZ; |
| } |
| /* for modes from 6 upto 14 vertical scan is used */ |
| else if((curr_pred_mode > 5) && (curr_pred_mode < 15)) |
| { |
| ps_ctxt->i4_scan_idx = SCAN_VERT; |
| } |
| } |
| |
| /* RDOPT copy States : TU init (best until prev TU) to current */ |
| COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0] + |
| IHEVC_CAB_COEFFX_PREFIX, |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
| IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
| |
| i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq; |
| i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh; |
| |
| #if DISABLE_RDOQ_INTRA |
| i4_perform_rdoq = 0; |
| #endif |
| |
| /*2 Multi- dimensinal array based on trans size of rounding factor to be added here */ |
| /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */ |
| /* Currently the complete array will contain only single value*/ |
| /*The rounding factor is calculated with the formula |
| Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2 |
| rounding factor = (1 - DeadZone Val) |
| |
| Assumption: Cabac states of All the sub-blocks in the TU are considered independent |
| */ |
| if((ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)) |
| { |
| if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0)) |
| { |
| double i4_lamda_modifier; |
| |
| if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id)) |
| { |
| i4_lamda_modifier = |
| ps_ctxt->i4_lamda_modifier * |
| CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00); |
| } |
| else |
| { |
| i4_lamda_modifier = ps_ctxt->i4_lamda_modifier; |
| } |
| if(ps_ctxt->i4_use_const_lamda_modifier) |
| { |
| if(ISLICE == ps_ctxt->i1_slice_type) |
| { |
| i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier; |
| } |
| else |
| { |
| i4_lamda_modifier = CONST_LAMDA_MOD_VAL; |
| } |
| } |
| |
| ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] = |
| &ps_ctxt->i4_quant_round_tu[0][0]; |
| ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] = |
| &ps_ctxt->i4_quant_round_tu[1][0]; |
| |
| memset( |
| ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3], |
| 0, |
| trans_size * trans_size * sizeof(WORD32)); |
| memset( |
| ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3], |
| 0, |
| trans_size * trans_size * sizeof(WORD32)); |
| |
| ihevce_quant_rounding_factor_gen( |
| trans_size, |
| 1, |
| &ps_ctxt->s_rdopt_entropy_ctxt, |
| ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3], |
| ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3], |
| i4_lamda_modifier, |
| 1); |
| } |
| else |
| { |
| ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] = |
| ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3]; |
| ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] = |
| ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3]; |
| } |
| } |
| |
| /* call T Q IT IQ and recon function */ |
| ai4_cbf[curr_intra_buf_idx] = ihevce_t_q_iq_ssd_scan_fxn( |
| ps_ctxt, |
| pu1_pred, |
| pred_strd, |
| (UWORD8 *)pv_curr_src, |
| src_strd, |
| pi2_curr_deq_data, |
| curr_deq_data_strd, |
| pu1_recon, |
| i4_recon_stride, |
| pu1_curr_ecd_data, |
| pu1_csbf_buf, |
| csbf_strd, |
| trans_size, |
| PRED_MODE_INTRA, |
| &ai8_cand_rdopt_cost[curr_intra_buf_idx], |
| &ai4_curr_bytes[curr_intra_buf_idx], |
| &ai4_tu_bits[curr_intra_buf_idx], |
| &au4_tu_sad[curr_intra_buf_idx], |
| &ai4_zero_col[curr_intra_buf_idx], |
| &ai4_zero_row[curr_intra_buf_idx], |
| &au1_recon_availability[curr_intra_buf_idx], |
| i4_perform_rdoq, |
| i4_perform_sbh, |
| #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| i4_alpha_stim_multiplier, |
| u1_is_cu_noisy, |
| #endif |
| u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, |
| 1 /*early_cbf */ |
| ); |
| |
| #if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
| { |
| #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT |
| ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion( |
| pv_curr_src, |
| src_strd, |
| pu1_pred, |
| pred_strd, |
| ai8_cand_rdopt_cost[curr_intra_buf_idx], |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| NULL_PLANE); |
| #else |
| if(u1_compute_spatial_ssd && au1_recon_availability[curr_intra_buf_idx]) |
| { |
| ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion( |
| pv_curr_src, |
| src_strd, |
| pu1_recon, |
| i4_recon_stride, |
| ai8_cand_rdopt_cost[curr_intra_buf_idx], |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| NULL_PLANE); |
| } |
| else |
| { |
| ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion( |
| pv_curr_src, |
| src_strd, |
| pu1_pred, |
| pred_strd, |
| ai8_cand_rdopt_cost[curr_intra_buf_idx], |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| NULL_PLANE); |
| } |
| #endif |
| } |
| #endif |
| |
| if(TU_EQ_SUBCU == func_proc_mode) |
| { |
| ASSERT(ai4_curr_bytes[curr_intra_buf_idx] < MAX_SCAN_COEFFS_BYTES_4x4); |
| } |
| |
| /* based on CBF/No CBF copy the corresponding state */ |
| if(0 == ai4_cbf[curr_intra_buf_idx]) |
| { |
| /* RDOPT copy States :update to init state if 0 cbf */ |
| COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
| &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] + |
| IHEVC_CAB_COEFFX_PREFIX, |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
| IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
| } |
| else |
| { |
| /* RDOPT copy States :update to new state only if CBF is non zero */ |
| COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
| &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] + |
| IHEVC_CAB_COEFFX_PREFIX, |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0] + |
| IHEVC_CAB_COEFFX_PREFIX, |
| IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
| } |
| |
| /* call the function which perform intra mode prediction */ |
| ihevce_intra_pred_mode_signaling( |
| ps_top_nbr_4x4->b6_luma_intra_mode, |
| ps_tmp_lt_4x4->b6_luma_intra_mode, |
| s_nbr.u1_top_avail, |
| s_nbr.u1_left_avail, |
| cu_pos_y, |
| curr_pred_mode, |
| &as_intra_prev_rem[curr_intra_buf_idx]); |
| /******************************************************************/ |
| /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN |
| The bits for these are evaluated for every RDO mode of current subcu |
| as they can significantly contribute to RDO cost. Note that these |
| bits are not accounted for here (ai8_cand_rdopt_cost) as they |
| are accounted for in encode_cu call later */ |
| |
| /******************************************************************/ |
| /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN |
| The bits for these are evaluated for every RDO mode of current subcu |
| as they can significantly contribute to RDO cost. Note that these |
| bits are not accounted for here (ai8_cand_rdopt_cost) as they |
| are accounted for in encode_cu call later */ |
| |
| /* Estimate bits to encode prev rem flag for NXN mode */ |
| { |
| WORD32 bits_frac = gau2_ihevce_cabac_bin_to_bits |
| [u1_prev_flag_cabac_ctxt ^ |
| as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag]; |
| |
| /* rounding the fractional bits to nearest integer */ |
| bits = ((bits_frac + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q); |
| } |
| |
| /* based on prev flag all the mpmidx bits and rem bits */ |
| if(1 == as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag) |
| { |
| /* mpm_idx */ |
| bits += as_intra_prev_rem[curr_intra_buf_idx].b2_mpm_idx ? 2 : 1; |
| } |
| else |
| { |
| /* rem intra mode */ |
| bits += 5; |
| } |
| |
| bits += ai4_tu_bits[curr_intra_buf_idx]; |
| |
| /* compute the total cost for current candidate */ |
| curr_cost = ai8_cand_rdopt_cost[curr_intra_buf_idx]; |
| |
| /* get the final ssd cost */ |
| curr_cost += |
| COMPUTE_RATE_COST_CLIP30(bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); |
| |
| /* check of the best candidate cost */ |
| if(curr_cost < best_cand_cost) |
| { |
| best_cand_cost = curr_cost; |
| best_cand_idx = cand_ctr; |
| best_intra_buf_idx = curr_intra_buf_idx; |
| curr_intra_buf_idx = !curr_intra_buf_idx; |
| } |
| } |
| |
| /*************** For TU_EQ_SUBCU case *****************/ |
| /* Copy the pred for best cand. to the final pred array */ |
| /* Copy the iq-coeff for best cand. to the final array */ |
| /* copy the best coeffs data to final buffer */ |
| if(TU_EQ_SUBCU == func_proc_mode) |
| { |
| /* Copy the pred for best cand. to the final pred array */ |
| |
| ps_ctxt->s_cmn_opt_func.pf_copy_2d( |
| (UWORD8 *)pv_pred_org, |
| pred_strd_org, |
| &au1_cur_pred_data[best_intra_buf_idx][0], |
| trans_size, |
| trans_size, |
| trans_size); |
| |
| /* Copy the deq-coeff for best cand. to the final array */ |
| |
| ps_ctxt->s_cmn_opt_func.pf_copy_2d( |
| (UWORD8 *)pi2_deq_data, |
| deq_data_strd << 1, |
| (UWORD8 *)&ai2_intra_deq_coeffs[best_intra_buf_idx][0], |
| trans_size << 1, |
| trans_size << 1, |
| trans_size); |
| /* copy the coeffs to final cu ecd bytes buffer */ |
| memcpy( |
| pu1_ecd_data, |
| &au1_intra_coeffs[best_intra_buf_idx][0], |
| ai4_curr_bytes[best_intra_buf_idx]); |
| |
| pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[best_intra_buf_idx]) + |
| (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride; |
| } |
| |
| /*---------- Calculate Recon for the best INTRA mode ---------*/ |
| /* TU_EQ_CU case : No need for recon, otherwise recon is required */ |
| /* Compute recon only for the best mode for TU_EQ_SUBCU case */ |
| if(u1_compute_recon) |
| { |
| ihevce_it_recon_fxn( |
| ps_ctxt, |
| pi2_deq_data, |
| deq_data_strd, |
| (UWORD8 *)pv_pred_org, |
| pred_strd_org, |
| pu1_recon, |
| i4_recon_stride, |
| pu1_ecd_data, |
| trans_size, |
| PRED_MODE_INTRA, |
| ai4_cbf[best_intra_buf_idx], |
| ai4_zero_col[best_intra_buf_idx], |
| ai4_zero_row[best_intra_buf_idx]); |
| |
| ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx; |
| } |
| else if(u1_compute_spatial_ssd && au1_recon_availability[best_intra_buf_idx]) |
| { |
| ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx; |
| } |
| else |
| { |
| ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX; |
| } |
| |
| /* RDOPT copy States :update to best modes state */ |
| COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
| &au1_intra_nxn_rdopt_ctxt_models[best_intra_buf_idx][0] + IHEVC_CAB_COEFFX_PREFIX, |
| IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
| |
| /* copy the prev,mpm_idx and rem modes from best cand */ |
| ps_final_prms->as_intra_prev_rem[ctr] = as_intra_prev_rem[best_intra_buf_idx]; |
| |
| /* update the cabac context of prev intra pred mode flag */ |
| u1_prev_flag_cabac_ctxt = gau1_ihevc_next_state |
| [(u1_prev_flag_cabac_ctxt << 1) | |
| as_intra_prev_rem[best_intra_buf_idx].b1_prev_intra_luma_pred_flag]; |
| |
| /* accumulate the TU bits into cu bits */ |
| cu_bits += ai4_tu_bits[best_intra_buf_idx]; |
| |
| /* copy the intra pred mode for chroma reuse */ |
| if(is_sub_pu_in_hq == 0) |
| { |
| *pu1_intra_pred_mode = pu1_curr_mode[best_cand_idx]; |
| } |
| else |
| { |
| *pu1_intra_pred_mode = best_cand_idx; |
| } |
| |
| /* Store luma mode as chroma mode. If chroma prcs happens, and |
| if a diff. mode wins, it should update this!! */ |
| if(1 == chrm_present_flag) |
| { |
| if(is_sub_pu_in_hq == 0) |
| { |
| ps_final_prms->u1_chroma_intra_pred_actual_mode = |
| ((ps_ctxt->u1_chroma_array_type == 2) |
| ? gau1_chroma422_intra_angle_mapping[pu1_curr_mode[best_cand_idx]] |
| : pu1_curr_mode[best_cand_idx]); |
| } |
| else |
| { |
| ps_final_prms->u1_chroma_intra_pred_actual_mode = |
| ((ps_ctxt->u1_chroma_array_type == 2) |
| ? gau1_chroma422_intra_angle_mapping[best_cand_idx] |
| : best_cand_idx); |
| } |
| |
| ps_final_prms->u1_chroma_intra_pred_mode = 4; |
| } |
| |
| /*remember the cbf flag to replicate qp for 4x4 neighbour*/ |
| ps_final_prms->u1_is_cu_coded |= ai4_cbf[best_intra_buf_idx]; |
| |
| /*accumulate ssd over all TU of intra CU*/ |
| ps_final_prms->u4_cu_sad += au4_tu_sad[best_intra_buf_idx]; |
| |
| /* update the bytes */ |
| ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons; |
| ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = |
| ai4_curr_bytes[best_intra_buf_idx]; |
| /* update the zero_row and col info for the final mode */ |
| ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col = |
| ai4_zero_col[best_intra_buf_idx]; |
| ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row = |
| ai4_zero_row[best_intra_buf_idx]; |
| |
| ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons; |
| |
| /* update the total bytes cons */ |
| ecd_data_bytes_cons += ai4_curr_bytes[best_intra_buf_idx]; |
| pu1_ecd_data += ai4_curr_bytes[best_intra_buf_idx]; |
| |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = ai4_cbf[best_intra_buf_idx]; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0; |
| GETRANGE(tx_size, trans_size); |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y; |
| |
| /* repiclate the nbr 4x4 structure for all 4x4 blocks current TU */ |
| ps_nbr_4x4->b1_skip_flag = 0; |
| ps_nbr_4x4->b1_intra_flag = 1; |
| ps_nbr_4x4->b1_pred_l0_flag = 0; |
| ps_nbr_4x4->b1_pred_l1_flag = 0; |
| |
| if(is_sub_pu_in_hq == 0) |
| { |
| ps_nbr_4x4->b6_luma_intra_mode = pu1_curr_mode[best_cand_idx]; |
| } |
| else |
| { |
| ps_nbr_4x4->b6_luma_intra_mode = best_cand_idx; |
| } |
| |
| ps_nbr_4x4->b1_y_cbf = ai4_cbf[best_intra_buf_idx]; |
| |
| /* since tu size can be less than cusize, replication is done with strd */ |
| { |
| WORD32 i, j; |
| nbr_4x4_t *ps_tmp_4x4; |
| |
| ps_tmp_4x4 = ps_nbr_4x4; |
| |
| for(i = 0; i < num_4x4_in_tu; i++) |
| { |
| for(j = 0; j < num_4x4_in_tu; j++) |
| { |
| ps_tmp_4x4[j] = *ps_nbr_4x4; |
| } |
| /* row level update*/ |
| ps_tmp_4x4 += num_4x4_in_cu; |
| } |
| } |
| |
| if(TU_EQ_SUBCU == func_proc_mode) |
| { |
| pu1_luma_mode += ((MAX_INTRA_CU_CANDIDATES * 4) + 2 + 1); |
| } |
| |
| if((num_cu_parts > 1) && (ctr < 3)) |
| { |
| /* set the neighbour map to 1 */ |
| ihevce_set_nbr_map( |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| cu_pos_x, |
| cu_pos_y, |
| trans_size >> 2, |
| 1); |
| |
| /* block level updates block number (1 & 3 )*/ |
| pv_curr_src = (UWORD8 *)pv_curr_src + trans_size; |
| pv_pred_org = (UWORD8 *)pv_pred_org + trans_size; |
| pi2_deq_data += trans_size; |
| |
| switch(ctr) |
| { |
| case 0: |
| { |
| pu1_left = pu1_recon + trans_size - 1; |
| pu1_top += trans_size; |
| pu1_top_left = pu1_top - 1; |
| left_strd = i4_recon_stride; |
| |
| break; |
| } |
| case 1: |
| { |
| ASSERT( |
| (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 0) || |
| (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 1)); |
| |
| /* Since the 'lumaRefSubstitution' function expects both Top and */ |
| /* TopRight recon pixels to be present in the same buffer */ |
| if(ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] != |
| ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]) |
| { |
| UWORD8 *pu1_src = |
| ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs |
| [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) + |
| trans_size; |
| UWORD8 *pu1_dst = |
| ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs |
| [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) + |
| trans_size; |
| |
| ps_ctxt->s_cmn_opt_func.pf_copy_2d( |
| pu1_dst, i4_recon_stride, pu1_src, i4_recon_stride, trans_size, trans_size); |
| |
| ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] = |
| ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]; |
| } |
| |
| pu1_left = (UWORD8 *)pv_cu_left + trans_size * cu_left_stride; |
| pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs |
| [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) + |
| (trans_size - 1) * i4_recon_stride; |
| pu1_top_left = pu1_left - cu_left_stride; |
| left_strd = cu_left_stride; |
| |
| break; |
| } |
| case 2: |
| { |
| ASSERT( |
| (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 0) || |
| (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 1)); |
| |
| pu1_left = pu1_recon + trans_size - 1; |
| pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs |
| [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) + |
| (trans_size - 1) * i4_recon_stride + trans_size; |
| pu1_top_left = pu1_top - 1; |
| left_strd = i4_recon_stride; |
| |
| break; |
| } |
| } |
| |
| pu1_csbf_buf += num_4x4_in_tu; |
| cu_pos_x += num_4x4_in_tu; |
| ps_nbr_4x4 += num_4x4_in_tu; |
| ps_top_nbr_4x4 += num_4x4_in_tu; |
| ps_tmp_lt_4x4 = ps_nbr_4x4 - 1; |
| |
| pu1_intra_pred_mode++; |
| |
| /* after 2 blocks increment the pointers to bottom blocks */ |
| if(1 == ctr) |
| { |
| pv_curr_src = (UWORD8 *)pv_curr_src - (trans_size << 1); |
| pv_curr_src = (UWORD8 *)pv_curr_src + (trans_size * src_strd); |
| |
| pv_pred_org = (UWORD8 *)pv_pred_org - (trans_size << 1); |
| pv_pred_org = (UWORD8 *)pv_pred_org + (trans_size * pred_strd_org); |
| pi2_deq_data -= (trans_size << 1); |
| pi2_deq_data += (trans_size * deq_data_strd); |
| |
| pu1_csbf_buf -= (num_4x4_in_tu << 1); |
| pu1_csbf_buf += (num_4x4_in_tu * csbf_strd); |
| |
| ps_nbr_4x4 -= (num_4x4_in_tu << 1); |
| ps_nbr_4x4 += (num_4x4_in_tu * num_4x4_in_cu); |
| ps_top_nbr_4x4 = ps_nbr_4x4 - num_4x4_in_cu; |
| ps_tmp_lt_4x4 = ps_left_nbr_4x4 + (num_4x4_in_tu * nbr_4x4_left_strd); |
| |
| /* decrement pos x to start */ |
| cu_pos_x -= (num_4x4_in_tu << 1); |
| cu_pos_y += num_4x4_in_tu; |
| } |
| } |
| |
| #if RDOPT_ENABLE |
| /* compute the RDOPT cost for the current TU */ |
| ai8_cand_rdopt_cost[best_intra_buf_idx] += COMPUTE_RATE_COST_CLIP30( |
| ai4_tu_bits[best_intra_buf_idx], ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); |
| #endif |
| |
| /* accumulate the costs */ |
| total_rdopt_cost += ai8_cand_rdopt_cost[best_intra_buf_idx]; |
| |
| if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1) |
| { |
| /* Early exit : If the current running cost exceeds |
| the prev. best mode cost, break */ |
| if(total_rdopt_cost > prev_best_rdopt_cost) |
| { |
| return (total_rdopt_cost); |
| } |
| } |
| |
| /* if transfrom size is 4x4 then only first luma 4x4 will have chroma*/ |
| chrm_present_flag = (4 != trans_size) ? 1 : INTRA_PRED_CHROMA_IDX_NONE; |
| |
| pu4_nbr_flags++; |
| } |
| /* Modify the cost function for this CU. */ |
| /* loop in for 8x8 blocks */ |
| if(ps_ctxt->u1_enable_psyRDOPT) |
| { |
| UWORD8 *pu1_recon_cu; |
| WORD32 recon_stride; |
| WORD32 curr_pos_x; |
| WORD32 curr_pos_y; |
| WORD32 start_index; |
| WORD32 num_horz_cu_in_ctb; |
| WORD32 cu_size; |
| WORD32 had_block_size; |
| |
| /* tODO: sreenivasa ctb size has to be used appropriately */ |
| had_block_size = 8; |
| cu_size = ps_cu_analyse->u1_cu_size; /* todo */ |
| num_horz_cu_in_ctb = 64 / had_block_size; |
| |
| curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */ |
| curr_pos_y = ps_cu_analyse->b3_cu_pos_y << 3; /* pel units */ |
| recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride; |
| pu1_recon_cu = |
| ((UWORD8 *)ps_final_prms->s_recon_datastore |
| .apv_luma_recon_bufs[ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]); |
| /* + \ curr_pos_x + curr_pos_y * recon_stride; */ |
| |
| /* start index to index the source satd of curr cu int he current ctb*/ |
| start_index = |
| (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb; |
| |
| { |
| total_rdopt_cost += ihevce_psy_rd_cost( |
| ps_ctxt->ai4_source_satd_8x8, |
| pu1_recon_cu, |
| recon_stride, |
| 1, // |
| cu_size, |
| 0, // pic type |
| 0, //layer id |
| ps_ctxt->i4_satd_lamda, // lambda |
| start_index, |
| ps_ctxt->u1_is_input_data_hbd, |
| ps_ctxt->u4_psy_strength, |
| &ps_ctxt->s_cmn_opt_func |
| |
| ); // 8 bit |
| } |
| } |
| |
| #if !FORCE_INTRA_TU_DEPTH_TO_0 //RATIONALISE_NUM_RDO_MODES_IN_PQ_AND_HQ |
| if(TU_EQ_SUBCU == func_proc_mode) |
| { |
| UWORD8 au1_tu_eq_cu_div2_modes[4]; |
| UWORD8 au1_freq_of_mode[4]; |
| |
| WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D( |
| ps_final_prms->au1_intra_pred_mode, au1_tu_eq_cu_div2_modes, au1_freq_of_mode, 4); |
| |
| if(1 == i4_num_clusters) |
| { |
| ps_final_prms->u2_num_pus_in_cu = 1; |
| ps_final_prms->u1_part_mode = SIZE_2Nx2N; |
| } |
| } |
| #endif |
| |
| /* store the num TUs*/ |
| ps_final_prms->u2_num_tus_in_cu = u2_num_tus_in_cu; |
| |
| /* update the bytes consumed */ |
| ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons; |
| |
| /* store the current cu size to final prms */ |
| ps_final_prms->u1_cu_size = ps_cu_analyse->u1_cu_size; |
| |
| /* cu bits will be having luma residual bits till this point */ |
| /* if zero_cbf eval is disabled then cu bits will be zero */ |
| ps_final_prms->u4_cu_luma_res_bits = cu_bits; |
| |
| /* ------------- Chroma processing -------------- */ |
| /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEDD preset*/ |
| if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt) |
| { |
| LWORD64 chrm_rdopt_cost; |
| WORD32 chrm_rdopt_tu_bits; |
| |
| /* Store the current RDOPT cost to enable early exit in chrom_prcs */ |
| ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost; |
| |
| chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt( |
| ps_ctxt, |
| curr_buf_idx, |
| func_proc_mode, |
| ps_chrm_cu_buf_prms->pu1_curr_src, |
| ps_chrm_cu_buf_prms->i4_chrm_src_stride, |
| ps_chrm_cu_buf_prms->pu1_cu_left, |
| ps_chrm_cu_buf_prms->pu1_cu_top, |
| ps_chrm_cu_buf_prms->pu1_cu_top_left, |
| ps_chrm_cu_buf_prms->i4_cu_left_stride, |
| cu_pos_x_8pelunits, |
| cu_pos_y_8pelunits, |
| &chrm_rdopt_tu_bits, |
| i4_alpha_stim_multiplier, |
| u1_is_cu_noisy); |
| |
| #if WEIGH_CHROMA_COST |
| chrm_rdopt_cost = (LWORD64)( |
| (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor + |
| (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> |
| CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); |
| #endif |
| |
| #if CHROMA_RDOPT_ENABLE |
| total_rdopt_cost += chrm_rdopt_cost; |
| #endif |
| cu_bits += chrm_rdopt_tu_bits; |
| |
| /* cu bits for chroma residual if chroma rdopt is on */ |
| /* if zero_cbf eval is disabled then cu bits will be zero */ |
| ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits; |
| |
| if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1) |
| { |
| /* Early exit : If the current running cost exceeds |
| the prev. best mode cost, break */ |
| if(total_rdopt_cost > prev_best_rdopt_cost) |
| { |
| return (total_rdopt_cost); |
| } |
| } |
| } |
| else |
| {} |
| |
| /* RDOPT copy States : Best after all luma TUs to current */ |
| COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0] + |
| IHEVC_CAB_COEFFX_PREFIX, |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
| IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
| |
| /* get the neighbour availability flags for current cu */ |
| ihevce_get_only_nbr_flag( |
| &s_nbr, |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| (cu_pos_x_8pelunits << 1), |
| (cu_pos_y_8pelunits << 1), |
| (trans_size << 1), |
| (trans_size << 1)); |
| |
| /* call the entropy rdo encode to get the bit estimate for current cu */ |
| /*if ZERO_CBF eval is enabled then this function will return only CU header bits */ |
| { |
| /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/ |
| WORD32 cbf_bits, header_bits; |
| |
| header_bits = ihevce_entropy_rdo_encode_cu( |
| &ps_ctxt->s_rdopt_entropy_ctxt, |
| ps_final_prms, |
| cu_pos_x_8pelunits, |
| cu_pos_y_8pelunits, |
| ps_cu_analyse->u1_cu_size, |
| s_nbr.u1_top_avail, |
| s_nbr.u1_left_avail, |
| &ps_final_prms->pu1_cu_coeffs[0], |
| &cbf_bits); |
| |
| cu_bits += header_bits; |
| |
| /* cbf bits are excluded from header bits, instead considered as texture bits */ |
| /* incase if zero cbf eval is disabled then texture bits gets added here */ |
| ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits); |
| ps_final_prms->u4_cu_cbf_bits = cbf_bits; |
| |
| #if RDOPT_ENABLE |
| /* add the cost of coding the cu bits */ |
| total_rdopt_cost += |
| COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); |
| #endif |
| } |
| return (total_rdopt_cost); |
| } |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_inter_rdopt_cu_ntu \endif |
| * |
| * \brief |
| * Inter coding unit function which performs the T, Q, IQ, IT and recon for |
| * the luma TUs of a CU and accumulates the RD-opt cost of the candidate |
| * |
| * \description |
| * The function works on the CU buffers selected by curr_buf_idx |
| * (ps_ctxt->as_cu_prms[curr_buf_idx] and ps_ctxt->as_cu_nbr[curr_buf_idx]): |
| * - The TU list of the CU is built with ihevce_tu_tree_update() from the |
| * split / early cbf flags of ps_inter_cand (both forced to 0 for a skip CU). |
| * - For every TU, ihevce_t_q_iq_ssd_scan_fxn() is called; the per TU results |
| * (cbf, bytes consumed, zero rows / cols, qp, position) are stored in the |
| * final CU params and the 4x4 neighbour array. |
| * - When ps_ctxt->u1_enable_psyRDOPT is set, ihevce_psy_rd_cost() is added. |
| * - When s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt is 1, the chroma cost returned |
| * by ihevce_chroma_cu_prcs_rdopt() is evaluated (added to the total only |
| * under CHROMA_RDOPT_ENABLE). |
| * - The bits returned by ihevce_entropy_rdo_encode_cu() are accounted for |
| * (rate cost is added to the total only under RDOPT_ENABLE). |
| * - Under ENABLE_INTER_ZCU_COST, if the CU is coded and zero cbf RDO is |
| * enabled, the "CU not coded" cost is compared against the coded cost and |
| * the CU may be forced to not coded. |
| * |
| * Early exit: when (i4_bitrate_instance_num != 0) or (i4_num_bitrates == 1), |
| * the function returns the running cost as soon as it exceeds the best cost |
| * stored in ps_ctxt->as_cu_prms[!curr_buf_idx]; this is checked after every |
| * luma TU and after chroma evaluation. On such a return the CU level stores |
| * done later in the function are not performed. |
| * |
| * For a noisy CU without psy RDOPT, ps_ctxt->i8_cl_ssd_lambda_qf and |
| * ps_ctxt->i8_cl_ssd_lambda_chroma_qf are scaled down in place. |
| * |
| * \param[in] ps_ctxt enc_loop module ctxt pointer |
| * \param[in] ps_cu_prms CU level params (luma source stride and noisy CU flag |
| * are read here) |
| * \param[in] pv_src pointer to source data buffer (accessed as UWORD8) |
| * \param[in] cu_size Current CU size |
| * \param[in] cu_pos_x cu position x w.r.t to ctb (converted to 4x4 units by << 1) |
| * \param[in] cu_pos_y cu position y w.r.t to ctb (converted to 4x4 units by << 1) |
| * \param[in] curr_buf_idx buffer index for current output storage |
| * \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure |
| * \param[in,out] ps_inter_cand pointer to inter candidate structure; the TU |
| * split flags are reset for a skip CU and b1_skip_flag is updated |
| * after chroma evaluation |
| * \param[in] ps_cu_analyse pointer to cu analyse structure (u1_cu_size is read) |
| * \param[in] i4_alpha_stim_multiplier multiplier forwarded to the chroma |
| * processing and, depending on build switches, to the luma T/Q call |
| * or to ihevce_inject_stim_into_distortion() |
| * |
| * \return |
| * Rdopt cost (running cost in case of an early exit) |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| LWORD64 ihevce_inter_rdopt_cu_ntu( |
| ihevce_enc_loop_ctxt_t *ps_ctxt, |
| enc_loop_cu_prms_t *ps_cu_prms, |
| void *pv_src, |
| WORD32 cu_size, |
| WORD32 cu_pos_x, |
| WORD32 cu_pos_y, |
| WORD32 curr_buf_idx, |
| enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms, |
| cu_inter_cand_t *ps_inter_cand, |
| cu_analyse_t *ps_cu_analyse, |
| WORD32 i4_alpha_stim_multiplier) |
| { |
| enc_loop_cu_final_prms_t *ps_final_prms; |
| nbr_4x4_t *ps_nbr_4x4; |
| tu_prms_t s_tu_prms[64 * 4]; |
| tu_prms_t *ps_tu_prms; |
| |
| WORD32 i4_perform_rdoq; |
| WORD32 i4_perform_sbh; |
| WORD32 ai4_tu_split_flags[4]; |
| WORD32 ai4_tu_early_cbf[4]; |
| WORD32 num_split_flags = 1; |
| WORD32 i; |
| UWORD8 u1_tu_size; |
| UWORD8 *pu1_pred; |
| UWORD8 *pu1_ecd_data; |
| WORD16 *pi2_deq_data; |
| UWORD8 *pu1_csbf_buf; |
| UWORD8 *pu1_tu_sz_sft; |
| UWORD8 *pu1_tu_posx; |
| UWORD8 *pu1_tu_posy; |
| LWORD64 total_rdopt_cost; |
| WORD32 ctr; |
| WORD32 chrm_ctr; |
| WORD32 num_tu_in_cu = 0; |
| WORD32 pred_stride; |
| WORD32 recon_stride; |
| WORD32 trans_size = ps_cu_analyse->u1_cu_size; |
| WORD32 csbf_strd; |
| WORD32 chrm_present_flag; |
| WORD32 ecd_data_bytes_cons; |
| WORD32 num_4x4_in_cu; |
| WORD32 num_4x4_in_tu; |
| WORD32 recon_func_mode; |
| WORD32 cu_bits; |
| UWORD8 u1_compute_spatial_ssd; |
| |
| /* min_trans_size is initialized to a value larger than the usual TU sizes */ |
| WORD32 i4_min_trans_size = 256; |
| /* Get the RDOPT cost of the best CU mode for early_exit */ |
| /* (read from the other CU buffer, i.e. index !curr_buf_idx) */ |
| LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost; |
| WORD32 src_strd = ps_cu_prms->i4_luma_src_stride; |
| |
| /* model for no residue syntax qt root cbf flag */ |
| UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX]; |
| |
| /* backup copy of cabac states for restoration if zero cu residue rdo wins later */ |
| UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END]; |
| |
| /* for skip cases tables are not required */ |
| UWORD8 u1_skip_tu_sz_sft = 0; |
| UWORD8 u1_skip_tu_posx = 0; |
| UWORD8 u1_skip_tu_posy = 0; |
| UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy; |
| |
| /* get the pointers based on curbuf idx */ |
| ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx]; |
| ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0]; |
| pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0]; |
| pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0]; |
| csbf_strd = ps_ctxt->i4_cu_csbf_strd; |
| pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0]; |
| |
| pred_stride = ps_inter_cand->i4_pred_data_stride; |
| recon_stride = cu_size; |
| pu1_pred = ps_inter_cand->pu1_pred_data; |
| chrm_ctr = 0; |
| ecd_data_bytes_cons = 0; |
| total_rdopt_cost = 0; |
| num_4x4_in_cu = cu_size >> 2; |
| recon_func_mode = PRED_MODE_INTER; |
| cu_bits = 0; |
| |
| /* get the 4x4 level position of current cu */ |
| cu_pos_x = cu_pos_x << 1; |
| cu_pos_y = cu_pos_y << 1; |
| |
| /* default value for cu coded flag */ |
| ps_final_prms->u1_is_cu_coded = 0; |
| |
| /* init of sad of CU accumulated over all TUs */ |
| ps_final_prms->u4_cu_sad = 0; |
| |
| /* populate the coeffs scan idx */ |
| ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT; |
| |
| #if ENABLE_INTER_ZCU_COST |
| /* reset cu not coded cost */ |
| ps_ctxt->i8_cu_not_coded_cost = 0; |
| |
| /* backup copy of cabac states for restoration if zero cu residue rdo wins later */ |
| memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END); |
| #endif |
| |
| /* a 64x64 CU is handled as four 32x32 root TUs, any other CU as one root TU of CU size */ |
| if(ps_cu_analyse->u1_cu_size == 64) |
| { |
| num_split_flags = 4; |
| u1_tu_size = 32; |
| } |
| else |
| { |
| num_split_flags = 1; |
| u1_tu_size = ps_cu_analyse->u1_cu_size; |
| } |
| |
| /* check for skip mode */ |
| if(1 == ps_final_prms->u1_skip_flag) |
| { |
| if(64 == cu_size) |
| { |
| /* TU = CU/2 is set but no transform is evaluated */ |
| num_tu_in_cu = 4; |
| pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0]; |
| pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0]; |
| pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0]; |
| } |
| else |
| { |
| /* TU = CU is set but no transform is evaluated */ |
| num_tu_in_cu = 1; |
| pu1_tu_sz_sft = &u1_skip_tu_sz_sft; |
| pu1_tu_posx = &u1_skip_tu_posx; |
| pu1_tu_posy = &u1_skip_tu_posy; |
| } |
| |
| recon_func_mode = PRED_MODE_SKIP; |
| } |
| /* check for PU part mode being AMP or No AMP */ |
| else if(ps_final_prms->u1_part_mode < SIZE_2NxnU) |
| { |
| if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64)) |
| { |
| /* TU= CU is evaluated 2Nx2N inter case */ |
| num_tu_in_cu = 1; |
| pu1_tu_sz_sft = &u1_skip_tu_sz_sft; |
| pu1_tu_posx = &u1_skip_tu_posx; |
| pu1_tu_posy = &u1_skip_tu_posy; |
| } |
| else |
| { |
| /* currently TU= CU/2 is evaluated for all inter case */ |
| num_tu_in_cu = 4; |
| pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0]; |
| pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0]; |
| pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0]; |
| } |
| } |
| else |
| { |
| /* for AMP cases one level of TU recursion is done */ |
| /* based on orientation of the partitions */ |
| num_tu_in_cu = 10; |
| pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0]; |
| pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0]; |
| pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0]; |
| } |
| /* NOTE(review): the num_tu_in_cu values chosen above are overwritten by the |
| reset just below and recounted through ihevce_tu_tree_update(); in this |
| function pu1_tu_posx / pu1_tu_posy are read only under FORCE_8x8_TFR */ |
| ps_tu_prms = &s_tu_prms[0]; |
| num_tu_in_cu = 0; |
| /* build the TU list in s_tu_prms : one ihevce_tu_tree_update() call per root TU, |
| with a 32 pel x / y offset for the 2nd, 3rd and 4th root TU of a 64x64 CU */ |
| for(i = 0; i < num_split_flags; i++) |
| { |
| WORD32 i4_x_off = 0, i4_y_off = 0; |
| |
| if(i == 1 || i == 3) |
| { |
| i4_x_off = 32; |
| } |
| |
| if(i == 2 || i == 3) |
| { |
| i4_y_off = 32; |
| } |
| |
| if(1 == ps_final_prms->u1_skip_flag) |
| { |
| ai4_tu_split_flags[0] = 0; |
| ps_inter_cand->ai4_tu_split_flag[i] = 0; |
| |
| ai4_tu_early_cbf[0] = 0; |
| } |
| else |
| { |
| ai4_tu_split_flags[0] = ps_inter_cand->ai4_tu_split_flag[i]; |
| ai4_tu_early_cbf[0] = ps_inter_cand->ai4_tu_early_cbf[i]; |
| } |
| |
| ps_tu_prms->u1_tu_size = u1_tu_size; |
| |
| ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update( |
| ps_tu_prms, |
| &num_tu_in_cu, |
| 0, |
| ai4_tu_split_flags[0], |
| ai4_tu_early_cbf[0], |
| i4_x_off, |
| i4_y_off); |
| } |
| |
| /* loop for all tu blocks in current cu to find the smallest transform size */ |
| ps_tu_prms = &s_tu_prms[0]; |
| for(ctr = 0; ctr < num_tu_in_cu; ctr++) |
| { |
| trans_size = ps_tu_prms->u1_tu_size; |
| |
| if(i4_min_trans_size > trans_size) |
| { |
| i4_min_trans_size = trans_size; |
| } |
| ps_tu_prms++; |
| } |
| /* with CU level qp delta enabled, the qp / lambda update is called with the smallest TU size */ |
| if(ps_ctxt->i1_cu_qp_delta_enable) |
| { |
| ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, i4_min_trans_size, 0); |
| } |
| /* noisy CU without psy RDOPT : both SSD lambdas in the context are scaled in place |
| by (100 - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100 */ |
| if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT) |
| { |
| ps_ctxt->i8_cl_ssd_lambda_qf = |
| ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / |
| 100.0f); |
| ps_ctxt->i8_cl_ssd_lambda_chroma_qf = |
| ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf * |
| (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f); |
| } |
| /* spatial domain SSD is selected by cu qp, quality preset and the build switch */ |
| u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) && |
| (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) && |
| CONVERT_SSDS_TO_SPATIAL_DOMAIN; |
| /* for noisy CUs or psy RDOPT only the MAX_HEVC_QP bound and the build switch apply */ |
| if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT) |
| { |
| u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) && |
| CONVERT_SSDS_TO_SPATIAL_DOMAIN; |
| } |
| /* mark recon availability in the recon datastore according to the SSD domain */ |
| if(!u1_compute_spatial_ssd) |
| { |
| ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0; |
| ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0; |
| } |
| else |
| { |
| ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1; |
| } |
| |
| ps_tu_prms = &s_tu_prms[0]; |
| |
| ASSERT(num_tu_in_cu <= 256); |
| |
| /* RDOPT copy States : TU init (best until prev TU) to current */ |
| /* only the first IHEVC_CAB_COEFFX_PREFIX models are copied here; the remaining */ |
| /* models are copied for every TU inside the loop below */ |
| memcpy( |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0], |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
| IHEVC_CAB_COEFFX_PREFIX); |
| /* main loop over the luma TUs of the CU */ |
| for(ctr = 0; ctr < num_tu_in_cu; ctr++) |
| { |
| WORD32 curr_bytes; |
| WORD32 tx_size; |
| WORD32 cbf, zero_col, zero_row; |
| LWORD64 rdopt_cost; |
| UWORD8 u1_is_recon_available; |
| |
| WORD32 curr_pos_x; |
| WORD32 curr_pos_y; |
| nbr_4x4_t *ps_cur_nbr_4x4; |
| UWORD8 *pu1_cur_pred; |
| UWORD8 *pu1_cur_src; |
| UWORD8 *pu1_cur_recon; |
| WORD16 *pi2_cur_deq_data; |
| UWORD32 u4_tu_sad; |
| WORD32 tu_bits; |
| |
| WORD32 i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride; |
| |
| trans_size = ps_tu_prms->u1_tu_size; |
| /* get the current pos x and pos y in pixels */ |
| curr_pos_x = ps_tu_prms->u1_x_off; //((cu_size >> 2) * pu1_tu_posx[ctr]); |
| curr_pos_y = ps_tu_prms->u1_y_off; //((cu_size >> 2) * pu1_tu_posy[ctr]); |
| |
| num_4x4_in_tu = trans_size >> 2; |
| |
| #if FORCE_8x8_TFR |
| if(cu_size == 64) |
| { |
| curr_pos_x = ((cu_size >> 3) * pu1_tu_posx[ctr]); |
| curr_pos_y = ((cu_size >> 3) * pu1_tu_posy[ctr]); |
| } |
| #endif |
| |
| /* increment the pointers to start of current TU */ |
| pu1_cur_src = ((UWORD8 *)pv_src + curr_pos_x); |
| pu1_cur_src += (curr_pos_y * src_strd); |
| pu1_cur_pred = (pu1_pred + curr_pos_x); |
| pu1_cur_pred += (curr_pos_y * pred_stride); |
| pi2_cur_deq_data = pi2_deq_data + curr_pos_x; |
| pi2_cur_deq_data += (curr_pos_y * cu_size); |
| pu1_cur_recon = ((UWORD8 *)ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0]) + |
| curr_pos_x + curr_pos_y * i4_recon_stride; |
| |
| ps_cur_nbr_4x4 = (ps_nbr_4x4 + (curr_pos_x >> 2)); |
| ps_cur_nbr_4x4 += ((curr_pos_y >> 2) * num_4x4_in_cu); |
| |
| /* RDOPT copy States : TU init (best until prev TU) to current */ |
| COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0] + |
| IHEVC_CAB_COEFFX_PREFIX, |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
| IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
| |
| i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq; |
| i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh; |
| |
| /* 2 multi-dimensional arrays based on trans size of rounding factor to be added here */ |
| /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */ |
| /* Currently the complete array will contain only single value*/ |
| /*The rounding factor is calculated with the formula |
| Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2 |
| rounding factor = (1 - DeadZone Val) |
| |
| Assumption: Cabac states of All the sub-blocks in the TU are considered independent |
| */ |
| if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0)) |
| { |
| double i4_lamda_modifier; |
| |
| if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id)) |
| { |
| i4_lamda_modifier = ps_ctxt->i4_lamda_modifier * |
| CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00); |
| } |
| else |
| { |
| i4_lamda_modifier = ps_ctxt->i4_lamda_modifier; |
| } |
| if(ps_ctxt->i4_use_const_lamda_modifier) |
| { |
| if(ISLICE == ps_ctxt->i1_slice_type) |
| { |
| i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier; |
| } |
| else |
| { |
| i4_lamda_modifier = CONST_LAMDA_MOD_VAL; |
| } |
| } |
| ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] = |
| &ps_ctxt->i4_quant_round_tu[0][0]; |
| ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] = |
| &ps_ctxt->i4_quant_round_tu[1][0]; |
| |
| memset( |
| ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3], |
| 0, |
| trans_size * trans_size * sizeof(WORD32)); |
| memset( |
| ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3], |
| 0, |
| trans_size * trans_size * sizeof(WORD32)); |
| |
| ihevce_quant_rounding_factor_gen( |
| trans_size, |
| 1, |
| &ps_ctxt->s_rdopt_entropy_ctxt, |
| ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3], |
| ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3], |
| i4_lamda_modifier, |
| 1); |
| } |
| else |
| { |
| ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] = |
| ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3]; |
| ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] = |
| ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3]; |
| } |
| |
| /* call T Q IT IQ and recon function */ |
| cbf = ihevce_t_q_iq_ssd_scan_fxn( |
| ps_ctxt, |
| pu1_cur_pred, |
| pred_stride, |
| pu1_cur_src, |
| src_strd, |
| pi2_cur_deq_data, |
| cu_size, |
| pu1_cur_recon, |
| i4_recon_stride, |
| pu1_ecd_data, |
| pu1_csbf_buf, |
| csbf_strd, |
| trans_size, |
| recon_func_mode, |
| &rdopt_cost, |
| &curr_bytes, |
| &tu_bits, |
| &u4_tu_sad, |
| &zero_col, |
| &zero_row, |
| &u1_is_recon_available, |
| i4_perform_rdoq, |
| i4_perform_sbh, |
| #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| i4_alpha_stim_multiplier, |
| u1_is_cu_noisy, |
| #endif |
| u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, |
| ps_ctxt->u1_use_early_cbf_data ? ps_tu_prms->i4_early_cbf : 1); |
| |
| #if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
| { |
| #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT |
| rdopt_cost = ihevce_inject_stim_into_distortion( |
| pu1_cur_src, |
| src_strd, |
| pu1_cur_pred, |
| pred_stride, |
| rdopt_cost, |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| NULL_PLANE); |
| #else |
| if(u1_compute_spatial_ssd && u1_is_recon_available) |
| { |
| rdopt_cost = ihevce_inject_stim_into_distortion( |
| pu1_cur_src, |
| src_strd, |
| pu1_cur_recon, |
| i4_recon_stride, |
| rdopt_cost, |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| NULL_PLANE); |
| } |
| else |
| { |
| rdopt_cost = ihevce_inject_stim_into_distortion( |
| pu1_cur_src, |
| src_strd, |
| pu1_cur_pred, |
| pred_stride, |
| rdopt_cost, |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| NULL_PLANE); |
| } |
| #endif |
| } |
| #endif |
| /* buffer id 0 refers to apv_luma_recon_bufs[0] written above; UCHAR_MAX is stored |
| when no recon was produced (presumably an "invalid buffer" marker - confirm) */ |
| if(u1_compute_spatial_ssd && u1_is_recon_available) |
| { |
| ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = 0; |
| } |
| else |
| { |
| ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX; |
| } |
| |
| /* accumulate the TU sad into cu sad */ |
| ps_final_prms->u4_cu_sad += u4_tu_sad; |
| |
| /* accumulate the TU bits into cu bits */ |
| cu_bits += tu_bits; |
| |
| /* inter cu is coded if any of the tu is coded in it */ |
| ps_final_prms->u1_is_cu_coded |= cbf; |
| |
| /* call the entropy function to get the bits */ |
| /* add that to rd opt cost(SSD) */ |
| |
| /* update the bytes */ |
| ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons; |
| ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = curr_bytes; |
| /* update the zero_row and col info for the final mode */ |
| ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col = zero_col; |
| ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row = zero_row; |
| |
| /* update the bytes (NOTE(review): repeats the i4_luma_coeff_offset store made above) */ |
| ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons; |
| |
| /* update the total bytes cons */ |
| ecd_data_bytes_cons += curr_bytes; |
| pu1_ecd_data += curr_bytes; |
| |
| /* RDOPT copy States : New updated after curr TU to TU init */ |
| if(0 != cbf) |
| { |
| /* update to new state only if CBF is non zero */ |
| COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0] + |
| IHEVC_CAB_COEFFX_PREFIX, |
| IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
| } |
| |
| /* by default chroma present is set to 1*/ |
| chrm_present_flag = 1; |
| if(4 == trans_size) |
| { |
| /* if tusize is 4x4 then only first luma 4x4 will have chroma*/ |
| if(0 != chrm_ctr) |
| { |
| chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE; |
| } |
| |
| /* increment the chrm ctr unconditionally */ |
| chrm_ctr++; |
| |
| /* after ctr reached 4 reset it */ |
| if(4 == chrm_ctr) |
| { |
| chrm_ctr = 0; |
| } |
| } |
| /* fill the TU structure for this TU; chroma cbfs are initialised to 0 here */ |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = cbf; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0; |
| GETRANGE(tx_size, trans_size); |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + (curr_pos_x >> 2); |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + (curr_pos_y >> 2); |
| |
| /* replicate the nbr 4x4 structure for all 4x4 blocks current TU */ |
| ps_cur_nbr_4x4->b1_y_cbf = cbf; |
| /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/ |
| ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp; |
| |
| /* Qp and cbf are stored for the all 4x4 in TU */ |
| { |
| WORD32 i, j; |
| nbr_4x4_t *ps_tmp_4x4; |
| ps_tmp_4x4 = ps_cur_nbr_4x4; |
| |
| for(i = 0; i < num_4x4_in_tu; i++) |
| { |
| for(j = 0; j < num_4x4_in_tu; j++) |
| { |
| ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp; |
| ps_tmp_4x4[j].b1_y_cbf = cbf; |
| } |
| /* row level update*/ |
| ps_tmp_4x4 += num_4x4_in_cu; |
| } |
| } |
| |
| #if RDOPT_ENABLE |
| /* compute the rdopt cost */ |
| rdopt_cost += |
| COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); |
| #endif |
| /* accumulate the costs */ |
| total_rdopt_cost += rdopt_cost; |
| |
| ps_tu_prms++; |
| |
| if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1) |
| { |
| /* Early exit : If the current running cost exceeds |
| the prev. best mode cost, return the running cost |
| (the CU level stores after this loop are skipped) */ |
| if(total_rdopt_cost > prev_best_rdopt_cost) |
| { |
| return (total_rdopt_cost); |
| } |
| } |
| } |
| |
| /* Modify the cost function for this CU. */ |
| /* loop in for 8x8 blocks */ |
| if(ps_ctxt->u1_enable_psyRDOPT) |
| { |
| UWORD8 *pu1_recon_cu; |
| WORD32 recon_stride; |
| WORD32 curr_pos_x; |
| WORD32 curr_pos_y; |
| WORD32 start_index; |
| WORD32 num_horz_cu_in_ctb; |
| WORD32 had_block_size; |
| |
| /* TODO: sreenivasa ctb size has to be used appropriately */ |
| had_block_size = 8; |
| num_horz_cu_in_ctb = 64 / had_block_size; |
| |
| curr_pos_x = cu_pos_x << 2; /* pel units */ |
| curr_pos_y = cu_pos_y << 2; /* pel units */ |
| recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride; |
| pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore |
| .apv_luma_recon_bufs[0]); // already pointing to the current CU recon |
| //+ \curr_pos_x + curr_pos_y * recon_stride; |
| |
| /* start index to index the source satd of curr cu in the current ctb*/ |
| start_index = |
| (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb; |
| |
| { |
| total_rdopt_cost += ihevce_psy_rd_cost( |
| ps_ctxt->ai4_source_satd_8x8, |
| pu1_recon_cu, |
| recon_stride, |
| 1, //howz stride |
| cu_size, |
| 0, // pic type |
| 0, //layer id |
| ps_ctxt->i4_satd_lamda, // lambda |
| start_index, |
| ps_ctxt->u1_is_input_data_hbd, |
| ps_ctxt->u4_psy_strength, |
| &ps_ctxt->s_cmn_opt_func); // 8 bit |
| } |
| } |
| |
| /* store the num TUs*/ |
| ps_final_prms->u2_num_tus_in_cu = num_tu_in_cu; |
| |
| /* update the bytes consumed */ |
| ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons; |
| |
| /* store the current cu size to final prms */ |
| ps_final_prms->u1_cu_size = cu_size; |
| |
| /* cu bits will be having luma residual bits till this point */ |
| /* if zero_cbf eval is disabled then cu bits will be zero */ |
| ps_final_prms->u4_cu_luma_res_bits = cu_bits; |
| |
| /* ------------- Chroma processing -------------- */ |
| /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEED preset*/ |
| if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt) |
| { |
| LWORD64 chrm_rdopt_cost; |
| WORD32 chrm_rdopt_tu_bits; |
| |
| /* Store the current RDOPT cost to enable early exit in chrom_prcs */ |
| ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost; |
| |
| chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt( |
| ps_ctxt, |
| curr_buf_idx, |
| 0, /* TU mode : Don't care in Inter path */ |
| ps_chrm_cu_buf_prms->pu1_curr_src, |
| ps_chrm_cu_buf_prms->i4_chrm_src_stride, |
| ps_chrm_cu_buf_prms->pu1_cu_left, |
| ps_chrm_cu_buf_prms->pu1_cu_top, |
| ps_chrm_cu_buf_prms->pu1_cu_top_left, |
| ps_chrm_cu_buf_prms->i4_cu_left_stride, |
| (cu_pos_x >> 1), |
| (cu_pos_y >> 1), |
| &chrm_rdopt_tu_bits, |
| i4_alpha_stim_multiplier, |
| u1_is_cu_noisy); |
| |
| #if WEIGH_CHROMA_COST |
| chrm_rdopt_cost = (LWORD64)( |
| (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor + |
| (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> |
| CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); |
| #endif |
| |
| #if CHROMA_RDOPT_ENABLE |
| total_rdopt_cost += chrm_rdopt_cost; |
| #endif |
| cu_bits += chrm_rdopt_tu_bits; |
| |
| /* during chroma evaluation if skip decision was over written */ |
| /* then the current skip candidate is set to a non skip candidate */ |
| ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag; |
| |
| /* cu bits for chroma residual if chroma rdopt is on */ |
| /* if zero_cbf eval is disabled then cu bits will be zero */ |
| ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits; |
| |
| if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1) |
| { |
| /* Early exit : If the current running cost exceeds |
| the prev. best mode cost, return the running cost */ |
| if(total_rdopt_cost > prev_best_rdopt_cost) |
| { |
| return (total_rdopt_cost); |
| } |
| } |
| } |
| else |
| {} |
| |
| #if SHRINK_INTER_TUTREE |
| /* ------------- Quadtree TU split optimization ------------ */ |
| if(ps_final_prms->u1_is_cu_coded) |
| { |
| ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree( |
| &ps_final_prms->as_tu_enc_loop[0], |
| &ps_final_prms->as_tu_enc_loop_temp_prms[0], |
| &ps_final_prms->s_recon_datastore, |
| num_tu_in_cu, |
| (ps_ctxt->u1_chroma_array_type == 2)); |
| } |
| #endif |
| |
| /* RDOPT copy States : Best after all luma TUs (and chroma,if enabled)to current */ |
| COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0] + |
| IHEVC_CAB_COEFFX_PREFIX, |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
| IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
| |
| /* -------- Bit estimate for RD opt -------------- */ |
| { |
| nbr_avail_flags_t s_nbr; |
| /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/ |
| WORD32 cbf_bits, header_bits; |
| |
| /* get the neighbour availability flags for current cu */ |
| ihevce_get_only_nbr_flag( |
| &s_nbr, |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| cu_pos_x, |
| cu_pos_y, |
| (cu_size >> 2), |
| (cu_size >> 2)); |
| |
| /* call the entropy rdo encode to get the bit estimate for current cu */ |
| header_bits = ihevce_entropy_rdo_encode_cu( |
| &ps_ctxt->s_rdopt_entropy_ctxt, |
| ps_final_prms, |
| (cu_pos_x >> 1), /* back to 8x8 pel units */ |
| (cu_pos_y >> 1), /* back to 8x8 pel units */ |
| cu_size, |
| ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail |
| : s_nbr.u1_top_avail, |
| s_nbr.u1_left_avail, |
| &ps_final_prms->pu1_cu_coeffs[0], |
| &cbf_bits); |
| |
| cu_bits += header_bits; |
| |
| /* cbf bits are excluded from header bits, instead considered as texture bits */ |
| /* incase if zero cbf eval is disabled then texture bits gets added here */ |
| ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits); |
| ps_final_prms->u4_cu_cbf_bits = cbf_bits; |
| |
| #if RDOPT_ENABLE |
| /* add the cost of coding the header bits */ |
| total_rdopt_cost += |
| COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); |
| |
| #if ENABLE_INTER_ZCU_COST |
| /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */ |
| if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level)) |
| { |
| LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost; |
| |
| WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) && |
| (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag); |
| |
| cab_ctxt_t *ps_cab_ctxt = |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt; |
| |
| /* Read header bits generated after ihevce_entropy_rdo_encode_cu() call */ |
| UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12; |
| |
| /* account for coding qt_root_cbf = 0 */ |
| /* Add cost for coding as 0, then subtract cost for coding as 1 (part of header bits), clamping the result at 0 */ |
| u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0]; |
| if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1]) |
| u4_cu_hdr_bits_q12 = 0; |
| else |
| u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1]; |
| |
| /* add the cost of coding the header bits */ |
| i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30( |
| u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */, |
| ps_ctxt->i8_cl_ssd_lambda_qf, |
| (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q)); |
| /* with psy RDOPT the not coded cost is forced above the coded cost, so the check below never selects not coded */ |
| if(ps_ctxt->u1_enable_psyRDOPT) |
| { |
| i8_cu_not_coded_cost = total_rdopt_cost + 1; |
| } |
| |
| /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */ |
| if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu)) |
| { |
| WORD32 tx_size; |
| |
| /* force cu as not coded and update the cost */ |
| ps_final_prms->u1_is_cu_coded = 0; |
| ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0; |
| ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0; |
| |
| total_rdopt_cost = i8_cu_not_coded_cost; |
| |
| /* reset num TUs to 1 unless cu size is 64 */ |
| ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1; |
| trans_size = (64 == cu_size) ? 32 : cu_size; |
| GETRANGE(tx_size, trans_size); |
| |
| /* reset the bytes consumed */ |
| ps_final_prms->i4_num_bytes_ecd_data = 0; |
| |
| /* reset texture related bits and roll back header bits*/ |
| ps_final_prms->u4_cu_cbf_bits = 0; |
| ps_final_prms->u4_cu_luma_res_bits = 0; |
| ps_final_prms->u4_cu_chroma_res_bits = 0; |
| ps_final_prms->u4_cu_hdr_bits = |
| (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q; |
| |
| /* update cabac model with qtroot cbf = 0 decision */ |
| ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] = |
| gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1]; |
| |
| /* restore untouched cabac models for, tusplit, cbfs, texture etc */ |
| /* (taken from the local backup made at function entry) */ |
| memcpy( |
| &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM], |
| &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM], |
| (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM)); |
| |
| /* mark all tus as not coded for final eval */ |
| for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++) |
| { |
| WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0; |
| WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0; |
| /* curr_pos_x / curr_pos_y are in 4x4 units here (trans_size >> 2) */ |
| nbr_4x4_t *ps_cur_nbr_4x4 = |
| ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu); |
| |
| num_4x4_in_tu = trans_size >> 2; |
| |
| ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0; |
| ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0; |
| ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0; |
| |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0; |
| |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0; |
| |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y; |
| |
| /* reset cbf for the all 4x4 in TU */ |
| { |
| WORD32 i, j; |
| nbr_4x4_t *ps_tmp_4x4; |
| ps_tmp_4x4 = ps_cur_nbr_4x4; |
| |
| for(i = 0; i < num_4x4_in_tu; i++) |
| { |
| for(j = 0; j < num_4x4_in_tu; j++) |
| { |
| ps_tmp_4x4[j].b1_y_cbf = 0; |
| } |
| /* row level update*/ |
| ps_tmp_4x4 += num_4x4_in_cu; |
| } |
| } |
| } |
| } |
| } |
| #endif /* ENABLE_INTER_ZCU_COST */ |
| |
| #endif /* RDOPT_ENABLE */ |
| } |
| |
| return (total_rdopt_cost); |
| } |
| /* Inter CU RD-opt using RDO based TU recursion: sets up the TU tree for one inter candidate, runs the TU tree selector, collects the result through the debriefer, adds psy-RD, chroma and header-bit costs, optionally switches to the "CU not coded" alternative, and returns the total RD cost. */ |
| #if ENABLE_RDO_BASED_TU_RECURSION |
| LWORD64 ihevce_inter_tu_tree_selector_and_rdopt_cost_computer( |
| ihevce_enc_loop_ctxt_t *ps_ctxt, /* [in,out] enc_loop module context; cabac models, SSD lambdas and the not-coded cost are updated here */ |
| enc_loop_cu_prms_t *ps_cu_prms, /* [in] only i4_luma_src_stride and u1_is_cu_noisy are read here */ |
| void *pv_src, /* [in] luma source pointer, stored as pv_src of the luma TU buffer descriptor */ |
| WORD32 cu_size, /* [in] CU size; cu_size >> 2 is used as the number of 4x4 units per CU row */ |
| WORD32 cu_pos_x, /* [in] CU x position in 8x8 units (shifted left by 1 below to get 4x4 units) */ |
| WORD32 cu_pos_y, /* [in] CU y position in 8x8 units (shifted left by 1 below to get 4x4 units) */ |
| WORD32 curr_buf_idx, /* [in] index into as_cu_prms / as_cu_nbr / as_cu_entropy_ctxt */ |
| enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms, /* [in] chroma source and neighbour buffer pointers and strides */ |
| cu_inter_cand_t *ps_inter_cand, /* [in,out] inter candidate; b1_skip_flag may be rewritten after chroma RDO */ |
| cu_analyse_t *ps_cu_analyse, /* [in] CU analyse data; u1_cu_size is read and the struct is passed to the QP/lambda update */ |
| WORD32 i4_alpha_stim_multiplier) /* [in] forwarded to the TU tree selector (when the noise term is compiled in) and to chroma RDO */ |
| { |
| tu_tree_node_t as_tu_nodes[256 + 64 + 16 + 4 + 1]; /* 341 nodes; presumably one term per level of a full TU quadtree - TODO confirm */ |
| buffer_data_for_tu_t s_buffer_data_for_tu; |
| enc_loop_cu_final_prms_t *ps_final_prms; |
| nbr_4x4_t *ps_nbr_4x4; |
| /* NOTE(review): num_split_flags, u1_tu_size, pu1_tu_sz_sft, pu1_tu_posx, pu1_tu_posy, chrm_ctr, csbf_strd, pu1_csbf_buf and the outer recon_stride are assigned in this function but never read in it */ |
| WORD32 num_split_flags = 1; |
| UWORD8 u1_tu_size; |
| UWORD8 *pu1_pred; |
| UWORD8 *pu1_ecd_data; |
| WORD16 *pi2_deq_data; |
| UWORD8 *pu1_csbf_buf; |
| UWORD8 *pu1_tu_sz_sft; |
| UWORD8 *pu1_tu_posx; |
| UWORD8 *pu1_tu_posy; |
| LWORD64 total_rdopt_cost; |
| WORD32 ctr; |
| WORD32 chrm_ctr; |
| WORD32 pred_stride; |
| WORD32 recon_stride; |
| WORD32 trans_size = ps_cu_analyse->u1_cu_size; |
| WORD32 csbf_strd; |
| WORD32 ecd_data_bytes_cons; |
| WORD32 num_4x4_in_cu; |
| WORD32 num_4x4_in_tu; |
| WORD32 recon_func_mode; |
| WORD32 cu_bits; |
| UWORD8 u1_compute_spatial_ssd; |
| /* backup copy of cabac states for restoration if zero cu residue rdo wins later */ |
| UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END]; |
| |
| WORD32 i4_min_trans_size = 256; /* overwritten with 4 before its only use */ |
| LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost; /* best cost is read from the other buffer (index !curr_buf_idx) */ |
| WORD32 src_strd = ps_cu_prms->i4_luma_src_stride; |
| /* model for no residue syntax qt root cbf flag */ |
| UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX]; |
| UWORD8 u1_skip_tu_sz_sft = 0; |
| UWORD8 u1_skip_tu_posx = 0; |
| UWORD8 u1_skip_tu_posy = 0; |
| UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy; |
| |
| ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx]; |
| ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0]; |
| pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0]; |
| pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0]; |
| csbf_strd = ps_ctxt->i4_cu_csbf_strd; |
| pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0]; |
| pred_stride = ps_inter_cand->i4_pred_data_stride; |
| recon_stride = cu_size; |
| pu1_pred = ps_inter_cand->pu1_pred_data; |
| chrm_ctr = 0; |
| ecd_data_bytes_cons = 0; |
| total_rdopt_cost = 0; |
| num_4x4_in_cu = cu_size >> 2; |
| recon_func_mode = PRED_MODE_INTER; |
| cu_bits = 0; |
| |
| /* get the 4x4 level position of current cu */ |
| cu_pos_x = cu_pos_x << 1; |
| cu_pos_y = cu_pos_y << 1; |
| |
| ps_final_prms->u1_is_cu_coded = 0; |
| ps_final_prms->u4_cu_sad = 0; |
| |
| /* populate the coeffs scan idx */ |
| ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT; |
| |
| #if ENABLE_INTER_ZCU_COST |
| /* reset cu not coded cost */ |
| ps_ctxt->i8_cu_not_coded_cost = 0; |
| |
| /* backup copy of cabac states for restoration if zero cu residue rdo wins later */ |
| memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END); |
| #endif |
| |
| if(ps_cu_analyse->u1_cu_size == 64) |
| { |
| num_split_flags = 4; |
| u1_tu_size = 32; |
| } |
| else |
| { |
| num_split_flags = 1; |
| u1_tu_size = ps_cu_analyse->u1_cu_size; |
| } |
| |
| if(1 == ps_final_prms->u1_skip_flag) |
| { |
| if(64 == cu_size) |
| { |
| /* TU = CU/2 is set but no transform is evaluated */ |
| pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0]; |
| pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0]; |
| pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0]; |
| } |
| else |
| { |
| /* TU = CU is set but no transform is evaluated */ |
| pu1_tu_sz_sft = &u1_skip_tu_sz_sft; |
| pu1_tu_posx = &u1_skip_tu_posx; |
| pu1_tu_posy = &u1_skip_tu_posy; |
| } |
| |
| recon_func_mode = PRED_MODE_SKIP; |
| } |
| /* check for PU part mode being AMP or No AMP */ |
| else if(ps_final_prms->u1_part_mode < SIZE_2NxnU) |
| { |
| if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64)) |
| { |
| /* TU= CU is evaluated 2Nx2N inter case */ |
| pu1_tu_sz_sft = &u1_skip_tu_sz_sft; |
| pu1_tu_posx = &u1_skip_tu_posx; |
| pu1_tu_posy = &u1_skip_tu_posy; |
| } |
| else |
| { |
| /* currently TU= CU/2 is evaluated for all inter case */ |
| pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0]; |
| pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0]; |
| pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0]; |
| } |
| } |
| else |
| { |
| /* for AMP cases one level of TU recursion is done */ |
| /* based on orientation of the partitions */ |
| pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0]; |
| pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0]; |
| pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0]; |
| } |
| |
| i4_min_trans_size = 4; |
| |
| if(ps_ctxt->i1_cu_qp_delta_enable) |
| { |
| ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, i4_min_trans_size, 0); |
| } |
| /* for noisy CUs without psy-RDO both SSD lambdas are scaled in place by (100 - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) percent */ |
| if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT) |
| { |
| ps_ctxt->i8_cl_ssd_lambda_qf = |
| ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / |
| 100.0f); |
| ps_ctxt->i8_cl_ssd_lambda_chroma_qf = |
| ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf * |
| (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f); |
| } |
| /* spatial-domain SSD needs the compile-time switch, a QP within the limit and a preset below P3; for noisy CUs or psy-RDO the QP limit becomes MAX_HEVC_QP and the preset check is dropped */ |
| u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) && |
| (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) && |
| CONVERT_SSDS_TO_SPATIAL_DOMAIN; |
| |
| if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT) |
| { |
| u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) && |
| CONVERT_SSDS_TO_SPATIAL_DOMAIN; |
| } |
| /* recon availability flags follow the spatial SSD decision; NOTE(review): with spatial SSD on but chroma outside the recursion, au1_is_chromaRecon_available[0] is left unchanged here */ |
| if(!u1_compute_spatial_ssd) |
| { |
| ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0; |
| ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0; |
| } |
| else |
| { |
| ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1; |
| |
| if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)) |
| { |
| ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 1; |
| } |
| } |
| |
| /* RDOPT copy States : TU init (best until prev TU) to current */ |
| memcpy( |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0], |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
| IHEVC_CAB_COEFFX_PREFIX); |
| /* initialise the TU node array; for skip candidates the 3rd and 4th arguments are both 0 */ |
| ihevce_tu_tree_init( |
| as_tu_nodes, |
| cu_size, |
| (cu_size == 64) ? !ps_inter_cand->b1_skip_flag : 0, |
| ps_inter_cand->b1_skip_flag ? 0 : ps_ctxt->u1_max_inter_tr_depth, |
| INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0), |
| ps_ctxt->u1_chroma_array_type == 2); |
| /* for non-skip candidates at presets >= P3 the candidate's ai4_tu_split_flag is mapped onto the tree */ |
| if(!ps_inter_cand->b1_skip_flag && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3)) |
| { |
| ihevce_tuSplitArray_to_tuTree_mapper( |
| as_tu_nodes, |
| ps_inter_cand->ai4_tu_split_flag, |
| cu_size, |
| cu_size, |
| MAX(MIN_TU_SIZE, (cu_size >> ps_ctxt->u1_max_inter_tr_depth)), |
| MIN(MAX_TU_SIZE, cu_size), |
| ps_inter_cand->b1_skip_flag); |
| } |
| |
| ASSERT(ihevce_tu_tree_coverage_in_cu(as_tu_nodes) == cu_size * cu_size); |
| |
| #if ENABLE_INTER_ZCU_COST |
| ps_ctxt->i8_cu_not_coded_cost = 0; |
| #endif |
| /* describe the source / pred / recon / neighbour / coefficient buffers handed to the TU tree selector */ |
| s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_src = pv_src; |
| s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_pred = pu1_pred; |
| s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_recon = |
| ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0]; |
| s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_src_stride = src_strd; |
| s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_pred_stride = pred_stride; |
| s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_recon_stride = |
| ps_final_prms->s_recon_datastore.i4_lumaRecon_stride; |
| s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_src = ps_chrm_cu_buf_prms->pu1_curr_src; |
| s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred = |
| ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] + |
| curr_buf_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) + ((ps_ctxt->u1_chroma_array_type == 2) * |
| (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1))); |
| s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_recon = |
| ps_final_prms->s_recon_datastore.apv_chroma_recon_bufs[0]; |
| s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_src_stride = |
| ps_chrm_cu_buf_prms->i4_chrm_src_stride; |
| s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride = |
| ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX]; |
| s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_recon_stride = |
| ps_final_prms->s_recon_datastore.i4_chromaRecon_stride; |
| s_buffer_data_for_tu.ps_nbr_data_buf = ps_nbr_4x4; |
| s_buffer_data_for_tu.pi2_deq_data = pi2_deq_data; |
| s_buffer_data_for_tu.pi2_deq_data_chroma = |
| pi2_deq_data + ps_final_prms->i4_chrm_deq_coeff_strt_idx; |
| s_buffer_data_for_tu.i4_nbr_data_buf_stride = num_4x4_in_cu; |
| s_buffer_data_for_tu.i4_deq_data_stride = cu_size; |
| s_buffer_data_for_tu.i4_deq_data_stride_chroma = cu_size; |
| s_buffer_data_for_tu.ppu1_ecd = &pu1_ecd_data; |
| /* when chroma is part of the TU recursion, generate chroma inter prediction per PU (one PU if b3_part_size is 0, otherwise two) */ |
| if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)) |
| { |
| UWORD8 i; |
| |
| UWORD8 *pu1_pred = (UWORD8 *)s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred; |
| |
| for(i = 0; i < (!!ps_inter_cand->b3_part_size) + 1; i++) |
| { |
| pu_t *ps_pu; |
| |
| WORD32 inter_pu_wd; |
| WORD32 inter_pu_ht; |
| |
| ps_pu = ps_inter_cand->as_inter_pu + i; |
| |
| inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */ |
| inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1; |
| inter_pu_ht <<= (ps_ctxt->u1_chroma_array_type == 2); |
| ihevce_chroma_inter_pred_pu( |
| &ps_ctxt->s_mc_ctxt, |
| ps_pu, |
| pu1_pred, |
| s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride); |
| if(!!ps_inter_cand->b3_part_size) |
| { |
| /* 2Nx__ partition case */ |
| if(inter_pu_wd == cu_size) |
| { |
| pu1_pred += |
| (inter_pu_ht * |
| s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride); |
| } |
| |
| /* __x2N partition case */ |
| if(inter_pu_ht == (cu_size >> !(ps_ctxt->u1_chroma_array_type == 2))) |
| { |
| pu1_pred += inter_pu_wd; |
| } |
| } |
| } |
| } |
| /* run the TU tree selector; ENABLE_TOP_DOWN_TU_RECURSION picks the top-down variant */ |
| #if !ENABLE_TOP_DOWN_TU_RECURSION |
| total_rdopt_cost = ihevce_tu_tree_selector( |
| ps_ctxt, |
| as_tu_nodes, |
| &s_buffer_data_for_tu, |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0], |
| recon_func_mode, |
| #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| i4_alpha_stim_multiplier, |
| u1_is_cu_noisy, |
| #endif |
| 0, |
| ps_ctxt->u1_max_inter_tr_depth, |
| ps_inter_cand->b3_part_size, |
| u1_compute_spatial_ssd); |
| #else |
| total_rdopt_cost = ihevce_topDown_tu_tree_selector( |
| ps_ctxt, |
| as_tu_nodes, |
| &s_buffer_data_for_tu, |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0], |
| recon_func_mode, |
| #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| i4_alpha_stim_multiplier, |
| u1_is_cu_noisy, |
| #endif |
| 0, |
| ps_ctxt->u1_max_inter_tr_depth, |
| ps_inter_cand->b3_part_size, |
| INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0), |
| u1_compute_spatial_ssd); |
| #endif |
| /* NOTE(review): the selector's return value is discarded; the totals are reset here and then passed by pointer to ihevce_tu_selector_debriefer() */ |
| ps_final_prms->u2_num_tus_in_cu = 0; |
| ps_final_prms->u4_cu_luma_res_bits = 0; |
| ps_final_prms->u4_cu_sad = 0; |
| total_rdopt_cost = 0; |
| ecd_data_bytes_cons = 0; |
| cu_bits = 0; |
| #if ENABLE_INTER_ZCU_COST |
| ps_ctxt->i8_cu_not_coded_cost = 0; |
| #endif |
| ps_final_prms->u1_is_cu_coded = 0; |
| ps_final_prms->u1_cu_size = cu_size; |
| |
| ihevce_tu_selector_debriefer( |
| as_tu_nodes, |
| ps_final_prms, |
| &total_rdopt_cost, |
| #if ENABLE_INTER_ZCU_COST |
| &ps_ctxt->i8_cu_not_coded_cost, |
| #endif |
| &ecd_data_bytes_cons, |
| &cu_bits, |
| &ps_final_prms->u2_num_tus_in_cu, |
| ps_ctxt->i4_cu_qp, |
| cu_pos_x * 4, /* 4x4 units -> pel units */ |
| cu_pos_y * 4, /* 4x4 units -> pel units */ |
| INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0), |
| (ps_ctxt->u1_chroma_array_type == 2), |
| POS_TL); |
| /* when chroma is not part of the TU recursion, the chroma coefficient start index is the number of bytes consumed so far */ |
| if(!(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))) |
| { |
| ps_final_prms->i4_chrm_cu_coeff_strt_idx = ecd_data_bytes_cons; |
| } |
| |
| /* Modify the cost function for this CU. */ |
| /* loop in for 8x8 blocks */ |
| if(ps_ctxt->u1_enable_psyRDOPT) |
| { |
| UWORD8 *pu1_recon_cu; |
| WORD32 recon_stride; |
| WORD32 curr_pos_x; |
| WORD32 curr_pos_y; |
| WORD32 start_index; |
| WORD32 num_horz_cu_in_ctb; |
| WORD32 had_block_size; |
| |
| /* TODO: sreenivasa ctb size has to be used appropriately */ |
| had_block_size = 8; |
| num_horz_cu_in_ctb = 64 / had_block_size; |
| |
| curr_pos_x = cu_pos_x << 2; /* pel units */ |
| curr_pos_y = cu_pos_y << 2; /* pel units */ |
| recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride; |
| pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore |
| .apv_luma_recon_bufs[0]); // already pointing to the current CU recon |
| //+ \curr_pos_x + curr_pos_y * recon_stride; |
| |
| /* start index to index the source satd of curr cu in the current ctb*/ |
| start_index = |
| (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb; |
| |
| { |
| total_rdopt_cost += ihevce_psy_rd_cost( |
| ps_ctxt->ai4_source_satd_8x8, |
| pu1_recon_cu, |
| recon_stride, |
| 1, //howz stride |
| cu_size, |
| 0, // pic type |
| 0, //layer id |
| ps_ctxt->i4_satd_lamda, // lambda |
| start_index, |
| ps_ctxt->u1_is_input_data_hbd, |
| ps_ctxt->u4_psy_strength, |
| &ps_ctxt->s_cmn_opt_func); // 8 bit |
| } |
| } |
| |
| ps_final_prms->u1_chroma_intra_pred_mode = 4; /* NOTE(review): 4 is presumably the "derived from luma" chroma mode index - confirm */ |
| |
| /* update the bytes consumed */ |
| ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons; |
| |
| /* store the current cu size to final prms */ |
| ps_final_prms->u1_cu_size = cu_size; |
| /* ------------- Chroma processing -------------- */ |
| /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEED preset*/ |
| if(ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt && |
| !(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))) |
| { |
| LWORD64 chrm_rdopt_cost; |
| WORD32 chrm_rdopt_tu_bits; |
| |
| /* Store the current RDOPT cost to enable early exit in chrom_prcs */ |
| ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost; |
| |
| chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt( |
| ps_ctxt, |
| curr_buf_idx, |
| 0, /* TU mode : Don't care in Inter path */ |
| ps_chrm_cu_buf_prms->pu1_curr_src, |
| ps_chrm_cu_buf_prms->i4_chrm_src_stride, |
| ps_chrm_cu_buf_prms->pu1_cu_left, |
| ps_chrm_cu_buf_prms->pu1_cu_top, |
| ps_chrm_cu_buf_prms->pu1_cu_top_left, |
| ps_chrm_cu_buf_prms->i4_cu_left_stride, |
| (cu_pos_x >> 1), |
| (cu_pos_y >> 1), |
| &chrm_rdopt_tu_bits, |
| i4_alpha_stim_multiplier, |
| u1_is_cu_noisy); |
| |
| #if WEIGH_CHROMA_COST |
| chrm_rdopt_cost = (LWORD64)( |
| (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor + |
| (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> |
| CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); |
| #endif |
| |
| #if CHROMA_RDOPT_ENABLE |
| total_rdopt_cost += chrm_rdopt_cost; |
| #endif |
| cu_bits += chrm_rdopt_tu_bits; |
| |
| /* during chroma evaluation if skip decision was over written */ |
| /* then the current skip candidate is set to a non skip candidate */ |
| ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag; |
| |
| /* cu bits for chroma residual if chroma rdopt is on */ |
| /* if zero_cbf eval is disabled then cu bits will be zero */ |
| ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits; |
| |
| if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1) |
| { |
| /* Early exit : If the current running cost exceeds |
| the prev. best mode cost, break */ |
| if(total_rdopt_cost > prev_best_rdopt_cost) |
| { |
| return (total_rdopt_cost); |
| } |
| } |
| } |
| else |
| {} |
| |
| #if SHRINK_INTER_TUTREE |
| /* ------------- Quadtree TU split optimization ------------ */ |
| if(ps_final_prms->u1_is_cu_coded) |
| { |
| ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree( |
| &ps_final_prms->as_tu_enc_loop[0], |
| &ps_final_prms->as_tu_enc_loop_temp_prms[0], |
| &ps_final_prms->s_recon_datastore, |
| ps_final_prms->u2_num_tus_in_cu, |
| (ps_ctxt->u1_chroma_array_type == 2)); |
| } |
| #endif |
| |
| /* RDOPT copy States : Best after all luma TUs (and chroma,if enabled)to current */ |
| COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0] + |
| IHEVC_CAB_COEFFX_PREFIX, |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
| IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
| |
| /* -------- Bit estimate for RD opt -------------- */ |
| { |
| nbr_avail_flags_t s_nbr; |
| /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/ |
| WORD32 cbf_bits, header_bits; |
| |
| /* get the neighbour availability flags for current cu */ |
| ihevce_get_only_nbr_flag( |
| &s_nbr, |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| cu_pos_x, |
| cu_pos_y, |
| (cu_size >> 2), |
| (cu_size >> 2)); |
| |
| /* call the entropy rdo encode to get the bit estimate for current cu */ |
| header_bits = ihevce_entropy_rdo_encode_cu( |
| &ps_ctxt->s_rdopt_entropy_ctxt, |
| ps_final_prms, |
| (cu_pos_x >> 1), /* back to 8x8 pel units */ |
| (cu_pos_y >> 1), /* back to 8x8 pel units */ |
| cu_size, |
| ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail |
| : s_nbr.u1_top_avail, |
| s_nbr.u1_left_avail, |
| &ps_final_prms->pu1_cu_coeffs[0], |
| &cbf_bits); |
| |
| cu_bits += header_bits; |
| |
| /* cbf bits are excluded from header bits, instead considered as texture bits */ |
| /* incase if zero cbf eval is disabled then texture bits gets added here */ |
| ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits); |
| ps_final_prms->u4_cu_cbf_bits = cbf_bits; |
| |
| #if RDOPT_ENABLE |
| /* add the cost of coding the header bits */ |
| total_rdopt_cost += |
| COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); |
| |
| #if ENABLE_INTER_ZCU_COST |
| /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */ |
| if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level)) |
| { |
| LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost; |
| |
| WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) && |
| (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag); |
| |
| cab_ctxt_t *ps_cab_ctxt = |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt; |
| |
| /* Read header bits generated after ihevce_entropy_rdo_encode_cu() call */ |
| UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12; |
| |
| /* account for coding qt_root_cbf = 0 */ |
| /* Add the cost of coding the flag as 0, then subtract the cost of coding it as 1 (part of header bits), clamping the result at zero */ |
| u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0]; |
| if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1]) |
| u4_cu_hdr_bits_q12 = 0; |
| else |
| u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1]; |
| |
| /* add the cost of coding the header bits */ |
| i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30( |
| u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */, |
| ps_ctxt->i8_cl_ssd_lambda_qf, |
| (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q)); |
| /* with psy-RDO the not-coded cost is forced above the coded cost, so the not-coded branch below is never taken */ |
| if(ps_ctxt->u1_enable_psyRDOPT) |
| { |
| i8_cu_not_coded_cost = total_rdopt_cost + 1; |
| } |
| |
| /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */ |
| if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu)) |
| { |
| WORD32 tx_size; |
| |
| /* force cu as not coded and update the cost */ |
| ps_final_prms->u1_is_cu_coded = 0; |
| ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0; |
| ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0; |
| |
| total_rdopt_cost = i8_cu_not_coded_cost; |
| |
| /* reset num TUs to 1 unless cu size is 64 */ |
| ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1; |
| trans_size = (64 == cu_size) ? 32 : cu_size; |
| GETRANGE(tx_size, trans_size); |
| |
| /* reset the bytes consumed */ |
| ps_final_prms->i4_num_bytes_ecd_data = 0; |
| |
| /* reset texture related bits and roll back header bits*/ |
| ps_final_prms->u4_cu_cbf_bits = 0; |
| ps_final_prms->u4_cu_luma_res_bits = 0; |
| ps_final_prms->u4_cu_chroma_res_bits = 0; |
| ps_final_prms->u4_cu_hdr_bits = |
| (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q; |
| |
| /* update cabac model with qtroot cbf = 0 decision */ |
| ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] = |
| gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1]; |
| |
| /* restore untouched cabac models for, tusplit, cbfs, texture etc */ |
| memcpy( |
| &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM], |
| &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM], |
| (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM)); |
| |
| /* mark all tus as not coded for final eval */ |
| for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++) |
| { |
| WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0; |
| WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0; |
| |
| nbr_4x4_t *ps_cur_nbr_4x4 = |
| ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu); |
| |
| num_4x4_in_tu = trans_size >> 2; |
| |
| ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0; |
| ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0; |
| ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0; |
| |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0; |
| |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0; |
| |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x; |
| ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y; |
| |
| /* reset cbf for the all 4x4 in TU */ |
| { |
| WORD32 i, j; |
| nbr_4x4_t *ps_tmp_4x4; |
| ps_tmp_4x4 = ps_cur_nbr_4x4; |
| |
| for(i = 0; i < num_4x4_in_tu; i++) |
| { |
| for(j = 0; j < num_4x4_in_tu; j++) |
| { |
| ps_tmp_4x4[j].b1_y_cbf = 0; |
| } |
| /* row level update*/ |
| ps_tmp_4x4 += num_4x4_in_cu; |
| } |
| } |
| } |
| } |
| } |
| #endif /* ENABLE_INTER_ZCU_COST */ |
| |
| #endif /* RDOPT_ENABLE */ |
| } |
| |
| return (total_rdopt_cost); |
| } |
| #endif |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_inter_rdopt_cu_mc_mvp \endif |
| * |
| * \brief |
| * Inter coding unit function which performs MC and MVP calculation for RD opt mode |
| * |
| * \param[in] ps_ctxt enc_loop module ctxt pointer |
| * \param[in] ps_inter_cand pointer to inter candidate structure |
| * \param[in] cu_size Current CU size |
| * \param[in] cu_pos_x cu position x w.r.t. ctb |
| * \param[in] cu_pos_y cu position y w.r.t. ctb |
| * \param[in] ps_left_nbr_4x4 Left neighbour 4x4 structure pointer |
| * \param[in] ps_top_nbr_4x4 top neighbour 4x4 structure pointer |
| * \param[in] ps_topleft_nbr_4x4 top left neighbour 4x4 structure pointer |
| * \param[in] nbr_4x4_left_strd left neighbour 4x4 buffer stride |
| * \param[in] curr_buf_idx Current Buffer index |
| * |
| * \return |
| * Rdopt cost |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| LWORD64 ihevce_inter_rdopt_cu_mc_mvp( |
| ihevce_enc_loop_ctxt_t *ps_ctxt, |
| cu_inter_cand_t *ps_inter_cand, |
| WORD32 cu_size, |
| WORD32 cu_pos_x, |
| WORD32 cu_pos_y, |
| nbr_4x4_t *ps_left_nbr_4x4, |
| nbr_4x4_t *ps_top_nbr_4x4, |
| nbr_4x4_t *ps_topleft_nbr_4x4, |
| WORD32 nbr_4x4_left_strd, |
| WORD32 curr_buf_idx) |
| { |
| /* local variables */ |
| enc_loop_cu_final_prms_t *ps_final_prms; |
| nbr_avail_flags_t s_nbr; |
| nbr_4x4_t *ps_nbr_4x4; |
| |
| UWORD8 au1_is_top_used[2][MAX_MVP_LIST_CAND]; |
| UWORD8 *pu1_pred; |
| WORD32 rdopt_cost; |
| WORD32 ctr; |
| WORD32 num_cu_part; |
| WORD32 inter_pu_wd; |
| WORD32 inter_pu_ht; |
| WORD32 pred_stride; |
| |
| /* get the pointers based on curbuf idx */ |
| ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0]; |
| ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx]; |
| pu1_pred = ps_inter_cand->pu1_pred_data; |
| |
| pred_stride = ps_inter_cand->i4_pred_data_stride; |
| |
| /* store the partition mode in final prms */ |
| ps_final_prms->u1_part_mode = ps_inter_cand->b3_part_size; |
| |
| /* since encoder does not support NXN part type */ |
| /* num parts can be either 1 or 2 only */ |
| ASSERT(SIZE_NxN != ps_inter_cand->b3_part_size); |
| |
| num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1; |
| |
| /* get the 4x4 level position of current cu */ |
| cu_pos_x = cu_pos_x << 1; |
| cu_pos_y = cu_pos_y << 1; |
| |
| /* populate cu level params */ |
| ps_final_prms->u1_intra_flag = PRED_MODE_INTER; |
| ps_final_prms->u2_num_pus_in_cu = num_cu_part; |
| |
| /* run a loop over all the partitons in cu */ |
| for(ctr = 0; ctr < num_cu_part; ctr++) |
| { |
| pu_mv_t as_pred_mv[MAX_MVP_LIST_CAND]; |
| pu_t *ps_pu; |
| WORD32 skip_or_merge_flag; |
| UWORD8 u1_use_mvp_from_top_row; |
| |
| ps_pu = &ps_inter_cand->as_inter_pu[ctr]; |
| |
| /* IF AMP then each partitions can have diff wd ht */ |
| inter_pu_wd = (ps_pu->b4_wd + 1) << 2; |
| inter_pu_ht = (ps_pu->b4_ht + 1) << 2; |
| |
| /* populate reference pic buf id for bs compute */ |
| |
| /* L0 */ |
| if(-1 != ps_pu->mv.i1_l0_ref_idx) |
| { |
| ps_pu->mv.i1_l0_ref_pic_buf_id = |
| ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx]->i4_buf_id; |
| } |
| |
| /* L1 */ |
| if(-1 != ps_pu->mv.i1_l1_ref_idx) |
| { |
| ps_pu->mv.i1_l1_ref_pic_buf_id = |
| ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx]->i4_buf_id; |
| } |
| |
| /* SKIP or merge check for every part */ |
| skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag; |
| |
| /* ----------- MV Prediction ----------------- */ |
| if(0 == skip_or_merge_flag) |
| { |
| /* get the neighbour availability flags */ |
| ihevce_get_only_nbr_flag( |
| &s_nbr, |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| cu_pos_x, |
| cu_pos_y, |
| inter_pu_wd >> 2, |
| inter_pu_ht >> 2); |
| |
| if(ps_ctxt->u1_disable_intra_eval && DISABLE_TOP_SYNC && (ps_pu->b4_pos_y == 0)) |
| { |
| u1_use_mvp_from_top_row = 0; |
| } |
| else |
| { |
| u1_use_mvp_from_top_row = 1; |
| } |
| |
| if(!u1_use_mvp_from_top_row) |
| { |
| if(s_nbr.u1_top_avail || s_nbr.u1_top_lt_avail || s_nbr.u1_top_rt_avail) |
| { |
| if(!s_nbr.u1_left_avail && !s_nbr.u1_bot_lt_avail) |
| { |
| WORD32 curr_cu_pos_in_row, cu_top_right_offset, cu_top_right_dep_pos; |
| |
| /* Ensure Top Right Sync */ |
| if(!ps_ctxt->u1_use_top_at_ctb_boundary) |
| { |
| curr_cu_pos_in_row = |
| ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x + (cu_pos_x << 2); |
| |
| if(ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y == 0) |
| { |
| /* No wait for 1st row */ |
| cu_top_right_offset = -(MAX_CTB_SIZE); |
| { |
| ihevce_tile_params_t *ps_col_tile_params = |
| ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base + |
| ps_ctxt->i4_tile_col_idx); |
| |
| /* No wait for 1st row */ |
| cu_top_right_offset = |
| -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE)); |
| } |
| cu_top_right_dep_pos = 0; |
| } |
| else |
| { |
| cu_top_right_offset = (cu_size) + 4; |
| cu_top_right_dep_pos = |
| (ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y >> 6) - 1; |
| } |
| |
| ihevce_dmgr_chk_row_row_sync( |
| ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right, |
| curr_cu_pos_in_row, |
| cu_top_right_offset, |
| cu_top_right_dep_pos, |
| ps_ctxt->i4_tile_col_idx, /* Col Tile No. */ |
| ps_ctxt->thrd_id); |
| } |
| |
| u1_use_mvp_from_top_row = 1; |
| } |
| else |
| { |
| s_nbr.u1_top_avail = 0; |
| s_nbr.u1_top_lt_avail = 0; |
| s_nbr.u1_top_rt_avail = 0; |
| } |
| } |
| else |
| { |
| u1_use_mvp_from_top_row = 1; |
| } |
| } |
| /* Call the MV prediction module to get MVP */ |
| ihevce_mv_pred( |
| &ps_ctxt->s_mv_pred_ctxt, |
| ps_top_nbr_4x4, |
| ps_left_nbr_4x4, |
| ps_topleft_nbr_4x4, |
| nbr_4x4_left_strd, |
| &s_nbr, |
| NULL, /* colocated MV */ |
| ps_pu, |
| &as_pred_mv[0], |
| au1_is_top_used); |
| } |
| |
| /* store the nbr 4x4 structure */ |
| ps_nbr_4x4->b1_skip_flag = ps_inter_cand->b1_skip_flag; |
| ps_nbr_4x4->b1_intra_flag = 0; |
| ps_nbr_4x4->b1_pred_l0_flag = 0; |
| ps_nbr_4x4->b1_pred_l1_flag = 0; |
| |
| /* DC is default mode for inter cu, required for intra mode signalling */ |
| ps_nbr_4x4->b6_luma_intra_mode = 1; |
| |
| /* copy the motion vectors to neighbour structure */ |
| ps_nbr_4x4->mv = ps_pu->mv; |
| |
| /* copy the PU to final out pu */ |
| ps_final_prms->as_pu_enc_loop[ctr] = *ps_pu; |
| |
| /* copy the PU to chroma */ |
| ps_final_prms->as_pu_chrm_proc[ctr] = *ps_pu; |
| |
| /* store the skip flag to final prms */ |
| ps_final_prms->u1_skip_flag = ps_inter_cand->b1_skip_flag; |
| |
| /* MVP index & MVD calc is gated on skip/merge flag */ |
| if(0 == skip_or_merge_flag) |
| { |
| /* calculate the MVDs and popluate the MVP idx for L0 */ |
| if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode)) |
| { |
| WORD32 idx0_cost, idx1_cost; |
| |
| /* calculate the ABS mvd for cand 0 */ |
| idx0_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[0].s_l0_mv.i2_mvx); |
| idx0_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[0].s_l0_mv.i2_mvy); |
| |
| /* calculate the ABS mvd for cand 1 */ |
| if(u1_use_mvp_from_top_row) |
| { |
| idx1_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[1].s_l0_mv.i2_mvx); |
| idx1_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[1].s_l0_mv.i2_mvy); |
| } |
| else |
| { |
| idx1_cost = INT_MAX; |
| } |
| |
| /* based on the least cost choose the mvp idx */ |
| if(idx0_cost <= idx1_cost) |
| { |
| ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -= |
| as_pred_mv[0].s_l0_mv.i2_mvx; |
| ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -= |
| as_pred_mv[0].s_l0_mv.i2_mvy; |
| |
| ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 0; |
| } |
| else |
| { |
| ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -= |
| as_pred_mv[1].s_l0_mv.i2_mvx; |
| ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -= |
| as_pred_mv[1].s_l0_mv.i2_mvy; |
| |
| ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 1; |
| } |
| |
| /* set the pred l0 flag for neighbour storage */ |
| ps_nbr_4x4->b1_pred_l0_flag = 1; |
| } |
| /* calculate the MVDs and popluate the MVP idx for L1 */ |
| if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode)) |
| { |
| WORD32 idx0_cost, idx1_cost; |
| |
| /* calculate the ABS mvd for cand 0 */ |
| idx0_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[0].s_l1_mv.i2_mvx); |
| idx0_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[0].s_l1_mv.i2_mvy); |
| |
| /* calculate the ABS mvd for cand 1 */ |
| if(u1_use_mvp_from_top_row) |
| { |
| idx1_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[1].s_l1_mv.i2_mvx); |
| idx1_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[1].s_l1_mv.i2_mvy); |
| } |
| else |
| { |
| idx1_cost = INT_MAX; |
| } |
| |
| /* based on the least cost choose the mvp idx */ |
| if(idx0_cost <= idx1_cost) |
| { |
| ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -= |
| as_pred_mv[0].s_l1_mv.i2_mvx; |
| ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -= |
| as_pred_mv[0].s_l1_mv.i2_mvy; |
| |
| ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 0; |
| } |
| else |
| { |
| ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -= |
| as_pred_mv[1].s_l1_mv.i2_mvx; |
| ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -= |
| as_pred_mv[1].s_l1_mv.i2_mvy; |
| |
| ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 1; |
| } |
| |
| /* set the pred l1 flag for neighbour storage */ |
| ps_nbr_4x4->b1_pred_l1_flag = 1; |
| } |
| |
| /* set the merge flag to 0 */ |
| ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = 0; |
| ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = 0; |
| } |
| else |
| { |
| /* copy the merge index from candidate */ |
| ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = ps_pu->b1_merge_flag; |
| |
| ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = ps_pu->b3_merge_idx; |
| |
| if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode)) |
| { |
| /* set the pred l0 flag for neighbour storage */ |
| ps_nbr_4x4->b1_pred_l0_flag = 1; |
| } |
| |
| /* calculate the MVDs and popluate the MVP idx for L1 */ |
| if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode)) |
| { |
| /* set the pred l1 flag for neighbour storage */ |
| ps_nbr_4x4->b1_pred_l1_flag = 1; |
| } |
| } |
| |
| /* RD opt cost computation is part of cu_ntu func hence here it is set to 0 */ |
| rdopt_cost = 0; |
| |
| /* copy the MV to colocated Mv structure */ |
| ps_final_prms->as_col_pu_enc_loop[ctr].s_l0_mv = ps_pu->mv.s_l0_mv; |
| ps_final_prms->as_col_pu_enc_loop[ctr].s_l1_mv = ps_pu->mv.s_l1_mv; |
| ps_final_prms->as_col_pu_enc_loop[ctr].i1_l0_ref_idx = ps_pu->mv.i1_l0_ref_idx; |
| ps_final_prms->as_col_pu_enc_loop[ctr].i1_l1_ref_idx = ps_pu->mv.i1_l1_ref_idx; |
| ps_final_prms->as_col_pu_enc_loop[ctr].b2_pred_mode = ps_pu->b2_pred_mode; |
| ps_final_prms->as_col_pu_enc_loop[ctr].b1_intra_flag = 0; |
| |
| /* replicate neighbour 4x4 strcuture for entire partition */ |
| { |
| WORD32 i, j; |
| nbr_4x4_t *ps_tmp_4x4; |
| |
| ps_tmp_4x4 = ps_nbr_4x4; |
| |
| for(i = 0; i < (inter_pu_ht >> 2); i++) |
| { |
| for(j = 0; j < (inter_pu_wd >> 2); j++) |
| { |
| ps_tmp_4x4[j] = *ps_nbr_4x4; |
| } |
| /* row level update*/ |
| ps_tmp_4x4 += (cu_size >> 2); |
| } |
| } |
| /* set the neighbour map to 1 */ |
| ihevce_set_inter_nbr_map( |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| cu_pos_x, |
| cu_pos_y, |
| (inter_pu_wd >> 2), |
| (inter_pu_ht >> 2), |
| 1); |
| /* ----------- Motion Compensation for Luma ----------- */ |
| #if !ENABLE_MIXED_INTER_MODE_EVAL |
| { |
| IV_API_CALL_STATUS_T valid_mv_cand; |
| |
| /*If the inter candidate is neither merge cand nor skip cand |
| then calculate the mc.*/ |
| if(0 == skip_or_merge_flag || (ps_ctxt->u1_high_speed_cu_dec_on)) |
| { |
| valid_mv_cand = |
| ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 0); |
| |
| /* assert if the MC is given a valid mv candidate */ |
| ASSERT(valid_mv_cand == IV_SUCCESS); |
| } |
| } |
| #endif |
| if((2 == num_cu_part) && (0 == ctr)) |
| { |
| /* 2Nx__ partion case */ |
| if(inter_pu_wd == cu_size) |
| { |
| cu_pos_y += (inter_pu_ht >> 2); |
| pu1_pred += (inter_pu_ht * pred_stride); |
| ps_nbr_4x4 += (inter_pu_ht >> 2) * (cu_size >> 2); |
| ps_left_nbr_4x4 += (inter_pu_ht >> 2) * nbr_4x4_left_strd; |
| ps_top_nbr_4x4 = ps_nbr_4x4 - (cu_size >> 2); |
| ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - nbr_4x4_left_strd; |
| } |
| |
| /* __x2N partion case */ |
| if(inter_pu_ht == cu_size) |
| { |
| cu_pos_x += (inter_pu_wd >> 2); |
| pu1_pred += inter_pu_wd; |
| ps_nbr_4x4 += (inter_pu_wd >> 2); |
| ps_left_nbr_4x4 = ps_nbr_4x4 - 1; |
| ps_top_nbr_4x4 += (inter_pu_wd >> 2); |
| ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1; |
| nbr_4x4_left_strd = (cu_size >> 2); |
| } |
| } |
| } |
| |
| return (rdopt_cost); |
| } |
| |
/*!
******************************************************************************
* \if Function name : ihevce_intra_chroma_pred_mode_selector \endif
*
* \brief
*    Coding unit processing function for chroma special modes (Non-Luma modes).
*    NOTE: this header documents ihevce_intra_chroma_pred_mode_selector(),
*    which is defined further below, after the helper
*    ihevce_distortion_based_intra_chroma_mode_selector() that immediately
*    follows this comment.
*
* \param[in] ps_ctxt                   enc_loop module ctxt pointer
* \param[in] ps_chrm_cu_buf_prms       ctxt having chroma related prms
* \param[in] ps_cu_analyse             pointer to cu analyse
* \param[in] rd_opt_curr_idx           index in the array of RDopt params
* \param[in] tu_mode                   TU_EQ_CU or other case
* \param[in] i4_alpha_stim_multiplier  multiplier forwarded to the stim (noise)
*                                      injection into the distortion
* \param[in] u1_is_cu_noisy            non-zero when the CU is flagged as noisy
*
* \return
*    Stores the best SATD mode, its RDOPT cost, CABAC state, TU bits
*
* \author
*  Ittiam
*
*****************************************************************************
*/
| UWORD8 ihevce_distortion_based_intra_chroma_mode_selector( |
| cu_analyse_t *ps_cu_analyse, |
| ihevc_intra_pred_chroma_ref_substitution_ft *pf_ref_substitution, |
| pf_intra_pred *ppf_chroma_ip, |
| pf_res_trans_luma_had_chroma *ppf_resd_trns_had, |
| UWORD8 *pu1_src, |
| WORD32 i4_src_stride, |
| UWORD8 *pu1_pred, |
| WORD32 i4_pred_stride, |
| UWORD8 *pu1_ctb_nbr_map, |
| WORD32 i4_nbr_map_strd, |
| UWORD8 *pu1_ref_sub_out, |
| WORD32 i4_alpha_stim_multiplier, |
| UWORD8 u1_is_cu_noisy, |
| UWORD8 u1_trans_size, |
| UWORD8 u1_trans_idx, |
| UWORD8 u1_num_tus_in_cu, |
| UWORD8 u1_num_4x4_luma_blks_in_tu, |
| UWORD8 u1_enable_psyRDOPT, |
| UWORD8 u1_is_422) |
| { |
| UWORD8 u1_chrm_mode; |
| UWORD8 ctr; |
| WORD32 i4_subtu_idx; |
| |
| WORD32 i = 0; |
| UWORD8 u1_chrm_modes[4] = { 0, 1, 10, 26 }; |
| WORD32 i4_satd_had[4] = { 0 }; |
| WORD32 i4_best_satd_had = INT_MAX; |
| UWORD8 u1_cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1); |
| UWORD8 u1_cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1); |
| WORD32 i4_num_sub_tus = u1_is_422 + 1; |
| UWORD8 u1_best_chrm_mode = 0; |
| |
| /* Get the best satd among all possible modes */ |
| for(i = 0; i < 4; i++) |
| { |
| WORD32 left_strd = i4_src_stride; |
| |
| u1_chrm_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[u1_chrm_modes[i]] |
| : u1_chrm_modes[i]; |
| |
| /* loop based on num tus in a cu */ |
| for(ctr = 0; ctr < u1_num_tus_in_cu; ctr++) |
| { |
| WORD32 luma_nbr_flags; |
| WORD32 chrm_pred_func_idx; |
| |
| WORD32 i4_trans_size_m2 = u1_trans_size << 1; |
| UWORD8 *pu1_tu_src = pu1_src + ((ctr & 1) * i4_trans_size_m2) + |
| (((ctr > 1) * u1_trans_size * i4_src_stride) << u1_is_422); |
| UWORD8 *pu1_tu_pred = pu1_pred + ((ctr & 1) * i4_trans_size_m2) + |
| (((ctr > 1) * u1_trans_size * i4_pred_stride) << u1_is_422); |
| WORD32 i4_curr_tu_pos_x = u1_cu_pos_x + ((ctr & 1) * u1_num_4x4_luma_blks_in_tu); |
| WORD32 i4_curr_tu_pos_y = u1_cu_pos_y + ((ctr > 1) * u1_num_4x4_luma_blks_in_tu); |
| |
| luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu( |
| pu1_ctb_nbr_map, |
| i4_nbr_map_strd, |
| i4_curr_tu_pos_x, |
| i4_curr_tu_pos_y, |
| u1_num_4x4_luma_blks_in_tu, |
| u1_num_4x4_luma_blks_in_tu); |
| |
| for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++) |
| { |
| WORD32 nbr_flags; |
| |
| UWORD8 *pu1_cur_src = |
| pu1_tu_src + ((i4_subtu_idx == 1) * u1_trans_size * i4_src_stride); |
| UWORD8 *pu1_cur_pred = |
| pu1_tu_pred + ((i4_subtu_idx == 1) * u1_trans_size * i4_pred_stride); |
| UWORD8 *pu1_left = pu1_cur_src - 2; |
| UWORD8 *pu1_top = pu1_cur_src - i4_src_stride; |
| UWORD8 *pu1_top_left = pu1_top - 2; |
| |
| nbr_flags = ihevce_get_intra_chroma_tu_nbr( |
| luma_nbr_flags, i4_subtu_idx, u1_trans_size, u1_is_422); |
| |
| /* call the chroma reference array substitution */ |
| pf_ref_substitution( |
| pu1_top_left, |
| pu1_top, |
| pu1_left, |
| left_strd, |
| u1_trans_size, |
| nbr_flags, |
| pu1_ref_sub_out, |
| 1); |
| |
| /* use the look up to get the function idx */ |
| chrm_pred_func_idx = g_i4_ip_funcs[u1_chrm_mode]; |
| |
| /* call the intra prediction function */ |
| ppf_chroma_ip[chrm_pred_func_idx]( |
| pu1_ref_sub_out, 1, pu1_cur_pred, i4_pred_stride, u1_trans_size, u1_chrm_mode); |
| |
| if(!u1_is_cu_noisy || !i4_alpha_stim_multiplier) |
| { |
| /* compute Hadamard-transform satd : Cb */ |
| i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1]( |
| pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0); |
| |
| /* compute Hadamard-transform satd : Cr */ |
| i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1]( |
| pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0); |
| } |
| else |
| { |
| WORD32 i4_satd; |
| |
| /* compute Hadamard-transform satd : Cb */ |
| i4_satd = ppf_resd_trns_had[u1_trans_idx - 1]( |
| pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0); |
| |
| i4_satd = ihevce_inject_stim_into_distortion( |
| pu1_cur_src, |
| i4_src_stride, |
| pu1_cur_pred, |
| i4_pred_stride, |
| i4_satd, |
| i4_alpha_stim_multiplier, |
| u1_trans_size, |
| 0, |
| u1_enable_psyRDOPT, |
| U_PLANE); |
| |
| i4_satd_had[i] += i4_satd; |
| |
| /* compute Hadamard-transform satd : Cr */ |
| i4_satd = ppf_resd_trns_had[u1_trans_idx - 1]( |
| pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0); |
| |
| i4_satd = ihevce_inject_stim_into_distortion( |
| pu1_cur_src, |
| i4_src_stride, |
| pu1_cur_pred, |
| i4_pred_stride, |
| i4_satd, |
| i4_alpha_stim_multiplier, |
| u1_trans_size, |
| 0, |
| u1_enable_psyRDOPT, |
| V_PLANE); |
| |
| i4_satd_had[i] += i4_satd; |
| } |
| } |
| |
| /* set the neighbour map to 1 */ |
| ihevce_set_nbr_map( |
| pu1_ctb_nbr_map, |
| i4_nbr_map_strd, |
| i4_curr_tu_pos_x, |
| i4_curr_tu_pos_y, |
| u1_num_4x4_luma_blks_in_tu, |
| 1); |
| } |
| |
| /* set the neighbour map to 0 */ |
| ihevce_set_nbr_map( |
| pu1_ctb_nbr_map, |
| i4_nbr_map_strd, |
| (ps_cu_analyse->b3_cu_pos_x << 1), |
| (ps_cu_analyse->b3_cu_pos_y << 1), |
| (ps_cu_analyse->u1_cu_size >> 2), |
| 0); |
| |
| /* Get the least SATD and corresponding mode */ |
| if(i4_best_satd_had > i4_satd_had[i]) |
| { |
| i4_best_satd_had = i4_satd_had[i]; |
| u1_best_chrm_mode = u1_chrm_mode; |
| } |
| } |
| |
| return u1_best_chrm_mode; |
| } |
| |
| void ihevce_intra_chroma_pred_mode_selector( |
| ihevce_enc_loop_ctxt_t *ps_ctxt, |
| enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms, |
| cu_analyse_t *ps_cu_analyse, |
| WORD32 rd_opt_curr_idx, |
| WORD32 tu_mode, |
| WORD32 i4_alpha_stim_multiplier, |
| UWORD8 u1_is_cu_noisy) |
| { |
| chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt; |
| |
| ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr; |
| |
| UWORD8 *pu1_pred; |
| WORD32 trans_size; |
| WORD32 num_tus_in_cu; |
| WORD32 pred_strd; |
| WORD32 ctr; |
| WORD32 i4_subtu_idx; |
| WORD32 i4_num_sub_tus; |
| WORD32 trans_idx; |
| WORD32 scan_idx; |
| WORD32 num_4x4_luma_in_tu; |
| WORD32 cu_pos_x; |
| WORD32 cu_pos_y; |
| |
| recon_datastore_t *aps_recon_datastore[2] = { &ps_ctxt->as_cu_prms[0].s_recon_datastore, |
| &ps_ctxt->as_cu_prms[1].s_recon_datastore }; |
| |
| LWORD64 chrm_cod_cost = 0; |
| WORD32 chrm_tu_bits = 0; |
| WORD32 best_chrm_mode = DM_CHROMA_IDX; |
| UWORD8 *pu1_chrm_src = ps_chrm_cu_buf_prms->pu1_curr_src; |
| WORD32 chrm_src_stride = ps_chrm_cu_buf_prms->i4_chrm_src_stride; |
| UWORD8 *pu1_cu_left = ps_chrm_cu_buf_prms->pu1_cu_left; |
| UWORD8 *pu1_cu_top = ps_chrm_cu_buf_prms->pu1_cu_top; |
| UWORD8 *pu1_cu_top_left = ps_chrm_cu_buf_prms->pu1_cu_top_left; |
| WORD32 cu_left_stride = ps_chrm_cu_buf_prms->i4_cu_left_stride; |
| WORD32 cu_size = ps_cu_analyse->u1_cu_size; |
| WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq; |
| WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh; |
| UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2); |
| |
| ihevc_intra_pred_chroma_ref_substitution_fptr = |
| ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr; |
| i4_num_sub_tus = (u1_is_422 == 1) + 1; |
| |
| #if DISABLE_RDOQ_INTRA |
| i4_perform_rdoq = 0; |
| #endif |
| |
| if(TU_EQ_CU == tu_mode) |
| { |
| num_tus_in_cu = 1; |
| trans_size = cu_size >> 1; |
| num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/ |
| ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode]; |
| } |
| else |
| { |
| num_tus_in_cu = 4; |
| trans_size = cu_size >> 2; |
| num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/ |
| |
| /* For 8x8 CU only one TU */ |
| if(MIN_TU_SIZE > trans_size) |
| { |
| trans_size = MIN_TU_SIZE; |
| num_tus_in_cu = 1; |
| /* chroma nbr avail. is derived based on luma. |
| for 4x4 chrm use 8x8 luma's size */ |
| num_4x4_luma_in_tu = num_4x4_luma_in_tu << 1; |
| } |
| |
| ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode]; |
| } |
| |
| /* Can't be TU_EQ_SUBCU case */ |
| ASSERT(TU_EQ_SUBCU != tu_mode); |
| |
| /* translate the transform size to index */ |
| trans_idx = trans_size >> 2; |
| |
| pu1_pred = (UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data; |
| |
| pred_strd = ps_chr_intra_satd_ctxt->i4_pred_stride; |
| |
| /* for 16x16 cases */ |
| if(16 == trans_size) |
| { |
| trans_idx = 3; |
| } |
| |
| best_chrm_mode = ihevce_distortion_based_intra_chroma_mode_selector( |
| ps_cu_analyse, |
| ihevc_intra_pred_chroma_ref_substitution_fptr, |
| ps_ctxt->apf_chrm_ip, |
| ps_ctxt->apf_chrm_resd_trns_had, |
| pu1_chrm_src, |
| chrm_src_stride, |
| pu1_pred, |
| pred_strd, |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
| i4_alpha_stim_multiplier, |
| u1_is_cu_noisy, |
| trans_size, |
| trans_idx, |
| num_tus_in_cu, |
| num_4x4_luma_in_tu, |
| ps_ctxt->u1_enable_psyRDOPT, |
| u1_is_422); |
| |
| /* Store the best chroma mode */ |
| ps_chr_intra_satd_ctxt->u1_best_cr_mode = best_chrm_mode; |
| |
| /* evaluate RDOPT cost for the Best mode */ |
| { |
| WORD32 i4_subtu_pos_x; |
| WORD32 i4_subtu_pos_y; |
| UWORD8 u1_compute_spatial_ssd; |
| |
| WORD32 ai4_total_bytes_offset_cb[2] = { 0, 0 }; |
| WORD32 ai4_total_bytes_offset_cr[2] = { 0, 0 }; |
| /* State for prefix bin of chroma intra pred mode before CU encode */ |
| UWORD8 u1_chroma_intra_mode_prefix_state = |
| ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_CHROMA_PRED_MODE]; |
| WORD32 luma_trans_size = trans_size << 1; |
| WORD32 calc_recon = 0; |
| UWORD8 *pu1_left = pu1_cu_left; |
| UWORD8 *pu1_top = pu1_cu_top; |
| UWORD8 *pu1_top_left = pu1_cu_top_left; |
| WORD32 left_strd = cu_left_stride; |
| |
| if(ps_ctxt->i1_cu_qp_delta_enable) |
| { |
| ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, luma_trans_size, 1); |
| } |
| |
| u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) && |
| (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) && |
| CONVERT_SSDS_TO_SPATIAL_DOMAIN; |
| |
| if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT) |
| { |
| u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) && |
| CONVERT_SSDS_TO_SPATIAL_DOMAIN; |
| } |
| |
| /* get the 4x4 level postion of current cu */ |
| cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1); |
| cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1); |
| |
| calc_recon = !u1_compute_spatial_ssd && ((4 == num_tus_in_cu) || (u1_is_422 == 1)); |
| |
| if(calc_recon || u1_compute_spatial_ssd) |
| { |
| aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1; |
| aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1; |
| } |
| else |
| { |
| aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0; |
| aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0; |
| } |
| |
| /* loop based on num tus in a cu */ |
| for(ctr = 0; ctr < num_tus_in_cu; ctr++) |
| { |
| WORD16 *pi2_cur_deq_data_cb; |
| WORD16 *pi2_cur_deq_data_cr; |
| |
| WORD32 deq_data_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride; |
| WORD32 luma_nbr_flags = 0; |
| |
| luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu( |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x, |
| (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y, |
| (luma_trans_size >> 2), |
| (luma_trans_size >> 2)); |
| |
| for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++) |
| { |
| WORD32 cbf, num_bytes; |
| LWORD64 trans_ssd_u, trans_ssd_v; |
| UWORD8 u1_is_recon_available; |
| |
| WORD32 trans_size_m2 = trans_size << 1; |
| UWORD8 *pu1_cur_src = pu1_chrm_src + ((ctr & 1) * trans_size_m2) + |
| (((ctr > 1) * trans_size * chrm_src_stride) << u1_is_422) + |
| (i4_subtu_idx * trans_size * chrm_src_stride); |
| UWORD8 *pu1_cur_pred = pu1_pred + ((ctr & 1) * trans_size_m2) + |
| (((ctr > 1) * trans_size * pred_strd) << u1_is_422) + |
| (i4_subtu_idx * trans_size * pred_strd); |
| WORD32 i4_recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride; |
| UWORD8 *pu1_cur_recon = ((UWORD8 *)aps_recon_datastore[0] |
| ->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)]) + |
| ((ctr & 1) * trans_size_m2) + |
| (((ctr > 1) * trans_size * i4_recon_stride) << u1_is_422) + |
| (i4_subtu_idx * trans_size * i4_recon_stride); |
| |
| /* Use Chroma coeff/iq buf of the cur. intra cand. Not rememb. |
| chroma coeff/iq for high quality intra SATD special modes. Will |
| be over written by coeff of luma mode in chroma_rdopt call */ |
| UWORD8 *pu1_ecd_data_cb = |
| &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0]; |
| UWORD8 *pu1_ecd_data_cr = |
| &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0]; |
| |
| WORD32 chrm_pred_func_idx = 0; |
| LWORD64 curr_cb_cod_cost = 0; |
| LWORD64 curr_cr_cod_cost = 0; |
| WORD32 nbr_flags = 0; |
| |
| i4_subtu_pos_x = (((ctr & 1) * trans_size_m2) >> 2); |
| i4_subtu_pos_y = (((ctr > 1) * trans_size) >> (!u1_is_422 + 1)) + |
| ((i4_subtu_idx * trans_size) >> 2); |
| pi2_cur_deq_data_cb = &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] + |
| ((ctr & 1) * trans_size) + |
| (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) + |
| (i4_subtu_idx * trans_size * deq_data_strd); |
| pi2_cur_deq_data_cr = &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] + |
| ((ctr & 1) * trans_size) + |
| (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) + |
| (i4_subtu_idx * trans_size * deq_data_strd); |
| |
| /* left cu boundary */ |
| if(0 == i4_subtu_pos_x) |
| { |
| left_strd = cu_left_stride; |
| pu1_left = pu1_cu_left + (i4_subtu_pos_y << 2) * left_strd; |
| } |
| else |
| { |
| pu1_left = pu1_cur_recon - 2; |
| left_strd = i4_recon_stride; |
| } |
| |
| /* top cu boundary */ |
| if(0 == i4_subtu_pos_y) |
| { |
| pu1_top = pu1_cu_top + (i4_subtu_pos_x << 2); |
| } |
| else |
| { |
| pu1_top = pu1_cur_recon - i4_recon_stride; |
| } |
| |
| /* by default top left is set to cu top left */ |
| pu1_top_left = pu1_cu_top_left; |
| |
| /* top left based on position */ |
| if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x)) |
| { |
| pu1_top_left = pu1_left - left_strd; |
| } |
| else if(0 != i4_subtu_pos_x) |
| { |
| pu1_top_left = pu1_top - 2; |
| } |
| |
| /* populate the coeffs scan idx */ |
| scan_idx = SCAN_DIAG_UPRIGHT; |
| |
| /* RDOPT copy States : TU init (best until prev TU) to current */ |
| COPY_CABAC_STATES( |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0], |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
| IHEVC_CAB_CTXT_END); |
| |
| /* for 4x4 transforms based on intra pred mode scan is choosen*/ |
| if(4 == trans_size) |
| { |
| /* for modes from 22 upto 30 horizontal scan is used */ |
| if((best_chrm_mode > 21) && (best_chrm_mode < 31)) |
| { |
| scan_idx = SCAN_HORZ; |
| } |
| /* for modes from 6 upto 14 horizontal scan is used */ |
| else if((best_chrm_mode > 5) && (best_chrm_mode < 15)) |
| { |
| scan_idx = SCAN_VERT; |
| } |
| } |
| |
| nbr_flags = ihevce_get_intra_chroma_tu_nbr( |
| luma_nbr_flags, i4_subtu_idx, trans_size, u1_is_422); |
| |
| /* call the chroma reference array substitution */ |
| ihevc_intra_pred_chroma_ref_substitution_fptr( |
| pu1_top_left, |
| pu1_top, |
| pu1_left, |
| left_strd, |
| trans_size, |
| nbr_flags, |
| (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
| 1); |
| |
| /* use the look up to get the function idx */ |
| chrm_pred_func_idx = g_i4_ip_funcs[best_chrm_mode]; |
| |
| /* call the intra prediction function */ |
| ps_ctxt->apf_chrm_ip[chrm_pred_func_idx]( |
| (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
| 1, |
| pu1_cur_pred, |
| pred_strd, |
| trans_size, |
| best_chrm_mode); |
| |
| /* UPLANE RDOPT Loop */ |
| { |
| WORD32 tu_bits; |
| |
| cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn( |
| ps_ctxt, |
| pu1_cur_pred, |
| pred_strd, |
| pu1_cur_src, |
| chrm_src_stride, |
| pi2_cur_deq_data_cb, |
| deq_data_strd, |
| pu1_cur_recon, |
| i4_recon_stride, |
| pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx], |
| ps_ctxt->au1_cu_csbf, |
| ps_ctxt->i4_cu_csbf_strd, |
| trans_size, |
| scan_idx, |
| 1, |
| &num_bytes, |
| &tu_bits, |
| &ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr], |
| &ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr], |
| &u1_is_recon_available, |
| i4_perform_sbh, |
| i4_perform_rdoq, |
| &trans_ssd_u, |
| #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| i4_alpha_stim_multiplier, |
| u1_is_cu_noisy, |
| #endif |
| 0, |
| u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, |
| U_PLANE); |
| |
| #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL |
| if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
| { |
| #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT |
| trans_ssd_u = ihevce_inject_stim_into_distortion( |
| pu1_cur_src, |
| chrm_src_stride, |
| pu1_cur_pred, |
| pred_strd, |
| trans_ssd_u, |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| U_PLANE); |
| #else |
| if(u1_compute_spatial_ssd && u1_is_recon_available) |
| { |
| trans_ssd_u = ihevce_inject_stim_into_distortion( |
| pu1_cur_src, |
| chrm_src_stride, |
| pu1_cur_recon, |
| i4_recon_stride, |
| trans_ssd_u, |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| U_PLANE); |
| } |
| else |
| { |
| trans_ssd_u = ihevce_inject_stim_into_distortion( |
| pu1_cur_src, |
| chrm_src_stride, |
| pu1_cur_pred, |
| pred_strd, |
| trans_ssd_u, |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| U_PLANE); |
| } |
| #endif |
| } |
| #endif |
| |
| /* RDOPT copy States : New updated after curr TU to TU init */ |
| if(0 != cbf) |
| { |
| memcpy( |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0], |
| IHEVC_CAB_CTXT_END); |
| } |
| /* RDOPT copy States : Restoring back the Cb init state to Cr */ |
| else |
| { |
| memcpy( |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0], |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
| IHEVC_CAB_CTXT_END); |
| } |
| |
| if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd)) |
| { |
| ihevce_chroma_it_recon_fxn( |
| ps_ctxt, |
| pi2_cur_deq_data_cb, |
| deq_data_strd, |
| pu1_cur_pred, |
| pred_strd, |
| pu1_cur_recon, |
| i4_recon_stride, |
| (pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx]), |
| trans_size, |
| cbf, |
| ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr], |
| ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr], |
| U_PLANE); |
| } |
| |
| ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr] = cbf; |
| curr_cb_cod_cost = |
| trans_ssd_u + |
| COMPUTE_RATE_COST_CLIP30( |
| tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT); |
| chrm_tu_bits += tu_bits; |
| ai4_total_bytes_offset_cb[i4_subtu_idx] += num_bytes; |
| ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr] = |
| num_bytes; |
| } |
| |
| /* VPLANE RDOPT Loop */ |
| { |
| WORD32 tu_bits; |
| |
| cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn( |
| ps_ctxt, |
| pu1_cur_pred, |
| pred_strd, |
| pu1_cur_src, |
| chrm_src_stride, |
| pi2_cur_deq_data_cr, |
| deq_data_strd, |
| pu1_cur_recon, |
| i4_recon_stride, |
| pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx], |
| ps_ctxt->au1_cu_csbf, |
| ps_ctxt->i4_cu_csbf_strd, |
| trans_size, |
| scan_idx, |
| 1, |
| &num_bytes, |
| &tu_bits, |
| &ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr], |
| &ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr], |
| &u1_is_recon_available, |
| i4_perform_sbh, |
| i4_perform_rdoq, |
| &trans_ssd_v, |
| #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| i4_alpha_stim_multiplier, |
| u1_is_cu_noisy, |
| #endif |
| 0, |
| u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, |
| V_PLANE); |
| |
| #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL |
| if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
| { |
| #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT |
| trans_ssd_v = ihevce_inject_stim_into_distortion( |
| pu1_cur_src, |
| chrm_src_stride, |
| pu1_cur_pred, |
| pred_strd, |
| trans_ssd_v, |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| V_PLANE); |
| #else |
| if(u1_compute_spatial_ssd && u1_is_recon_available) |
| { |
| trans_ssd_v = ihevce_inject_stim_into_distortion( |
| pu1_cur_src, |
| chrm_src_stride, |
| pu1_cur_recon, |
| i4_recon_stride, |
| trans_ssd_v, |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| V_PLANE); |
| } |
| else |
| { |
| trans_ssd_v = ihevce_inject_stim_into_distortion( |
| pu1_cur_src, |
| chrm_src_stride, |
| pu1_cur_pred, |
| pred_strd, |
| trans_ssd_v, |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| V_PLANE); |
| } |
| #endif |
| } |
| #endif |
| |
| /* RDOPT copy States : New updated after curr TU to TU init */ |
| if(0 != cbf) |
| { |
| COPY_CABAC_STATES( |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0], |
| IHEVC_CAB_CTXT_END); |
| } |
| /* RDOPT copy States : Restoring back the Cb init state to Cr */ |
| else |
| { |
| COPY_CABAC_STATES( |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0], |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
| IHEVC_CAB_CTXT_END); |
| } |
| |
| if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd)) |
| { |
| ihevce_chroma_it_recon_fxn( |
| ps_ctxt, |
| pi2_cur_deq_data_cr, |
| deq_data_strd, |
| pu1_cur_pred, |
| pred_strd, |
| pu1_cur_recon, |
| i4_recon_stride, |
| (pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx]), |
| trans_size, |
| cbf, |
| ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr], |
| ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr], |
| V_PLANE); |
| } |
| |
| ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr] = cbf; |
| curr_cr_cod_cost = |
| trans_ssd_v + |
| COMPUTE_RATE_COST_CLIP30( |
| tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT); |
| chrm_tu_bits += tu_bits; |
| ai4_total_bytes_offset_cr[i4_subtu_idx] += num_bytes; |
| ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr] = |
| num_bytes; |
| } |
| |
| chrm_cod_cost += curr_cb_cod_cost; |
| chrm_cod_cost += curr_cr_cod_cost; |
| } |
| |
| /* set the neighbour map to 1 */ |
| ihevce_set_nbr_map( |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x, |
| (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y, |
| (luma_trans_size >> 2), |
| 1); |
| } |
| |
| /* set the neighbour map to 0 */ |
| ihevce_set_nbr_map( |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| (ps_cu_analyse->b3_cu_pos_x << 1), |
| (ps_cu_analyse->b3_cu_pos_y << 1), |
| (ps_cu_analyse->u1_cu_size >> 2), |
| 0); |
| |
| /* Account for coding b3_chroma_intra_pred_mode prefix and suffix bins */ |
| /* This is done by adding the bits for signalling chroma mode (0-3) */ |
| /* and subtracting the bits for chroma mode same as luma mode (4) */ |
| #if CHROMA_RDOPT_ENABLE |
| { |
| /* Estimate bits to encode prefix bin as 1 for b3_chroma_intra_pred_mode */ |
| WORD32 bits_frac_1 = |
| gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 1]; |
| |
| WORD32 bits_for_mode_0to3 = (2 << CABAC_FRAC_BITS_Q) + bits_frac_1; |
| |
| /* Estimate bits to encode prefix bin as 0 for b3_chroma_intra_pred_mode */ |
| WORD32 bits_for_mode4 = |
| gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 0]; |
| |
| /* accumulate into final rd cost for chroma */ |
| ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode = COMPUTE_RATE_COST_CLIP30( |
| (bits_for_mode_0to3 - bits_for_mode4), |
| ps_ctxt->i8_cl_ssd_lambda_chroma_qf, |
| (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q)); |
| |
| chrm_cod_cost += ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode; |
| } |
| #endif |
| |
| if(ps_ctxt->u1_enable_psyRDOPT) |
| { |
| UWORD8 *pu1_recon_cu; |
| WORD32 recon_stride; |
| WORD32 curr_pos_x; |
| WORD32 curr_pos_y; |
| WORD32 start_index; |
| WORD32 num_horz_cu_in_ctb; |
| WORD32 had_block_size; |
| |
| /* tODO: sreenivasa ctb size has to be used appropriately */ |
| had_block_size = 8; |
| num_horz_cu_in_ctb = 2 * 64 / had_block_size; |
| curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */ |
| curr_pos_y = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */ |
| recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride; |
| pu1_recon_cu = |
| aps_recon_datastore[0]->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)]; // |
| |
| /* start index to index the source satd of curr cu int he current ctb*/ |
| start_index = 2 * (curr_pos_x / had_block_size) + |
| (curr_pos_y / had_block_size) * num_horz_cu_in_ctb; |
| |
| { |
| chrm_cod_cost += ihevce_psy_rd_cost_croma( |
| ps_ctxt->ai4_source_chroma_satd, |
| pu1_recon_cu, |
| recon_stride, |
| 1, // |
| cu_size, |
| 0, // pic type |
| 0, //layer id |
| ps_ctxt->i4_satd_lamda, // lambda |
| start_index, |
| ps_ctxt->u1_is_input_data_hbd, // 8 bit |
| ps_ctxt->u1_chroma_array_type, |
| &ps_ctxt->s_cmn_opt_func |
| |
| ); // chroma subsampling 420 |
| } |
| } |
| |
| ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt = chrm_cod_cost; |
| ps_chr_intra_satd_ctxt->i4_chrm_tu_bits = chrm_tu_bits; |
| |
| memcpy( |
| &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0], |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
| IHEVC_CAB_CTXT_END); |
| } |
| } |
| |
/*!
******************************************************************************
* \if Function name : ihevce_chroma_cu_prcs_rdopt \endif
*
* \brief
*    Coding unit processing function for chroma
*
* \param[in] ps_ctxt                   enc_loop module ctxt pointer
* \param[in] rd_opt_curr_idx           index in the array of RDopt params
* \param[in] func_proc_mode            TU_EQ_CU or other case
* \param[in] pu1_chrm_src              pointer to source data buffer
* \param[in] chrm_src_stride           source buffer stride
* \param[in] pu1_cu_left               pointer to left recon data buffer
* \param[in] pu1_cu_top                pointer to top recon data buffer
* \param[in] pu1_cu_top_left           pointer to top left recon data buffer
* \param[in] cu_left_stride            left recon buffer stride
* \param[in] cu_pos_x                  position x of current CU in CTB
* \param[in] cu_pos_y                  position y of current CU in CTB
* \param[out] pi4_chrm_tu_bits         pointer to store the total chroma bits
* \param[in] i4_alpha_stim_multiplier  multiplier for stim (noise) injection
* \param[in] u1_is_cu_noisy            non-zero when the CU is flagged as noisy
*
* \return
*    Chroma coding cost (Cb and Cr included)
*
* \author
*  Ittiam
*
*****************************************************************************
*/
| LWORD64 ihevce_chroma_cu_prcs_rdopt( |
| ihevce_enc_loop_ctxt_t *ps_ctxt, |
| WORD32 rd_opt_curr_idx, |
| WORD32 func_proc_mode, |
| UWORD8 *pu1_chrm_src, |
| WORD32 chrm_src_stride, |
| UWORD8 *pu1_cu_left, |
| UWORD8 *pu1_cu_top, |
| UWORD8 *pu1_cu_top_left, |
| WORD32 cu_left_stride, |
| WORD32 cu_pos_x, |
| WORD32 cu_pos_y, |
| WORD32 *pi4_chrm_tu_bits, |
| WORD32 i4_alpha_stim_multiplier, |
| UWORD8 u1_is_cu_noisy) |
| { |
| tu_enc_loop_out_t *ps_tu; |
| tu_enc_loop_temp_prms_t *ps_tu_temp_prms; |
| |
| ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr; |
| |
| UWORD8 *pu1_pred; |
| UWORD8 *pu1_recon; |
| WORD32 i4_recon_stride; |
| WORD32 cu_size, trans_size = 0; |
| WORD32 pred_strd; |
| WORD32 ctr, i4_subtu_idx; |
| WORD32 scan_idx; |
| WORD32 u1_is_cu_coded_old; |
| WORD32 init_bytes_offset; |
| |
| enc_loop_cu_final_prms_t *ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_curr_idx]; |
| recon_datastore_t *ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore; |
| |
| WORD32 total_bytes_offset = 0; |
| LWORD64 chrm_cod_cost = 0; |
| WORD32 chrm_tu_bits = 0; |
| WORD32 chrm_pred_mode = DM_CHROMA_IDX, luma_pred_mode = 35; |
| LWORD64 i8_ssd_cb = 0; |
| WORD32 i4_bits_cb = 0; |
| LWORD64 i8_ssd_cr = 0; |
| WORD32 i4_bits_cr = 0; |
| UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2); |
| UWORD8 u1_num_tus = |
| /* NumChromaTU's = 1, if TUSize = 4 and CUSize = 8 */ |
| (!ps_best_cu_prms->as_tu_enc_loop[0].s_tu.b3_size && ps_best_cu_prms->u1_intra_flag) |
| ? 1 |
| : ps_best_cu_prms->u2_num_tus_in_cu; |
| UWORD8 u1_num_subtus_in_tu = u1_is_422 + 1; |
| UWORD8 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) && |
| (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) && |
| CONVERT_SSDS_TO_SPATIAL_DOMAIN; |
| /* Get the RDOPT cost of the best CU mode for early_exit */ |
| LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!rd_opt_curr_idx].i8_best_rdopt_cost; |
| /* Get the current running RDOPT (Luma RDOPT) for early_exit */ |
| LWORD64 curr_rdopt_cost = ps_ctxt->as_cu_prms[rd_opt_curr_idx].i8_curr_rdopt_cost; |
| WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq; |
| WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh; |
| |
| ihevc_intra_pred_chroma_ref_substitution_fptr = |
| ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr; |
| |
| if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT) |
| { |
| u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) && |
| CONVERT_SSDS_TO_SPATIAL_DOMAIN; |
| } |
| |
| /* Store the init bytes offset from luma */ |
| init_bytes_offset = ps_best_cu_prms->i4_num_bytes_ecd_data; |
| |
| /* Unused pred buffer in merge_skip_pred_data_t structure is used as |
| Chroma pred storage buf. for final_recon function. |
| The buffer is split into two and used as a ping-pong buffer */ |
| pu1_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] + |
| rd_opt_curr_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) + |
| (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1))); |
| |
| pred_strd = ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX]; |
| |
| pu1_recon = (UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs[0]; |
| i4_recon_stride = ps_recon_datastore->i4_chromaRecon_stride; |
| cu_size = ps_best_cu_prms->u1_cu_size; |
| chrm_tu_bits = 0; |
| |
| /* get the first TU pointer */ |
| ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0]; |
| /* get the first TU enc_loop temp prms pointer */ |
| ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0]; |
| |
| if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag) |
| { |
| /* Mode signalled by intra prediction for luma */ |
| luma_pred_mode = ps_best_cu_prms->au1_intra_pred_mode[0]; |
| |
| #if DISABLE_RDOQ_INTRA |
| i4_perform_rdoq = 0; |
| #endif |
| } |
| |
| else |
| { |
| UWORD8 *pu1_pred_org = pu1_pred; |
| |
| /* ------ Motion Compensation for Chroma -------- */ |
| for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++) |
| { |
| pu_t *ps_pu; |
| WORD32 inter_pu_wd; |
| WORD32 inter_pu_ht; |
| |
| ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr]; |
| |
| inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */ |
| inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1; |
| inter_pu_ht <<= u1_is_422; |
| |
| ihevce_chroma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_strd); |
| |
| if(2 == ps_best_cu_prms->u2_num_pus_in_cu) |
| { |
| /* 2Nx__ partition case */ |
| if(inter_pu_wd == cu_size) |
| { |
| pu1_pred += (inter_pu_ht * pred_strd); |
| } |
| |
| /* __x2N partition case */ |
| if(inter_pu_ht == (cu_size >> (u1_is_422 == 0))) |
| { |
| pu1_pred += inter_pu_wd; |
| } |
| } |
| } |
| |
| /* restore the pred pointer to start for transform loop */ |
| pu1_pred = pu1_pred_org; |
| } |
| |
| /* Used to store back only the luma based info. if SATD based chroma |
| mode also comes */ |
| u1_is_cu_coded_old = ps_best_cu_prms->u1_is_cu_coded; |
| |
| /* evaluate chroma candidates (same as luma) and |
| if INTRA & HIGH_QUALITY compare with best SATD mode */ |
| { |
| WORD32 calc_recon = 0, deq_data_strd; |
| WORD16 *pi2_deq_data; |
| UWORD8 *pu1_ecd_data; |
| UWORD8 u1_is_mode_eq_chroma_satd_mode = 0; |
| |
| pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0]; |
| pi2_deq_data += ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx; |
| deq_data_strd = cu_size; |
| /* update ecd buffer for storing coeff. */ |
| pu1_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0]; |
| pu1_ecd_data += init_bytes_offset; |
| /* store chroma starting index */ |
| ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx = init_bytes_offset; |
| |
| /* get the first TU pointer */ |
| ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0]; |
| ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0]; |
| |
| /* Reset total_bytes_offset for each candidate */ |
| chrm_pred_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[luma_pred_mode] |
| : luma_pred_mode; |
| |
| total_bytes_offset = 0; |
| |
| if(TU_EQ_SUBCU == func_proc_mode) |
| { |
| func_proc_mode = TU_EQ_CU_DIV2; |
| } |
| |
| /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and |
| TU_EQ_CU_DIV2 and TU_EQ_SUBCU case */ |
| if(8 == cu_size) |
| { |
| func_proc_mode = TU_EQ_CU; |
| } |
| |
| /* loop based on num tus in a cu */ |
| if(!ps_best_cu_prms->u1_intra_flag || !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd || |
| (ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd && |
| (chrm_pred_mode != |
| ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode].u1_best_cr_mode))) |
| { |
| /* loop based on num tus in a cu */ |
| for(ctr = 0; ctr < u1_num_tus; ctr++) |
| { |
| WORD32 num_bytes = 0; |
| LWORD64 curr_cb_cod_cost = 0; |
| LWORD64 curr_cr_cod_cost = 0; |
| WORD32 chrm_pred_func_idx = 0; |
| UWORD8 u1_is_early_exit_condition_satisfied = 0; |
| |
| /* Default cb and cr offset initialization for b3_chroma_intra_mode_idx=7 */ |
| /* FIX for TU tree shrinkage caused by ecd data copies in final mode recon */ |
| ps_tu->s_tu.b1_cb_cbf = ps_tu->s_tu.b1_cr_cbf = 0; |
| ps_tu->s_tu.b1_cb_cbf_subtu1 = ps_tu->s_tu.b1_cr_cbf_subtu1 = 0; |
| ps_tu->ai4_cb_coeff_offset[0] = total_bytes_offset + init_bytes_offset; |
| ps_tu->ai4_cr_coeff_offset[0] = total_bytes_offset + init_bytes_offset; |
| ps_tu->ai4_cb_coeff_offset[1] = total_bytes_offset + init_bytes_offset; |
| ps_tu->ai4_cr_coeff_offset[1] = total_bytes_offset + init_bytes_offset; |
| ps_tu_temp_prms->ai2_cb_bytes_consumed[0] = 0; |
| ps_tu_temp_prms->ai2_cr_bytes_consumed[0] = 0; |
| ps_tu_temp_prms->ai2_cb_bytes_consumed[1] = 0; |
| ps_tu_temp_prms->ai2_cr_bytes_consumed[1] = 0; |
| |
| /* TU level inits */ |
| /* check if chroma present flag is set */ |
| if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx) |
| { |
| /* RDOPT copy States : TU init (best until prev TU) to current */ |
| COPY_CABAC_STATES( |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0], |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
| IHEVC_CAB_CTXT_END); |
| |
| /* get the current transform size */ |
| trans_size = ps_tu->s_tu.b3_size; |
| trans_size = (1 << (trans_size + 1)); /* in chroma units */ |
| |
| /* since 2x2 transform is not allowed for chroma*/ |
| if(2 == trans_size) |
| { |
| trans_size = 4; |
| } |
| } |
| |
| for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++) |
| { |
| WORD32 cbf; |
| UWORD8 u1_is_recon_available; |
| |
| WORD32 nbr_flags = 0; |
| WORD32 zero_cols = 0; |
| WORD32 zero_rows = 0; |
| |
| /* check if chroma present flag is set */ |
| if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx) |
| { |
| UWORD8 *pu1_cur_pred; |
| UWORD8 *pu1_cur_recon; |
| UWORD8 *pu1_cur_src; |
| WORD16 *pi2_cur_deq_data; |
| WORD32 curr_pos_x, curr_pos_y; |
| LWORD64 trans_ssd_u, trans_ssd_v; |
| |
| /* get the current sub-tu posx and posy w.r.t to cu */ |
| curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3); |
| curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) + |
| (i4_subtu_idx * trans_size); |
| |
| /* 420sp case only vertical height will be half */ |
| if(u1_is_422 == 0) |
| { |
| curr_pos_y >>= 1; |
| } |
| |
| /* increment the pointers to start of current Sub-TU */ |
| pu1_cur_recon = (pu1_recon + curr_pos_x); |
| pu1_cur_recon += (curr_pos_y * i4_recon_stride); |
| pu1_cur_src = (pu1_chrm_src + curr_pos_x); |
| pu1_cur_src += (curr_pos_y * chrm_src_stride); |
| pu1_cur_pred = (pu1_pred + curr_pos_x); |
| pu1_cur_pred += (curr_pos_y * pred_strd); |
| pi2_cur_deq_data = pi2_deq_data + curr_pos_x; |
| pi2_cur_deq_data += (curr_pos_y * deq_data_strd); |
| |
| /* populate the coeffs scan idx */ |
| scan_idx = SCAN_DIAG_UPRIGHT; |
| |
| /* perform intra prediction only for Intra case */ |
| if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag) |
| { |
| UWORD8 *pu1_top_left; |
| UWORD8 *pu1_top; |
| UWORD8 *pu1_left; |
| WORD32 left_strd; |
| |
| calc_recon = !u1_compute_spatial_ssd && |
| ((4 == u1_num_tus) || (u1_is_422 == 1)) && |
| (((u1_num_tus == 1) && (0 == i4_subtu_idx)) || |
| ((ctr == 3) && (0 == i4_subtu_idx) && (u1_is_422 == 1)) || |
| ((u1_num_tus == 4) && (ctr < 3))); |
| |
| /* left cu boundary */ |
| if(0 == curr_pos_x) |
| { |
| pu1_left = pu1_cu_left + curr_pos_y * cu_left_stride; |
| left_strd = cu_left_stride; |
| } |
| else |
| { |
| pu1_left = pu1_cur_recon - 2; |
| left_strd = i4_recon_stride; |
| } |
| |
| /* top cu boundary */ |
| if(0 == curr_pos_y) |
| { |
| pu1_top = pu1_cu_top + curr_pos_x; |
| } |
| else |
| { |
| pu1_top = pu1_cur_recon - i4_recon_stride; |
| } |
| |
| /* by default top left is set to cu top left */ |
| pu1_top_left = pu1_cu_top_left; |
| |
| /* top left based on position */ |
| if((0 != curr_pos_y) && (0 == curr_pos_x)) |
| { |
| pu1_top_left = pu1_left - cu_left_stride; |
| } |
| else if(0 != curr_pos_x) |
| { |
| pu1_top_left = pu1_top - 2; |
| } |
| |
| /* for 4x4 transforms based on intra pred mode scan is chosen*/ |
| if(4 == trans_size) |
| { |
| /* for modes from 22 upto 30 horizontal scan is used */ |
| if((chrm_pred_mode > 21) && (chrm_pred_mode < 31)) |
| { |
| scan_idx = SCAN_HORZ; |
| } |
| /* for modes from 6 upto 14 vertical scan is used */ |
| else if((chrm_pred_mode > 5) && (chrm_pred_mode < 15)) |
| { |
| scan_idx = SCAN_VERT; |
| } |
| } |
| |
| nbr_flags = ihevce_get_intra_chroma_tu_nbr( |
| ps_best_cu_prms->au4_nbr_flags[ctr], |
| i4_subtu_idx, |
| trans_size, |
| u1_is_422); |
| |
| /* call the chroma reference array substitution */ |
| ihevc_intra_pred_chroma_ref_substitution_fptr( |
| pu1_top_left, |
| pu1_top, |
| pu1_left, |
| left_strd, |
| trans_size, |
| nbr_flags, |
| (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
| 1); |
| |
| /* use the look up to get the function idx */ |
| chrm_pred_func_idx = g_i4_ip_funcs[chrm_pred_mode]; |
| |
| /* call the intra prediction function */ |
| ps_ctxt->apf_chrm_ip[chrm_pred_func_idx]( |
| (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
| 1, |
| pu1_cur_pred, |
| pred_strd, |
| trans_size, |
| chrm_pred_mode); |
| } |
| |
| if(!ctr && !i4_subtu_idx && (u1_compute_spatial_ssd || calc_recon)) |
| { |
| ps_recon_datastore->au1_is_chromaRecon_available[0] = |
| !ps_best_cu_prms->u1_skip_flag; |
| } |
| else if(!ctr && !i4_subtu_idx) |
| { |
| ps_recon_datastore->au1_is_chromaRecon_available[0] = 0; |
| } |
| /************************************************************/ |
| /* recon loop is done for all cases including skip cu */ |
| /* This is because skipping chroma residual based on luma */ |
| /* skip decision can lead to chroma artifacts */ |
| /************************************************************/ |
| /************************************************************/ |
| /*In the high quality and medium speed modes, wherein chroma*/ |
| /*and luma costs are included in the total cost calculation */ |
| /*the cost is just a ssd cost, and not that obtained through*/ |
| /*iq_it path */ |
| /************************************************************/ |
| if(ps_best_cu_prms->u1_skip_flag == 0) |
| { |
| WORD32 tu_bits; |
| |
| cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn( |
| ps_ctxt, |
| pu1_cur_pred, |
| pred_strd, |
| pu1_cur_src, |
| chrm_src_stride, |
| pi2_cur_deq_data, |
| deq_data_strd, |
| pu1_cur_recon, |
| i4_recon_stride, |
| pu1_ecd_data + total_bytes_offset, |
| ps_ctxt->au1_cu_csbf, |
| ps_ctxt->i4_cu_csbf_strd, |
| trans_size, |
| scan_idx, |
| PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag, |
| &num_bytes, |
| &tu_bits, |
| &zero_cols, |
| &zero_rows, |
| &u1_is_recon_available, |
| i4_perform_sbh, |
| i4_perform_rdoq, |
| &trans_ssd_u, |
| #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| i4_alpha_stim_multiplier, |
| u1_is_cu_noisy, |
| #endif |
| ps_best_cu_prms->u1_skip_flag, |
| u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, |
| U_PLANE); |
| |
| if(u1_compute_spatial_ssd && u1_is_recon_available) |
| { |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] |
| [i4_subtu_idx] = 0; |
| } |
| else |
| { |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] |
| [i4_subtu_idx] = UCHAR_MAX; |
| } |
| |
| #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
| { |
| #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT |
| trans_ssd_u = ihevce_inject_stim_into_distortion( |
| pu1_cur_src, |
| chrm_src_stride, |
| pu1_cur_pred, |
| pred_strd, |
| trans_ssd_u, |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| U_PLANE); |
| #else |
| if(u1_compute_spatial_ssd && u1_is_recon_available) |
| { |
| trans_ssd_u = ihevce_inject_stim_into_distortion( |
| pu1_cur_src, |
| chrm_src_stride, |
| pu1_cur_recon, |
| i4_recon_stride, |
| trans_ssd_u, |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| U_PLANE); |
| } |
| else |
| { |
| trans_ssd_u = ihevce_inject_stim_into_distortion( |
| pu1_cur_src, |
| chrm_src_stride, |
| pu1_cur_pred, |
| pred_strd, |
| trans_ssd_u, |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| U_PLANE); |
| } |
| #endif |
| } |
| #endif |
| |
| curr_cb_cod_cost = |
| trans_ssd_u + |
| COMPUTE_RATE_COST_CLIP30( |
| tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT); |
| |
| chrm_tu_bits += tu_bits; |
| i4_bits_cb += tu_bits; |
| |
| /* RDOPT copy States : New updated after curr TU to TU init */ |
| if(0 != cbf) |
| { |
| COPY_CABAC_STATES( |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
| &ps_ctxt->s_rdopt_entropy_ctxt |
| .as_cu_entropy_ctxt[rd_opt_curr_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0], |
| IHEVC_CAB_CTXT_END); |
| } |
| /* RDOPT copy States : Restoring back the Cb init state to Cr */ |
| else |
| { |
| COPY_CABAC_STATES( |
| &ps_ctxt->s_rdopt_entropy_ctxt |
| .as_cu_entropy_ctxt[rd_opt_curr_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0], |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
| IHEVC_CAB_CTXT_END); |
| } |
| |
| /* If Intra and TU=CU/2, need recon for next TUs */ |
| if(calc_recon) |
| { |
| ihevce_chroma_it_recon_fxn( |
| ps_ctxt, |
| pi2_cur_deq_data, |
| deq_data_strd, |
| pu1_cur_pred, |
| pred_strd, |
| pu1_cur_recon, |
| i4_recon_stride, |
| (pu1_ecd_data + total_bytes_offset), |
| trans_size, |
| cbf, |
| zero_cols, |
| zero_rows, |
| U_PLANE); |
| |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] |
| [i4_subtu_idx] = 0; |
| } |
| else |
| { |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] |
| [i4_subtu_idx] = UCHAR_MAX; |
| } |
| } |
| else |
| { |
| /* num bytes is set to 0 */ |
| num_bytes = 0; |
| |
| /* cbf is returned as 0 */ |
| cbf = 0; |
| |
| curr_cb_cod_cost = trans_ssd_u = |
| |
| ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator( |
| pu1_cur_pred, |
| pu1_cur_src, |
| pred_strd, |
| chrm_src_stride, |
| trans_size, |
| trans_size, |
| U_PLANE); |
| |
| if(u1_compute_spatial_ssd) |
| { |
| /* buffer copy from pred to recon */ |
| |
| ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( |
| pu1_cur_pred, |
| pred_strd, |
| pu1_cur_recon, |
| i4_recon_stride, |
| trans_size, |
| trans_size, |
| U_PLANE); |
| |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] |
| [i4_subtu_idx] = 0; |
| } |
| |
| if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
| { |
| trans_ssd_u = ihevce_inject_stim_into_distortion( |
| pu1_cur_src, |
| chrm_src_stride, |
| pu1_cur_pred, |
| pred_strd, |
| trans_ssd_u, |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| U_PLANE); |
| } |
| |
| #if ENABLE_INTER_ZCU_COST |
| #if !WEIGH_CHROMA_COST |
| /* cbf = 0, accumulate cu not coded cost */ |
| ps_ctxt->i8_cu_not_coded_cost += curr_cb_cod_cost; |
| #else |
| /* cbf = 0, accumulate cu not coded cost */ |
| |
| ps_ctxt->i8_cu_not_coded_cost += (LWORD64)( |
| (curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor + |
| (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> |
| CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); |
| #endif |
| #endif |
| } |
| |
| #if !WEIGH_CHROMA_COST |
| curr_rdopt_cost += curr_cb_cod_cost; |
| #else |
| curr_rdopt_cost += |
| ((curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor + |
| (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> |
| CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); |
| #endif |
| chrm_cod_cost += curr_cb_cod_cost; |
| i8_ssd_cb += trans_ssd_u; |
| |
| if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1) |
| { |
| /* Early exit : If the current running cost exceeds |
| the prev. best mode cost, break */ |
| if(curr_rdopt_cost > prev_best_rdopt_cost) |
| { |
| u1_is_early_exit_condition_satisfied = 1; |
| break; |
| } |
| } |
| |
| /* inter cu is coded if any of the tu is coded in it */ |
| ps_best_cu_prms->u1_is_cu_coded |= cbf; |
| |
| /* update CB related params */ |
| ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] = |
| total_bytes_offset + init_bytes_offset; |
| |
| if(0 == i4_subtu_idx) |
| { |
| ps_tu->s_tu.b1_cb_cbf = cbf; |
| } |
| else |
| { |
| ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf; |
| } |
| |
| total_bytes_offset += num_bytes; |
| |
| ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] = zero_cols; |
| ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] = zero_rows; |
| ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes; |
| |
| /* recon loop is done for non skip cases */ |
| if(ps_best_cu_prms->u1_skip_flag == 0) |
| { |
| WORD32 tu_bits; |
| |
| cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn( |
| ps_ctxt, |
| pu1_cur_pred, |
| pred_strd, |
| pu1_cur_src, |
| chrm_src_stride, |
| pi2_cur_deq_data + trans_size, |
| deq_data_strd, |
| pu1_cur_recon, |
| i4_recon_stride, |
| pu1_ecd_data + total_bytes_offset, |
| ps_ctxt->au1_cu_csbf, |
| ps_ctxt->i4_cu_csbf_strd, |
| trans_size, |
| scan_idx, |
| PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag, |
| &num_bytes, |
| &tu_bits, |
| &zero_cols, |
| &zero_rows, |
| &u1_is_recon_available, |
| i4_perform_sbh, |
| i4_perform_rdoq, |
| &trans_ssd_v, |
| #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| i4_alpha_stim_multiplier, |
| u1_is_cu_noisy, |
| #endif |
| ps_best_cu_prms->u1_skip_flag, |
| u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, |
| V_PLANE); |
| |
| if(u1_compute_spatial_ssd && u1_is_recon_available) |
| { |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] |
| [i4_subtu_idx] = 0; |
| } |
| else |
| { |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] |
| [i4_subtu_idx] = UCHAR_MAX; |
| } |
| |
| #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
| { |
| #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT |
| trans_ssd_v = ihevce_inject_stim_into_distortion( |
| pu1_cur_src, |
| chrm_src_stride, |
| pu1_cur_pred, |
| pred_strd, |
| trans_ssd_v, |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| V_PLANE); |
| #else |
| if(u1_compute_spatial_ssd && u1_is_recon_available) |
| { |
| trans_ssd_v = ihevce_inject_stim_into_distortion( |
| pu1_cur_src, |
| chrm_src_stride, |
| pu1_cur_recon, |
| i4_recon_stride, |
| trans_ssd_v, |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| V_PLANE); |
| } |
| else |
| { |
| trans_ssd_v = ihevce_inject_stim_into_distortion( |
| pu1_cur_src, |
| chrm_src_stride, |
| pu1_cur_pred, |
| pred_strd, |
| trans_ssd_v, |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| V_PLANE); |
| } |
| #endif |
| } |
| #endif |
| |
| curr_cr_cod_cost = |
| trans_ssd_v + |
| COMPUTE_RATE_COST_CLIP30( |
| tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT); |
| chrm_tu_bits += tu_bits; |
| i4_bits_cr += tu_bits; |
| |
| /* RDOPT copy States : New updated after curr TU to TU init */ |
| if(0 != cbf) |
| { |
| COPY_CABAC_STATES( |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
| &ps_ctxt->s_rdopt_entropy_ctxt |
| .as_cu_entropy_ctxt[rd_opt_curr_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0], |
| IHEVC_CAB_CTXT_END); |
| } |
| /* RDOPT copy States : Restoring back the Cb init state to Cr */ |
| else |
| { |
| COPY_CABAC_STATES( |
| &ps_ctxt->s_rdopt_entropy_ctxt |
| .as_cu_entropy_ctxt[rd_opt_curr_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0], |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
| IHEVC_CAB_CTXT_END); |
| } |
| |
| /* If Intra and TU=CU/2, need recon for next TUs */ |
| if(calc_recon) |
| { |
| ihevce_chroma_it_recon_fxn( |
| ps_ctxt, |
| (pi2_cur_deq_data + trans_size), |
| deq_data_strd, |
| pu1_cur_pred, |
| pred_strd, |
| pu1_cur_recon, |
| i4_recon_stride, |
| (pu1_ecd_data + total_bytes_offset), |
| trans_size, |
| cbf, |
| zero_cols, |
| zero_rows, |
| V_PLANE); |
| |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] |
| [i4_subtu_idx] = 0; |
| } |
| else |
| { |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] |
| [i4_subtu_idx] = UCHAR_MAX; |
| } |
| } |
| else |
| { |
| /* num bytes is set to 0 */ |
| num_bytes = 0; |
| |
| /* cbf is returned as 0 */ |
| cbf = 0; |
| |
| curr_cr_cod_cost = trans_ssd_v = |
| |
| ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator( |
| pu1_cur_pred, |
| pu1_cur_src, |
| pred_strd, |
| chrm_src_stride, |
| trans_size, |
| trans_size, |
| V_PLANE); |
| |
| if(u1_compute_spatial_ssd) |
| { |
| /* buffer copy from pred to recon */ |
| ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( |
| pu1_cur_pred, |
| pred_strd, |
| pu1_cur_recon, |
| i4_recon_stride, |
| trans_size, |
| trans_size, |
| V_PLANE); |
| |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] |
| [i4_subtu_idx] = 0; |
| } |
| |
| if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
| { |
| trans_ssd_v = ihevce_inject_stim_into_distortion( |
| pu1_cur_src, |
| chrm_src_stride, |
| pu1_cur_pred, |
| pred_strd, |
| trans_ssd_v, |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| V_PLANE); |
| } |
| |
| #if ENABLE_INTER_ZCU_COST |
| #if !WEIGH_CHROMA_COST |
| /* cbf = 0, accumulate cu not coded cost */ |
| ps_ctxt->i8_cu_not_coded_cost += curr_cr_cod_cost; |
| #else |
| /* cbf = 0, accumulate cu not coded cost */ |
| |
| ps_ctxt->i8_cu_not_coded_cost += (LWORD64)( |
| (curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor + |
| (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> |
| CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); |
| #endif |
| #endif |
| } |
| |
| #if !WEIGH_CHROMA_COST |
| curr_rdopt_cost += curr_cr_cod_cost; |
| #else |
| curr_rdopt_cost += |
| ((curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor + |
| (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> |
| CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); |
| #endif |
| |
| chrm_cod_cost += curr_cr_cod_cost; |
| i8_ssd_cr += trans_ssd_v; |
| |
| if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1) |
| { |
| /* Early exit : If the current running cost exceeds |
| the prev. best mode cost, break */ |
| if(curr_rdopt_cost > prev_best_rdopt_cost) |
| { |
| u1_is_early_exit_condition_satisfied = 1; |
| break; |
| } |
| } |
| |
| /* inter cu is coded if any of the tu is coded in it */ |
| ps_best_cu_prms->u1_is_cu_coded |= cbf; |
| |
| /* update CR related params */ |
| ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] = |
| total_bytes_offset + init_bytes_offset; |
| |
| if(0 == i4_subtu_idx) |
| { |
| ps_tu->s_tu.b1_cr_cbf = cbf; |
| } |
| else |
| { |
| ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf; |
| } |
| |
| total_bytes_offset += num_bytes; |
| |
| ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] = zero_cols; |
| ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] = zero_rows; |
| ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes; |
| } |
| else |
| { |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx] = |
| UCHAR_MAX; |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx] = |
| UCHAR_MAX; |
| } |
| } |
| |
| if(u1_is_early_exit_condition_satisfied) |
| { |
| break; |
| } |
| |
| /* loop increments */ |
| ps_tu++; |
| ps_tu_temp_prms++; |
| } |
| |
| /* Signal as luma mode. HIGH_QUALITY may update it */ |
| ps_best_cu_prms->u1_chroma_intra_pred_mode = 4; |
| |
| /* modify the cost chrm_cod_cost */ |
| if(ps_ctxt->u1_enable_psyRDOPT) |
| { |
| UWORD8 *pu1_recon_cu; |
| WORD32 recon_stride; |
| WORD32 curr_pos_x; |
| WORD32 curr_pos_y; |
| WORD32 start_index; |
| WORD32 num_horz_cu_in_ctb; |
| WORD32 had_block_size; |
| /* TODO: sreenivasa ctb size has to be used appropriately */ |
| had_block_size = 8; |
| num_horz_cu_in_ctb = 2 * 64 / had_block_size; |
| |
| curr_pos_x = cu_pos_x << 3; /* pel units */ |
| curr_pos_y = cu_pos_y << 3; /* pel units */ |
| recon_stride = i4_recon_stride; |
| pu1_recon_cu = pu1_recon; |
| |
| /* start index to index the source satd of curr cu in the current ctb*/ |
| start_index = 2 * (curr_pos_x / had_block_size) + |
| (curr_pos_y / had_block_size) * num_horz_cu_in_ctb; |
| |
| { |
| chrm_cod_cost += ihevce_psy_rd_cost_croma( |
| ps_ctxt->ai4_source_chroma_satd, |
| pu1_recon, |
| recon_stride, |
| 1, // |
| cu_size, |
| 0, // pic type |
| 0, //layer id |
| ps_ctxt->i4_satd_lamda, // lambda |
| start_index, |
| ps_ctxt->u1_is_input_data_hbd, // 8 bit |
| ps_ctxt->u1_chroma_array_type, |
| &ps_ctxt->s_cmn_opt_func |
| |
| ); // chroma subsampling 420 |
| } |
| } |
| } |
| else |
| { |
| u1_is_mode_eq_chroma_satd_mode = 1; |
| chrm_cod_cost = MAX_COST_64; |
| } |
| |
| /* If Intra Block and preset is HIGH QUALITY, then compare with best SATD mode */ |
| if((PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag) && |
| (1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd)) |
| { |
| if(64 == cu_size) |
| { |
| ASSERT(TU_EQ_CU != func_proc_mode); |
| } |
| |
| if(ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode] |
| .i8_chroma_best_rdopt < chrm_cod_cost) |
| { |
| UWORD8 *pu1_src; |
| UWORD8 *pu1_ecd_data_src_cb; |
| UWORD8 *pu1_ecd_data_src_cr; |
| |
| chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt = |
| &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode]; |
| |
| UWORD8 *pu1_dst = &ps_ctxt->au1_rdopt_init_ctxt_models[0]; |
| WORD32 ai4_ecd_data_cb_offset[2] = { 0, 0 }; |
| WORD32 ai4_ecd_data_cr_offset[2] = { 0, 0 }; |
| |
| pu1_src = &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0]; |
| chrm_cod_cost = ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt; |
| chrm_pred_mode = ps_chr_intra_satd_ctxt->u1_best_cr_mode; |
| chrm_tu_bits = ps_chr_intra_satd_ctxt->i4_chrm_tu_bits; |
| |
| if(u1_is_mode_eq_chroma_satd_mode) |
| { |
| chrm_cod_cost -= ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode; |
| } |
| |
| /*Resetting total_bytes_offset to 0*/ |
| total_bytes_offset = 0; |
| |
| /* Update the CABAC state corresponding to chroma only */ |
| /* Chroma Cbf */ |
| memcpy(pu1_dst + IHEVC_CAB_CBCR_IDX, pu1_src + IHEVC_CAB_CBCR_IDX, 2); |
| /* Chroma transform skip */ |
| memcpy(pu1_dst + IHEVC_CAB_TFM_SKIP12, pu1_src + IHEVC_CAB_TFM_SKIP12, 1); |
| /* Chroma last coeff x prefix */ |
| memcpy( |
| pu1_dst + IHEVC_CAB_COEFFX_PREFIX + 15, |
| pu1_src + IHEVC_CAB_COEFFX_PREFIX + 15, |
| 3); |
| /* Chroma last coeff y prefix */ |
| memcpy( |
| pu1_dst + IHEVC_CAB_COEFFY_PREFIX + 15, |
| pu1_src + IHEVC_CAB_COEFFY_PREFIX + 15, |
| 3); |
| /* Chroma csbf */ |
| memcpy( |
| pu1_dst + IHEVC_CAB_CODED_SUBLK_IDX + 2, |
| pu1_src + IHEVC_CAB_CODED_SUBLK_IDX + 2, |
| 2); |
| /* Chroma sig coeff flags */ |
| memcpy( |
| pu1_dst + IHEVC_CAB_COEFF_FLAG + 27, pu1_src + IHEVC_CAB_COEFF_FLAG + 27, 15); |
| /* Chroma absgt1 flags */ |
| memcpy( |
| pu1_dst + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16, |
| pu1_src + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16, |
| 8); |
| /* Chroma absgt2 flags */ |
| memcpy( |
| pu1_dst + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4, |
| pu1_src + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4, |
| 2); |
| |
| ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0]; |
| ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0]; |
| |
| /* update to luma decision as we update chroma in final mode */ |
| ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded_old; |
| |
| for(ctr = 0; ctr < u1_num_tus; ctr++) |
| { |
| for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++) |
| { |
| WORD32 cbf; |
| WORD32 num_bytes; |
| |
| pu1_ecd_data_src_cb = |
| &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0]; |
| pu1_ecd_data_src_cr = |
| &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0]; |
| |
| /* check if chroma present flag is set */ |
| if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx) |
| { |
| UWORD8 *pu1_cur_pred_dest; |
| UWORD8 *pu1_cur_pred_src; |
| WORD32 pred_src_strd; |
| WORD16 *pi2_cur_deq_data_dest; |
| WORD16 *pi2_cur_deq_data_src_cb; |
| WORD16 *pi2_cur_deq_data_src_cr; |
| WORD32 deq_src_strd; |
| |
| WORD32 curr_pos_x, curr_pos_y; |
| |
| trans_size = ps_tu->s_tu.b3_size; |
| trans_size = (1 << (trans_size + 1)); /* in chroma units */ |
| |
| /*Deriving stride values*/ |
| pred_src_strd = ps_chr_intra_satd_ctxt->i4_pred_stride; |
| deq_src_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride; |
| |
| /* since 2x2 transform is not allowed for chroma*/ |
| if(2 == trans_size) |
| { |
| trans_size = 4; |
| } |
| |
| /* get the current tu posx and posy w.r.t to cu */ |
| curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3); |
| curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) + |
| (i4_subtu_idx * trans_size); |
| |
| /* 420sp case only vertical height will be half */ |
| if(0 == u1_is_422) |
| { |
| curr_pos_y >>= 1; |
| } |
| |
| /* increment the pointers to start of current TU */ |
| pu1_cur_pred_src = |
| ((UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data + curr_pos_x); |
| pu1_cur_pred_src += (curr_pos_y * pred_src_strd); |
| pu1_cur_pred_dest = (pu1_pred + curr_pos_x); |
| pu1_cur_pred_dest += (curr_pos_y * pred_strd); |
| |
| pi2_cur_deq_data_src_cb = |
| &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] + (curr_pos_x >> 1); |
| pi2_cur_deq_data_src_cr = |
| &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] + (curr_pos_x >> 1); |
| pi2_cur_deq_data_src_cb += (curr_pos_y * deq_src_strd); |
| pi2_cur_deq_data_src_cr += (curr_pos_y * deq_src_strd); |
| pi2_cur_deq_data_dest = pi2_deq_data + curr_pos_x; |
| pi2_cur_deq_data_dest += (curr_pos_y * deq_data_strd); |
| |
| /*Overwriting deq data with that belonging to the winning special mode |
| (luma mode != chroma mode) |
| ihevce_copy_2d takes source and dest arguments as UWORD8 *. We have to |
| correspondingly manipulate to copy WORD16 data*/ |
| |
| ps_ctxt->s_cmn_opt_func.pf_copy_2d( |
| (UWORD8 *)pi2_cur_deq_data_dest, |
| (deq_data_strd << 1), |
| (UWORD8 *)pi2_cur_deq_data_src_cb, |
| (deq_src_strd << 1), |
| (trans_size << 1), |
| trans_size); |
| |
| ps_ctxt->s_cmn_opt_func.pf_copy_2d( |
| (UWORD8 *)(pi2_cur_deq_data_dest + trans_size), |
| (deq_data_strd << 1), |
| (UWORD8 *)pi2_cur_deq_data_src_cr, |
| (deq_src_strd << 1), |
| (trans_size << 1), |
| trans_size); |
| |
| /*Overwriting pred data with that belonging to the winning special mode |
| (luma mode != chroma mode)*/ |
| |
| ps_ctxt->s_cmn_opt_func.pf_copy_2d( |
| pu1_cur_pred_dest, |
| pred_strd, |
| pu1_cur_pred_src, |
| pred_src_strd, |
| (trans_size << 1), |
| trans_size); |
| |
| num_bytes = ps_chr_intra_satd_ctxt |
| ->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr]; |
| cbf = ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr]; |
| /* inter cu is coded if any of the tu is coded in it */ |
| ps_best_cu_prms->u1_is_cu_coded |= cbf; |
| |
| /* update CB related params */ |
| ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] = |
| total_bytes_offset + init_bytes_offset; |
| |
| if(0 == i4_subtu_idx) |
| { |
| ps_tu->s_tu.b1_cb_cbf = cbf; |
| } |
| else |
| { |
| ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf; |
| } |
| |
| /*Overwriting the cb ecd data corresponding to the special mode*/ |
| if(0 != num_bytes) |
| { |
| memcpy( |
| (pu1_ecd_data + total_bytes_offset), |
| pu1_ecd_data_src_cb + ai4_ecd_data_cb_offset[i4_subtu_idx], |
| num_bytes); |
| } |
| |
| total_bytes_offset += num_bytes; |
| ai4_ecd_data_cb_offset[i4_subtu_idx] += num_bytes; |
| ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes; |
| |
| num_bytes = ps_chr_intra_satd_ctxt |
| ->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr]; |
| cbf = ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr]; |
| /* inter cu is coded if any of the tu is coded in it */ |
| ps_best_cu_prms->u1_is_cu_coded |= cbf; |
| |
| /*Overwriting the cr ecd data corresponding to the special mode*/ |
| if(0 != num_bytes) |
| { |
| memcpy( |
| (pu1_ecd_data + total_bytes_offset), |
| pu1_ecd_data_src_cr + ai4_ecd_data_cr_offset[i4_subtu_idx], |
| num_bytes); |
| } |
| |
| /* update CR related params */ |
| ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] = |
| total_bytes_offset + init_bytes_offset; |
| |
| if(0 == i4_subtu_idx) |
| { |
| ps_tu->s_tu.b1_cr_cbf = cbf; |
| } |
| else |
| { |
| ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf; |
| } |
| |
| total_bytes_offset += num_bytes; |
| ai4_ecd_data_cr_offset[i4_subtu_idx] += num_bytes; |
| |
| /*Updating zero rows and zero cols*/ |
| ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] = |
| ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr]; |
| ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] = |
| ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr]; |
| ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] = |
| ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr]; |
| ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] = |
| ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr]; |
| |
| ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes; |
| |
| if((u1_num_tus > 1) && |
| ps_recon_datastore->au1_is_chromaRecon_available[2]) |
| { |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] |
| [i4_subtu_idx] = 2; |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] |
| [i4_subtu_idx] = 2; |
| } |
| else if( |
| (1 == u1_num_tus) && |
| ps_recon_datastore->au1_is_chromaRecon_available[1]) |
| { |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] |
| [i4_subtu_idx] = 1; |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] |
| [i4_subtu_idx] = 1; |
| } |
| else |
| { |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] |
| [i4_subtu_idx] = UCHAR_MAX; |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] |
| [i4_subtu_idx] = UCHAR_MAX; |
| } |
| } |
| } |
| |
| /* loop increments */ |
| ps_tu++; |
| ps_tu_temp_prms++; |
| } |
| } |
| |
| if(!u1_is_422) |
| { |
| if(chrm_pred_mode == luma_pred_mode) |
| { |
| ps_best_cu_prms->u1_chroma_intra_pred_mode = 4; |
| } |
| else if(chrm_pred_mode == 0) |
| { |
| ps_best_cu_prms->u1_chroma_intra_pred_mode = 0; |
| } |
| else if(chrm_pred_mode == 1) |
| { |
| ps_best_cu_prms->u1_chroma_intra_pred_mode = 3; |
| } |
| else if(chrm_pred_mode == 10) |
| { |
| ps_best_cu_prms->u1_chroma_intra_pred_mode = 2; |
| } |
| else if(chrm_pred_mode == 26) |
| { |
| ps_best_cu_prms->u1_chroma_intra_pred_mode = 1; |
| } |
| else |
| { |
| ASSERT(0); /*Should not come here*/ |
| } |
| } |
| else |
| { |
| if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[luma_pred_mode]) |
| { |
| ps_best_cu_prms->u1_chroma_intra_pred_mode = 4; |
| } |
| else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[0]) |
| { |
| ps_best_cu_prms->u1_chroma_intra_pred_mode = 0; |
| } |
| else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[1]) |
| { |
| ps_best_cu_prms->u1_chroma_intra_pred_mode = 3; |
| } |
| else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[10]) |
| { |
| ps_best_cu_prms->u1_chroma_intra_pred_mode = 2; |
| } |
| else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[26]) |
| { |
| ps_best_cu_prms->u1_chroma_intra_pred_mode = 1; |
| } |
| else |
| { |
| ASSERT(0); /*Should not come here*/ |
| } |
| } |
| } |
| |
| /* Store the actual chroma mode */ |
| ps_best_cu_prms->u1_chroma_intra_pred_actual_mode = chrm_pred_mode; |
| } |
| |
| /* update the total bytes produced */ |
| ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes_offset + init_bytes_offset; |
| |
| /* store the final chrm bits accumulated */ |
| *pi4_chrm_tu_bits = chrm_tu_bits; |
| |
| return (chrm_cod_cost); |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_final_rdopt_mode_prcs \endif |
| * |
| * \brief |
| * Final RDOPT mode process function. Performs Recon computation for the |
| * final mode. Re-use or Compute pred, iq-data, coeff based on the flags. |
| * |
| * \param[in] ps_ctxt : pointer to enc_loop module |
| * \param[in] ps_prms : pointer to struct containing requisite parameters |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_final_rdopt_mode_prcs( |
| ihevce_enc_loop_ctxt_t *ps_ctxt, final_mode_process_prms_t *ps_prms) |
| { |
| enc_loop_cu_final_prms_t *ps_best_cu_prms; |
| tu_enc_loop_out_t *ps_tu_enc_loop; |
| tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms; |
| nbr_avail_flags_t s_nbr; |
| recon_datastore_t *ps_recon_datastore; |
| |
| ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr; |
| ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr; |
| ihevc_intra_pred_ref_filtering_ft *ihevc_intra_pred_ref_filtering_fptr; |
| |
| WORD32 num_tu_in_cu; |
| LWORD64 rd_opt_cost; |
| WORD32 ctr; |
| WORD32 i4_subtu_idx; |
| WORD32 cu_size; |
| WORD32 cu_pos_x, cu_pos_y; |
| WORD32 chrm_present_flag = 1; |
| WORD32 num_bytes, total_bytes = 0; |
| WORD32 chrm_ctr = 0; |
| WORD32 u1_is_cu_coded; |
| UWORD8 *pu1_old_ecd_data; |
| UWORD8 *pu1_chrm_old_ecd_data; |
| UWORD8 *pu1_cur_pred; |
| WORD16 *pi2_deq_data; |
| WORD16 *pi2_chrm_deq_data; |
| WORD16 *pi2_cur_deq_data; |
| WORD16 *pi2_cur_deq_data_chrm; |
| UWORD8 *pu1_cur_luma_recon; |
| UWORD8 *pu1_cur_chroma_recon; |
| UWORD8 *pu1_cur_src; |
| UWORD8 *pu1_cur_src_chrm; |
| UWORD8 *pu1_cur_pred_chrm; |
| UWORD8 *pu1_intra_pred_mode; |
| UWORD32 *pu4_nbr_flags; |
| LWORD64 i8_ssd; |
| |
| cu_nbr_prms_t *ps_cu_nbr_prms = ps_prms->ps_cu_nbr_prms; |
| cu_inter_cand_t *ps_best_inter_cand = ps_prms->ps_best_inter_cand; |
| enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms = ps_prms->ps_chrm_cu_buf_prms; |
| |
| WORD32 packed_pred_mode = ps_prms->packed_pred_mode; |
| WORD32 rd_opt_best_idx = ps_prms->rd_opt_best_idx; |
| UWORD8 *pu1_src = (UWORD8 *)ps_prms->pv_src; |
| WORD32 src_strd = ps_prms->src_strd; |
| UWORD8 *pu1_pred = (UWORD8 *)ps_prms->pv_pred; |
| WORD32 pred_strd = ps_prms->pred_strd; |
| UWORD8 *pu1_pred_chrm = (UWORD8 *)ps_prms->pv_pred_chrm; |
| WORD32 pred_chrm_strd = ps_prms->pred_chrm_strd; |
| UWORD8 *pu1_final_ecd_data = ps_prms->pu1_final_ecd_data; |
| UWORD8 *pu1_csbf_buf = ps_prms->pu1_csbf_buf; |
| WORD32 csbf_strd = ps_prms->csbf_strd; |
| UWORD8 *pu1_luma_recon = (UWORD8 *)ps_prms->pv_luma_recon; |
| WORD32 recon_luma_strd = ps_prms->recon_luma_strd; |
| UWORD8 *pu1_chrm_recon = (UWORD8 *)ps_prms->pv_chrm_recon; |
| WORD32 recon_chrma_strd = ps_prms->recon_chrma_strd; |
| UWORD8 u1_cu_pos_x = ps_prms->u1_cu_pos_x; |
| UWORD8 u1_cu_pos_y = ps_prms->u1_cu_pos_y; |
| UWORD8 u1_cu_size = ps_prms->u1_cu_size; |
| WORD8 i1_cu_qp = ps_prms->i1_cu_qp; |
| UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2); |
| UWORD8 u1_num_subtus = (u1_is_422 == 1) + 1; |
| /* Get the Chroma pointer and parameters */ |
| UWORD8 *pu1_src_chrm = ps_chrm_cu_buf_prms->pu1_curr_src; |
| WORD32 src_chrm_strd = ps_chrm_cu_buf_prms->i4_chrm_src_stride; |
| UWORD8 u1_compute_spatial_ssd_luma = 0; |
| UWORD8 u1_compute_spatial_ssd_chroma = 0; |
| /* Get the pointer for function selector */ |
| ihevc_intra_pred_luma_ref_substitution_fptr = |
| ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr; |
| |
| ihevc_intra_pred_ref_filtering_fptr = |
| ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr; |
| |
| ihevc_intra_pred_chroma_ref_substitution_fptr = |
| ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr; |
| |
| /* Get the best CU parameters */ |
| ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx]; |
| num_tu_in_cu = ps_best_cu_prms->u2_num_tus_in_cu; |
| cu_size = ps_best_cu_prms->u1_cu_size; |
| cu_pos_x = u1_cu_pos_x; |
| cu_pos_y = u1_cu_pos_y; |
| pu1_intra_pred_mode = &ps_best_cu_prms->au1_intra_pred_mode[0]; |
| pu4_nbr_flags = &ps_best_cu_prms->au4_nbr_flags[0]; |
| ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore; |
| |
| /* get the first TU pointer */ |
| ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0]; |
| /* get the first TU only enc_loop prms pointer */ |
| ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0]; |
| /*modify quant related param in ctxt based on current cu qp*/ |
| if((ps_ctxt->i1_cu_qp_delta_enable)) |
| { |
| /*recompute quant related param at every cu level*/ |
| ihevce_compute_quant_rel_param(ps_ctxt, i1_cu_qp); |
| |
| /* get frame level lambda params */ |
| ihevce_get_cl_cu_lambda_prms( |
| ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? i1_cu_qp : ps_ctxt->i4_frame_qp); |
| } |
| |
| ps_best_cu_prms->i8_cu_ssd = 0; |
| ps_best_cu_prms->u4_cu_open_intra_sad = 0; |
| |
| /* For skip case : Set TU_size = CU_size and make cbf = 0 |
| so that same TU loop can be used for all modes */ |
| if(PRED_MODE_SKIP == packed_pred_mode) |
| { |
| for(ctr = 0; ctr < num_tu_in_cu; ctr++) |
| { |
| ps_tu_enc_loop->s_tu.b1_y_cbf = 0; |
| |
| ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = 0; |
| |
| ps_tu_enc_loop++; |
| ps_tu_enc_loop_temp_prms++; |
| } |
| |
| /* go back to the first TU pointer */ |
| ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0]; |
| ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0]; |
| } |
| /** For inter case, pred calculation is outside the loop **/ |
| if(PRED_MODE_INTRA != packed_pred_mode) |
| { |
| /**------------- Compute pred data if required --------------**/ |
| if((1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data)) |
| { |
| nbr_4x4_t *ps_topleft_nbr_4x4; |
| nbr_4x4_t *ps_left_nbr_4x4; |
| nbr_4x4_t *ps_top_nbr_4x4; |
| WORD32 nbr_4x4_left_strd; |
| |
| ps_best_inter_cand->pu1_pred_data = pu1_pred; |
| ps_best_inter_cand->i4_pred_data_stride = pred_strd; |
| |
| /* Get the CU nbr information */ |
| ps_topleft_nbr_4x4 = ps_cu_nbr_prms->ps_topleft_nbr_4x4; |
| ps_left_nbr_4x4 = ps_cu_nbr_prms->ps_left_nbr_4x4; |
| ps_top_nbr_4x4 = ps_cu_nbr_prms->ps_top_nbr_4x4; |
| nbr_4x4_left_strd = ps_cu_nbr_prms->nbr_4x4_left_strd; |
| |
| /* MVP ,MVD calc and Motion compensation */ |
| rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)( |
| ps_ctxt, |
| ps_best_inter_cand, |
| u1_cu_size, |
| cu_pos_x, |
| cu_pos_y, |
| ps_left_nbr_4x4, |
| ps_top_nbr_4x4, |
| ps_topleft_nbr_4x4, |
| nbr_4x4_left_strd, |
| rd_opt_best_idx); |
| } |
| |
| /** ------ Motion Compensation for Chroma -------- **/ |
| if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data) |
| { |
| UWORD8 *pu1_cur_pred; |
| pu1_cur_pred = pu1_pred_chrm; |
| |
| /* run a loop over all the partitions in cu */ |
| for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++) |
| { |
| pu_t *ps_pu; |
| WORD32 inter_pu_wd, inter_pu_ht; |
| |
| ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr]; |
| |
| /* IF AMP then each partitions can have diff wd ht */ |
| inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */ |
| inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1; |
| inter_pu_ht <<= u1_is_422; |
| /* chroma mc func */ |
| ihevce_chroma_inter_pred_pu( |
| &ps_ctxt->s_mc_ctxt, ps_pu, pu1_cur_pred, pred_chrm_strd); |
| if(2 == ps_best_cu_prms->u2_num_pus_in_cu) |
| { |
| /* 2Nx__ partition case */ |
| if(inter_pu_wd == ps_best_cu_prms->u1_cu_size) |
| { |
| pu1_cur_pred += (inter_pu_ht * pred_chrm_strd); |
| } |
| /* __x2N partition case */ |
| if(inter_pu_ht == (ps_best_cu_prms->u1_cu_size >> (u1_is_422 == 0))) |
| { |
| pu1_cur_pred += inter_pu_wd; |
| } |
| } |
| } |
| } |
| } |
| pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0]; |
| pi2_chrm_deq_data = |
| &ps_best_cu_prms->pi2_cu_deq_coeffs[0] + ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx; |
| pu1_old_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0]; |
| pu1_chrm_old_ecd_data = |
| &ps_best_cu_prms->pu1_cu_coeffs[0] + ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx; |
| |
| /* default value for cu coded flag */ |
| u1_is_cu_coded = 0; |
| |
| /* If we are re-computing coeff, set sad to 0 and start accumulating */ |
| /* else use the best cand. sad from RDOPT stage */ |
| if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data) |
| { |
| /* init of sad of CU accumulated over all TU */ |
| ps_best_cu_prms->u4_cu_sad = 0; |
| |
| /* reset the luma residual bits */ |
| ps_best_cu_prms->u4_cu_luma_res_bits = 0; |
| } |
| |
| if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data) |
| { |
| /* reset the chroma residual bits */ |
| ps_best_cu_prms->u4_cu_chroma_res_bits = 0; |
| } |
| |
| if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data) || |
| (1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)) |
| { |
| /*Header bits have to be reevaluated if luma and chroma reevaluation is done, as |
| the quantized coefficients might be changed. |
| We are copying only those states which correspond to the header from the cabac state |
| of the previous CU, because the header is going to be recomputed for this condition*/ |
| ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1; |
| memcpy( |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
| &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0], |
| IHEVC_CAB_COEFFX_PREFIX); |
| |
| if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)) |
| { |
| COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
| (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX), |
| (&ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0] + |
| IHEVC_CAB_COEFFX_PREFIX), |
| (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX)); |
| } |
| else |
| { |
| COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
| (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX), |
| (&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0] + |
| IHEVC_CAB_COEFFX_PREFIX), |
| (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX)); |
| } |
| ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_best_idx; |
| } |
| else |
| { |
| ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0; |
| } |
| |
| /* Zero cbf tool is disabled for intra CUs */ |
| if(PRED_MODE_INTRA == packed_pred_mode) |
| { |
| #if ENABLE_ZERO_CBF_IN_INTRA |
| ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE; |
| #else |
| ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF; |
| #endif |
| } |
| else |
| { |
| #if DISABLE_ZERO_ZBF_IN_INTER |
| ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF; |
| #else |
| ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE; |
| #endif |
| } |
| |
| /** Loop for all tu blocks in current cu and do reconstruction **/ |
| for(ctr = 0; ctr < num_tu_in_cu; ctr++) |
| { |
| tu_t *ps_tu; |
| WORD32 trans_size, num_4x4_in_tu; |
| WORD32 cbf, zero_rows, zero_cols; |
| WORD32 cu_pos_x_in_4x4, cu_pos_y_in_4x4; |
| WORD32 cu_pos_x_in_pix, cu_pos_y_in_pix; |
| WORD32 luma_pred_mode, chroma_pred_mode = 0; |
| UWORD8 au1_is_recon_available[2]; |
| |
| ps_tu = &(ps_tu_enc_loop->s_tu); /* Points to the TU property ctxt */ |
| |
| u1_compute_spatial_ssd_luma = 0; |
| u1_compute_spatial_ssd_chroma = 0; |
| |
| trans_size = 1 << (ps_tu->b3_size + 2); |
| num_4x4_in_tu = (trans_size >> 2); |
| cu_pos_x_in_4x4 = ps_tu->b4_pos_x; |
| cu_pos_y_in_4x4 = ps_tu->b4_pos_y; |
| |
| /* populate the coeffs scan idx */ |
| ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT; |
| |
| /* get the current pos x and pos y in pixels */ |
| cu_pos_x_in_pix = (cu_pos_x_in_4x4 << 2) - (cu_pos_x << 3); |
| cu_pos_y_in_pix = (cu_pos_y_in_4x4 << 2) - (cu_pos_y << 3); |
| |
| /* Update pointers based on the location */ |
| pu1_cur_src = pu1_src + cu_pos_x_in_pix; |
| pu1_cur_src += (cu_pos_y_in_pix * src_strd); |
| pu1_cur_pred = pu1_pred + cu_pos_x_in_pix; |
| pu1_cur_pred += (cu_pos_y_in_pix * pred_strd); |
| |
| pu1_cur_luma_recon = pu1_luma_recon + cu_pos_x_in_pix; |
| pu1_cur_luma_recon += (cu_pos_y_in_pix * recon_luma_strd); |
| |
| pi2_cur_deq_data = pi2_deq_data + cu_pos_x_in_pix; |
| pi2_cur_deq_data += cu_pos_y_in_pix * cu_size; |
| |
| pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix; |
| pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) + |
| (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd)); |
| |
| pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix; |
| pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) + |
| (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd)); |
| |
| pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix; |
| pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) + |
| (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd)); |
| |
| pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix; |
| pi2_cur_deq_data_chrm += |
| ((cu_pos_y_in_pix >> 1) * cu_size) + (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size)); |
| |
| /* if transform size is 4x4 then only first luma 4x4 will have chroma*/ |
| chrm_present_flag = 1; /* by default chroma present is set to 1*/ |
| |
| if(4 == trans_size) |
| { |
| /* if tusize is 4x4 then only first luma 4x4 will have chroma*/ |
| if(0 != chrm_ctr) |
| { |
| chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE; |
| } |
| |
| /* increment the chrm ctr unconditionally */ |
| chrm_ctr++; |
| /* after ctr reached 4 reset it */ |
| if(4 == chrm_ctr) |
| { |
| chrm_ctr = 0; |
| } |
| } |
| |
| /**------------- Compute pred data if required --------------**/ |
| if(PRED_MODE_INTRA == packed_pred_mode) /* Inter pred calc. is done outside loop */ |
| { |
| /* Get the pred mode for scan idx calculation, even if pred is not required */ |
| luma_pred_mode = *pu1_intra_pred_mode; |
| |
| if((ps_ctxt->i4_rc_pass == 1) || |
| (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data)) |
| { |
| WORD32 nbr_flags; |
| WORD32 luma_pred_func_idx; |
| UWORD8 *pu1_left; |
| UWORD8 *pu1_top; |
| UWORD8 *pu1_top_left; |
| WORD32 left_strd; |
| |
| /* left cu boundary */ |
| if(0 == cu_pos_x_in_pix) |
| { |
| left_strd = ps_cu_nbr_prms->cu_left_stride; |
| pu1_left = ps_cu_nbr_prms->pu1_cu_left + cu_pos_y_in_pix * left_strd; |
| } |
| else |
| { |
| pu1_left = pu1_cur_luma_recon - 1; |
| left_strd = recon_luma_strd; |
| } |
| |
| /* top cu boundary */ |
| if(0 == cu_pos_y_in_pix) |
| { |
| pu1_top = ps_cu_nbr_prms->pu1_cu_top + cu_pos_x_in_pix; |
| } |
| else |
| { |
| pu1_top = pu1_cur_luma_recon - recon_luma_strd; |
| } |
| |
| /* by default top left is set to cu top left */ |
| pu1_top_left = ps_cu_nbr_prms->pu1_cu_top_left; |
| |
| /* top left based on position */ |
| if((0 != cu_pos_y_in_pix) && (0 == cu_pos_x_in_pix)) |
| { |
| pu1_top_left = pu1_left - left_strd; |
| } |
| else if(0 != cu_pos_x_in_pix) |
| { |
| pu1_top_left = pu1_top - 1; |
| } |
| |
| /* get the neighbour availability flags */ |
| nbr_flags = ihevce_get_nbr_intra( |
| &s_nbr, |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| cu_pos_x_in_4x4, |
| cu_pos_y_in_4x4, |
| num_4x4_in_tu); |
| |
| if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data) |
| { |
| /* copy the nbr flags for chroma reuse */ |
| if(4 != trans_size) |
| { |
| *pu4_nbr_flags = nbr_flags; |
| } |
| else if(1 == chrm_present_flag) |
| { |
| /* compute the avail flags assuming luma trans is 8x8 */ |
| /* get the neighbour availability flags */ |
| *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu( |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| cu_pos_x_in_4x4, |
| cu_pos_y_in_4x4, |
| (num_4x4_in_tu << 1), |
| (num_4x4_in_tu << 1)); |
| } |
| |
| /* call reference array substitution */ |
| ihevc_intra_pred_luma_ref_substitution_fptr( |
| pu1_top_left, |
| pu1_top, |
| pu1_left, |
| left_strd, |
| trans_size, |
| nbr_flags, |
| (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
| 1); |
| |
| /* call reference filtering */ |
| ihevc_intra_pred_ref_filtering_fptr( |
| (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
| trans_size, |
| (UWORD8 *)ps_ctxt->pv_ref_filt_out, |
| luma_pred_mode, |
| ps_ctxt->i1_strong_intra_smoothing_enable_flag); |
| |
| /* use the look up to get the function idx */ |
| luma_pred_func_idx = g_i4_ip_funcs[luma_pred_mode]; |
| |
| /* call the intra prediction function */ |
| ps_ctxt->apf_lum_ip[luma_pred_func_idx]( |
| (UWORD8 *)ps_ctxt->pv_ref_filt_out, |
| 1, |
| pu1_cur_pred, |
| pred_strd, |
| trans_size, |
| luma_pred_mode); |
| } |
| } |
| else if( |
| (1 == chrm_present_flag) && |
| (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data)) |
| { |
| WORD32 temp_num_4x4_in_tu = num_4x4_in_tu; |
| |
| if(4 == trans_size) /* compute the avail flags assuming luma trans is 8x8 */ |
| { |
| temp_num_4x4_in_tu = num_4x4_in_tu << 1; |
| } |
| |
| *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu( |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| cu_pos_x_in_4x4, |
| cu_pos_y_in_4x4, |
| temp_num_4x4_in_tu, |
| temp_num_4x4_in_tu); |
| } |
| |
| /* Get the pred mode for scan idx calculation, even if pred is not required */ |
| chroma_pred_mode = ps_best_cu_prms->u1_chroma_intra_pred_actual_mode; |
| } |
| |
| if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data) |
| { |
| WORD32 temp_bits; |
| LWORD64 temp_cost; |
| UWORD32 u4_tu_sad; |
| WORD32 perform_sbh, perform_rdoq; |
| |
| if(PRED_MODE_INTRA == packed_pred_mode) |
| { |
| /* for luma 4x4 and 8x8 transforms based on intra pred mode scan is chosen*/ |
| if(trans_size < 16) |
| { |
| /* for modes from 22 upto 30 horizontal scan is used */ |
| if((luma_pred_mode > 21) && (luma_pred_mode < 31)) |
| { |
| ps_ctxt->i4_scan_idx = SCAN_HORZ; |
| } |
| /* for modes from 6 upto 14 vertical scan is used */ |
| else if((luma_pred_mode > 5) && (luma_pred_mode < 15)) |
| { |
| ps_ctxt->i4_scan_idx = SCAN_VERT; |
| } |
| } |
| } |
| |
| /* RDOPT copy States : TU init (best until prev TU) to current */ |
| COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0] + |
| IHEVC_CAB_COEFFX_PREFIX, |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
| IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
| |
| if(ps_prms->u1_recompute_sbh_and_rdoq) |
| { |
| perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH); |
| perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ); |
| } |
| else |
| { |
| /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/ |
| perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh; |
| /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean |
| we would have to do RDOQ again.*/ |
| perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq; |
| } |
| |
| #if DISABLE_RDOQ_INTRA |
| if(PRED_MODE_INTRA == packed_pred_mode) |
| { |
| perform_rdoq = 0; |
| } |
| #endif |
| /*If BEST candidate RDOQ is enabled, either no coef level rdoq or CU level rdoq has to be enabled |
| so that all candidates and best candidate are quantized with same rounding factor */ |
| if(1 == perform_rdoq) |
| { |
| ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING); |
| } |
| |
| cbf = ihevce_t_q_iq_ssd_scan_fxn( |
| ps_ctxt, |
| pu1_cur_pred, |
| pred_strd, |
| pu1_cur_src, |
| src_strd, |
| pi2_cur_deq_data, |
| cu_size, /*deq_data stride is cu_size*/ |
| pu1_cur_luma_recon, |
| recon_luma_strd, |
| pu1_final_ecd_data, |
| pu1_csbf_buf, |
| csbf_strd, |
| trans_size, |
| packed_pred_mode, |
| &temp_cost, |
| &num_bytes, |
| &temp_bits, |
| &u4_tu_sad, |
| &zero_cols, |
| &zero_rows, |
| &au1_is_recon_available[0], |
| perform_rdoq, //(BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level), |
| perform_sbh, |
| #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT |
| : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
| (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) / |
| 100.0, |
| ps_prms->u1_is_cu_noisy, |
| #endif |
| u1_compute_spatial_ssd_luma ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, |
| 1 /*early cbf*/ |
| ); //(BEST_CAND_SBH == ps_ctxt->i4_sbh_level)); |
| |
| /* Accumulate luma residual bits */ |
| ps_best_cu_prms->u4_cu_luma_res_bits += temp_bits; |
| |
| /* RDOPT copy States : New updated after curr TU to TU init */ |
| if(0 != cbf) |
| { |
| /* update to new state only if CBF is non zero */ |
| COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0] + |
| IHEVC_CAB_COEFFX_PREFIX, |
| IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
| } |
| |
| /* accumulate the TU sad into cu sad */ |
| ps_best_cu_prms->u4_cu_sad += u4_tu_sad; |
| ps_tu->b1_y_cbf = cbf; |
| ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = num_bytes; |
| |
| /* If somebody updates cbf (RDOQ or SBH), update in nbr str. for BS */ |
| if((ps_prms->u1_will_cabac_state_change) && (!ps_prms->u1_is_first_pass)) |
| { |
| WORD32 num_4x4_in_cu = u1_cu_size >> 2; |
| nbr_4x4_t *ps_cur_nbr_4x4 = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0]; |
| ps_cur_nbr_4x4 = (ps_cur_nbr_4x4 + (cu_pos_x_in_pix >> 2)); |
| ps_cur_nbr_4x4 += ((cu_pos_y_in_pix >> 2) * num_4x4_in_cu); |
| /* replicate the nbr 4x4 structure for all 4x4 blocks in current TU */ |
| ps_cur_nbr_4x4->b1_y_cbf = cbf; |
| /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/ |
| ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp; |
| /* Qp and cbf are stored for the all 4x4 in TU */ |
| { |
| WORD32 i, j; |
| nbr_4x4_t *ps_tmp_4x4; |
| ps_tmp_4x4 = ps_cur_nbr_4x4; |
| |
| for(i = 0; i < num_4x4_in_tu; i++) |
| { |
| for(j = 0; j < num_4x4_in_tu; j++) |
| { |
| ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp; |
| ps_tmp_4x4[j].b1_y_cbf = cbf; |
| } |
| /* row level update*/ |
| ps_tmp_4x4 += num_4x4_in_cu; |
| } |
| } |
| } |
| } |
| else |
| { |
| zero_cols = ps_tu_enc_loop_temp_prms->u4_luma_zero_col; |
| zero_rows = ps_tu_enc_loop_temp_prms->u4_luma_zero_row; |
| |
| if(ps_prms->u1_will_cabac_state_change) |
| { |
| num_bytes = ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed; |
| } |
| else |
| { |
| num_bytes = 0; |
| } |
| |
| /* copy luma ecd data to final buffer */ |
| memcpy(pu1_final_ecd_data, pu1_old_ecd_data, num_bytes); |
| |
| pu1_old_ecd_data += num_bytes; |
| |
| au1_is_recon_available[0] = 0; |
| } |
| |
| /**-------- Compute Recon data (Do IT & Recon) : Luma -----------**/ |
| if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data && |
| (!u1_compute_spatial_ssd_luma || |
| (!au1_is_recon_available[0] && u1_compute_spatial_ssd_luma))) |
| { |
| if(!ps_recon_datastore->u1_is_lumaRecon_available || |
| (ps_recon_datastore->u1_is_lumaRecon_available && |
| (UCHAR_MAX == ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]))) |
| { |
| ihevce_it_recon_fxn( |
| ps_ctxt, |
| pi2_cur_deq_data, |
| cu_size, |
| pu1_cur_pred, |
| pred_strd, |
| pu1_cur_luma_recon, |
| recon_luma_strd, |
| pu1_final_ecd_data, |
| trans_size, |
| packed_pred_mode, |
| ps_tu->b1_y_cbf, |
| zero_cols, |
| zero_rows); |
| } |
| else if( |
| ps_recon_datastore->u1_is_lumaRecon_available && |
| (UCHAR_MAX != ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr])) |
| { |
| UWORD8 *pu1_recon_src = |
| ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs |
| [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]]) + |
| cu_pos_x_in_pix + cu_pos_y_in_pix * ps_recon_datastore->i4_lumaRecon_stride; |
| |
| ps_ctxt->s_cmn_opt_func.pf_copy_2d( |
| pu1_cur_luma_recon, |
| recon_luma_strd, |
| pu1_recon_src, |
| ps_recon_datastore->i4_lumaRecon_stride, |
| trans_size, |
| trans_size); |
| } |
| } |
| |
| if(ps_prms->u1_will_cabac_state_change) |
| { |
| ps_tu_enc_loop->i4_luma_coeff_offset = total_bytes; |
| } |
| |
| pu1_final_ecd_data += num_bytes; |
| /* update total bytes consumed */ |
| total_bytes += num_bytes; |
| |
| u1_is_cu_coded |= ps_tu->b1_y_cbf; |
| |
| /***************** Compute T,Q,IQ,IT & Recon for Chroma ********************/ |
| if(1 == chrm_present_flag) |
| { |
| pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix; |
| pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) + |
| (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd)); |
| |
| pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix; |
| pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) + |
| (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd)); |
| |
| pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix; |
| pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) + |
| (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd)); |
| |
| pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix; |
| pi2_cur_deq_data_chrm += ((cu_pos_y_in_pix >> 1) * cu_size) + |
| (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size)); |
| |
| if(INCLUDE_CHROMA_DURING_TU_RECURSION && |
| (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0) && |
| (PRED_MODE_INTRA != packed_pred_mode)) |
| { |
| WORD32 i4_num_bytes; |
| UWORD8 *pu1_chroma_pred; |
| UWORD8 *pu1_chroma_recon; |
| WORD16 *pi2_chroma_deq; |
| UWORD32 u4_zero_col; |
| UWORD32 u4_zero_row; |
| |
| for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++) |
| { |
| WORD32 chroma_trans_size = MAX(4, trans_size >> 1); |
| WORD32 i4_subtu_pos_x = cu_pos_x_in_pix; |
| WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size); |
| |
| if(0 == u1_is_422) |
| { |
| i4_subtu_pos_y >>= 1; |
| } |
| |
| pu1_chroma_pred = |
| pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd); |
| pu1_chroma_recon = pu1_cur_chroma_recon + |
| (i4_subtu_idx * chroma_trans_size * recon_chrma_strd); |
| pi2_chroma_deq = |
| pi2_cur_deq_data_chrm + (i4_subtu_idx * chroma_trans_size * cu_size); |
| |
| u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx]; |
| u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx]; |
| |
| if(ps_prms->u1_will_cabac_state_change) |
| { |
| i4_num_bytes = |
| ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx]; |
| } |
| else |
| { |
| i4_num_bytes = 0; |
| } |
| |
| memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes); |
| |
| pu1_old_ecd_data += i4_num_bytes; |
| |
| au1_is_recon_available[U_PLANE] = 0; |
| |
| if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data && |
| (!u1_compute_spatial_ssd_chroma || |
| (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma))) |
| { |
| if(!ps_recon_datastore->au1_is_chromaRecon_available[0] || |
| (ps_recon_datastore->au1_is_chromaRecon_available[0] && |
| (UCHAR_MAX == |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))) |
| { |
| ihevce_chroma_it_recon_fxn( |
| ps_ctxt, |
| pi2_chroma_deq, |
| cu_size, |
| pu1_chroma_pred, |
| pred_chrm_strd, |
| pu1_chroma_recon, |
| recon_chrma_strd, |
| pu1_final_ecd_data, |
| chroma_trans_size, |
| (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1, |
| u4_zero_col, |
| u4_zero_row, |
| U_PLANE); |
| } |
| else if( |
| ps_recon_datastore->au1_is_chromaRecon_available[0] && |
| (UCHAR_MAX != |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])) |
| { |
| UWORD8 *pu1_recon_src = |
| ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs |
| [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon |
| [U_PLANE][ctr][i4_subtu_idx]]) + |
| i4_subtu_pos_x + |
| i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride; |
| |
| ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( |
| pu1_recon_src, |
| ps_recon_datastore->i4_lumaRecon_stride, |
| pu1_chroma_recon, |
| recon_chrma_strd, |
| chroma_trans_size, |
| chroma_trans_size, |
| U_PLANE); |
| } |
| } |
| |
| u1_is_cu_coded |= |
| ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf); |
| |
| pu1_final_ecd_data += i4_num_bytes; |
| total_bytes += i4_num_bytes; |
| } |
| |
| for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++) |
| { |
| WORD32 chroma_trans_size = MAX(4, trans_size >> 1); |
| WORD32 i4_subtu_pos_x = cu_pos_x_in_pix; |
| WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size); |
| |
| if(0 == u1_is_422) |
| { |
| i4_subtu_pos_y >>= 1; |
| } |
| |
| pu1_chroma_pred = |
| pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd); |
| pu1_chroma_recon = pu1_cur_chroma_recon + |
| (i4_subtu_idx * chroma_trans_size * recon_chrma_strd); |
| pi2_chroma_deq = pi2_cur_deq_data_chrm + |
| (i4_subtu_idx * chroma_trans_size * cu_size) + |
| chroma_trans_size; |
| |
| u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx]; |
| u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx]; |
| |
| if(ps_prms->u1_will_cabac_state_change) |
| { |
| i4_num_bytes = |
| ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx]; |
| } |
| else |
| { |
| i4_num_bytes = 0; |
| } |
| |
| memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes); |
| |
| pu1_old_ecd_data += i4_num_bytes; |
| |
| au1_is_recon_available[V_PLANE] = 0; |
| |
| if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data && |
| (!u1_compute_spatial_ssd_chroma || |
| (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma))) |
| { |
| if(!ps_recon_datastore->au1_is_chromaRecon_available[0] || |
| (ps_recon_datastore->au1_is_chromaRecon_available[0] && |
| (UCHAR_MAX == |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))) |
| { |
| ihevce_chroma_it_recon_fxn( |
| ps_ctxt, |
| pi2_chroma_deq, |
| cu_size, |
| pu1_chroma_pred, |
| pred_chrm_strd, |
| pu1_chroma_recon, |
| recon_chrma_strd, |
| pu1_final_ecd_data, |
| chroma_trans_size, |
| (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1, |
| u4_zero_col, |
| u4_zero_row, |
| V_PLANE); |
| } |
| else if( |
| ps_recon_datastore->au1_is_chromaRecon_available[0] && |
| (UCHAR_MAX != |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])) |
| { |
| UWORD8 *pu1_recon_src = |
| ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs |
| [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon |
| [V_PLANE][ctr][i4_subtu_idx]]) + |
| i4_subtu_pos_x + |
| i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride; |
| |
| ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( |
| pu1_recon_src, |
| ps_recon_datastore->i4_lumaRecon_stride, |
| pu1_chroma_recon, |
| recon_chrma_strd, |
| chroma_trans_size, |
| chroma_trans_size, |
| V_PLANE); |
| } |
| } |
| |
| u1_is_cu_coded |= |
| ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf); |
| |
| pu1_final_ecd_data += i4_num_bytes; |
| total_bytes += i4_num_bytes; |
| } |
| } |
| else |
| { |
| WORD32 cb_zero_col, cb_zero_row, cr_zero_col, cr_zero_row; |
| |
| for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++) |
| { |
| WORD32 cb_cbf, cr_cbf; |
| WORD32 cb_num_bytes, cr_num_bytes; |
| |
| WORD32 chroma_trans_size = MAX(4, trans_size >> 1); |
| |
| WORD32 i4_subtu_pos_x = cu_pos_x_in_pix; |
| WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size); |
| |
| if(0 == u1_is_422) |
| { |
| i4_subtu_pos_y >>= 1; |
| } |
| |
| pu1_cur_src_chrm += (i4_subtu_idx * chroma_trans_size * src_chrm_strd); |
| pu1_cur_pred_chrm += (i4_subtu_idx * chroma_trans_size * pred_chrm_strd); |
| pu1_cur_chroma_recon += (i4_subtu_idx * chroma_trans_size * recon_chrma_strd); |
| pi2_cur_deq_data_chrm += (i4_subtu_idx * chroma_trans_size * cu_size); |
| |
| if((PRED_MODE_INTRA == packed_pred_mode) && |
| (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data)) |
| { |
| WORD32 nbr_flags, left_strd_chrm, chrm_pred_func_idx; |
| UWORD8 *pu1_left_chrm; |
| UWORD8 *pu1_top_chrm; |
| UWORD8 *pu1_top_left_chrm; |
| |
| nbr_flags = ihevce_get_intra_chroma_tu_nbr( |
| *pu4_nbr_flags, i4_subtu_idx, chroma_trans_size, u1_is_422); |
| |
| /* left cu boundary */ |
| if(0 == i4_subtu_pos_x) |
| { |
| left_strd_chrm = ps_chrm_cu_buf_prms->i4_cu_left_stride; |
| pu1_left_chrm = |
| ps_chrm_cu_buf_prms->pu1_cu_left + i4_subtu_pos_y * left_strd_chrm; |
| } |
| else |
| { |
| pu1_left_chrm = pu1_cur_chroma_recon - 2; |
| left_strd_chrm = recon_chrma_strd; |
| } |
| |
| /* top cu boundary */ |
| if(0 == i4_subtu_pos_y) |
| { |
| pu1_top_chrm = ps_chrm_cu_buf_prms->pu1_cu_top + i4_subtu_pos_x; |
| } |
| else |
| { |
| pu1_top_chrm = pu1_cur_chroma_recon - recon_chrma_strd; |
| } |
| |
| /* by default top left is set to cu top left */ |
| pu1_top_left_chrm = ps_chrm_cu_buf_prms->pu1_cu_top_left; |
| |
| /* top left based on position */ |
| if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x)) |
| { |
| pu1_top_left_chrm = pu1_left_chrm - left_strd_chrm; |
| } |
| else if(0 != i4_subtu_pos_x) |
| { |
| pu1_top_left_chrm = pu1_top_chrm - 2; |
| } |
| |
| /* call the chroma reference array substitution */ |
| ihevc_intra_pred_chroma_ref_substitution_fptr( |
| pu1_top_left_chrm, |
| pu1_top_chrm, |
| pu1_left_chrm, |
| left_strd_chrm, |
| chroma_trans_size, |
| nbr_flags, |
| (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
| 1); |
| |
| /* use the look up to get the function idx */ |
| chrm_pred_func_idx = g_i4_ip_funcs[chroma_pred_mode]; |
| |
| /* call the intra prediction function */ |
| ps_ctxt->apf_chrm_ip[chrm_pred_func_idx]( |
| (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
| 1, |
| pu1_cur_pred_chrm, |
| pred_chrm_strd, |
| chroma_trans_size, |
| chroma_pred_mode); |
| } |
| |
| /**---------- Compute iq&coeff data if required : Chroma ------------**/ |
| if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data) |
| { |
| WORD32 perform_sbh, perform_rdoq, temp_bits; |
| |
| if(ps_prms->u1_recompute_sbh_and_rdoq) |
| { |
| perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH); |
| perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ); |
| } |
| else |
| { |
| /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/ |
| perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh; |
| /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean |
| we would have to do RDOQ again.*/ |
| perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq; |
| } |
| |
| /* populate the coeffs scan idx */ |
| ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT; |
| |
| if(PRED_MODE_INTRA == packed_pred_mode) |
| { |
| /* for 4x4 transforms based on intra pred mode scan is choosen*/ |
| if(4 == chroma_trans_size) |
| { |
| /* for modes from 22 upto 30 horizontal scan is used */ |
| if((chroma_pred_mode > 21) && (chroma_pred_mode < 31)) |
| { |
| ps_ctxt->i4_scan_idx = SCAN_HORZ; |
| } |
| /* for modes from 6 upto 14 horizontal scan is used */ |
| else if((chroma_pred_mode > 5) && (chroma_pred_mode < 15)) |
| { |
| ps_ctxt->i4_scan_idx = SCAN_VERT; |
| } |
| } |
| } |
| |
| #if DISABLE_RDOQ_INTRA |
| if(PRED_MODE_INTRA == packed_pred_mode) |
| { |
| perform_rdoq = 0; |
| } |
| #endif |
| |
| /* RDOPT copy States : TU init (best until prev TU) to current */ |
| COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0] + |
| IHEVC_CAB_COEFFX_PREFIX, |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
| IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
| |
| ASSERT(rd_opt_best_idx == ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx); |
| /*If BEST candidate RDOQ is enabled, Eithe no coef level rdoq or CU level rdoq has to be enabled |
| so that all candidates and best candidate are quantized with same rounding factor */ |
| if(1 == perform_rdoq) |
| { |
| ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING); |
| } |
| |
| if(!ps_best_cu_prms->u1_skip_flag || |
| !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt) |
| { |
| /* Cb */ |
| cb_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn( |
| ps_ctxt, |
| pu1_cur_pred_chrm, |
| pred_chrm_strd, |
| pu1_cur_src_chrm, |
| src_chrm_strd, |
| pi2_cur_deq_data_chrm, |
| cu_size, |
| pu1_chrm_recon, |
| recon_chrma_strd, |
| pu1_final_ecd_data, |
| pu1_csbf_buf, |
| csbf_strd, |
| chroma_trans_size, |
| ps_ctxt->i4_scan_idx, |
| (PRED_MODE_INTRA == packed_pred_mode), |
| &cb_num_bytes, |
| &temp_bits, |
| &cb_zero_col, |
| &cb_zero_row, |
| &au1_is_recon_available[U_PLANE], |
| perform_sbh, |
| perform_rdoq, |
| &i8_ssd, |
| #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| !ps_ctxt->u1_is_refPic |
| ? ALPHA_FOR_NOISE_TERM_IN_RDOPT |
| : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
| (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) / |
| 100.0, |
| ps_prms->u1_is_cu_noisy, |
| #endif |
| ps_best_cu_prms->u1_skip_flag && |
| ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt, |
| u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD |
| : FREQUENCY_DOMAIN_SSD, |
| U_PLANE); |
| } |
| else |
| { |
| cb_cbf = 0; |
| temp_bits = 0; |
| cb_num_bytes = 0; |
| au1_is_recon_available[U_PLANE] = 0; |
| cb_zero_col = 0; |
| cb_zero_row = 0; |
| } |
| |
| /* Accumulate chroma residual bits */ |
| ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits; |
| |
| /* RDOPT copy States : New updated after curr TU to TU init */ |
| if(0 != cb_cbf) |
| { |
| COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0] + |
| IHEVC_CAB_COEFFX_PREFIX, |
| IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
| } |
| /* RDOPT copy States : Restoring back the Cb init state to Cr */ |
| else |
| { |
| COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0] + |
| IHEVC_CAB_COEFFX_PREFIX, |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
| IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
| } |
| |
| if(!ps_best_cu_prms->u1_skip_flag || |
| !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt) |
| { |
| /* Cr */ |
| cr_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn( |
| ps_ctxt, |
| pu1_cur_pred_chrm, |
| pred_chrm_strd, |
| pu1_cur_src_chrm, |
| src_chrm_strd, |
| pi2_cur_deq_data_chrm + chroma_trans_size, |
| cu_size, |
| pu1_chrm_recon, |
| recon_chrma_strd, |
| pu1_final_ecd_data + cb_num_bytes, |
| pu1_csbf_buf, |
| csbf_strd, |
| chroma_trans_size, |
| ps_ctxt->i4_scan_idx, |
| (PRED_MODE_INTRA == packed_pred_mode), |
| &cr_num_bytes, |
| &temp_bits, |
| &cr_zero_col, |
| &cr_zero_row, |
| &au1_is_recon_available[V_PLANE], |
| perform_sbh, |
| perform_rdoq, |
| &i8_ssd, |
| #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| !ps_ctxt->u1_is_refPic |
| ? ALPHA_FOR_NOISE_TERM_IN_RDOPT |
| : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
| (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) / |
| 100.0, |
| ps_prms->u1_is_cu_noisy, |
| #endif |
| ps_best_cu_prms->u1_skip_flag && |
| ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt, |
| u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD |
| : FREQUENCY_DOMAIN_SSD, |
| V_PLANE); |
| } |
| else |
| { |
| cr_cbf = 0; |
| temp_bits = 0; |
| cr_num_bytes = 0; |
| au1_is_recon_available[V_PLANE] = 0; |
| cr_zero_col = 0; |
| cr_zero_row = 0; |
| } |
| |
| /* Accumulate chroma residual bits */ |
| ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits; |
| |
| /* RDOPT copy States : New updated after curr TU to TU init */ |
| if(0 != cr_cbf) |
| { |
| COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0] + |
| IHEVC_CAB_COEFFX_PREFIX, |
| IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
| } |
| |
| if(0 == i4_subtu_idx) |
| { |
| ps_tu->b1_cb_cbf = cb_cbf; |
| ps_tu->b1_cr_cbf = cr_cbf; |
| } |
| else |
| { |
| ps_tu->b1_cb_cbf_subtu1 = cb_cbf; |
| ps_tu->b1_cr_cbf_subtu1 = cr_cbf; |
| } |
| } |
| else |
| { |
| cb_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx]; |
| cb_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx]; |
| cr_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx]; |
| cr_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx]; |
| |
| if(ps_prms->u1_will_cabac_state_change) |
| { |
| cb_num_bytes = |
| ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx]; |
| } |
| else |
| { |
| cb_num_bytes = 0; |
| } |
| |
| if(ps_prms->u1_will_cabac_state_change) |
| { |
| cr_num_bytes = |
| ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx]; |
| } |
| else |
| { |
| cr_num_bytes = 0; |
| } |
| |
| /* copy cb ecd data to final buffer */ |
| memcpy(pu1_final_ecd_data, pu1_chrm_old_ecd_data, cb_num_bytes); |
| |
| pu1_chrm_old_ecd_data += cb_num_bytes; |
| |
| /* copy cb ecd data to final buffer */ |
| memcpy( |
| (pu1_final_ecd_data + cb_num_bytes), |
| pu1_chrm_old_ecd_data, |
| cr_num_bytes); |
| |
| pu1_chrm_old_ecd_data += cr_num_bytes; |
| |
| au1_is_recon_available[U_PLANE] = 0; |
| au1_is_recon_available[V_PLANE] = 0; |
| } |
| |
| /**-------- Compute Recon data (Do IT & Recon) : Chroma -----------**/ |
| if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data && |
| (!u1_compute_spatial_ssd_chroma || |
| (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma))) |
| { |
| if(!ps_recon_datastore->au1_is_chromaRecon_available[0] || |
| (ps_recon_datastore->au1_is_chromaRecon_available[0] && |
| (UCHAR_MAX == |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))) |
| { |
| ihevce_chroma_it_recon_fxn( |
| ps_ctxt, |
| pi2_cur_deq_data_chrm, |
| cu_size, |
| pu1_cur_pred_chrm, |
| pred_chrm_strd, |
| pu1_cur_chroma_recon, |
| recon_chrma_strd, |
| pu1_final_ecd_data, |
| chroma_trans_size, |
| (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1, |
| cb_zero_col, |
| cb_zero_row, |
| U_PLANE); |
| } |
| else if( |
| ps_recon_datastore->au1_is_chromaRecon_available[0] && |
| (UCHAR_MAX != |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])) |
| { |
| UWORD8 *pu1_recon_src = |
| ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs |
| [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon |
| [U_PLANE][ctr][i4_subtu_idx]]) + |
| i4_subtu_pos_x + |
| i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride; |
| |
| ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( |
| pu1_recon_src, |
| ps_recon_datastore->i4_lumaRecon_stride, |
| pu1_cur_chroma_recon, |
| recon_chrma_strd, |
| chroma_trans_size, |
| chroma_trans_size, |
| U_PLANE); |
| } |
| } |
| |
| u1_is_cu_coded |= |
| ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf); |
| |
| if(ps_prms->u1_will_cabac_state_change) |
| { |
| ps_tu_enc_loop->ai4_cb_coeff_offset[i4_subtu_idx] = total_bytes; |
| } |
| |
| pu1_final_ecd_data += cb_num_bytes; |
| /* update total bytes consumed */ |
| total_bytes += cb_num_bytes; |
| |
| if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data && |
| (!u1_compute_spatial_ssd_chroma || |
| (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma))) |
| { |
| if(!ps_recon_datastore->au1_is_chromaRecon_available[0] || |
| (ps_recon_datastore->au1_is_chromaRecon_available[0] && |
| (UCHAR_MAX == |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))) |
| { |
| ihevce_chroma_it_recon_fxn( |
| ps_ctxt, |
| pi2_cur_deq_data_chrm + chroma_trans_size, |
| cu_size, |
| pu1_cur_pred_chrm, |
| pred_chrm_strd, |
| pu1_cur_chroma_recon, |
| recon_chrma_strd, |
| pu1_final_ecd_data, |
| chroma_trans_size, |
| (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1, |
| cr_zero_col, |
| cr_zero_row, |
| V_PLANE); |
| } |
| else if( |
| ps_recon_datastore->au1_is_chromaRecon_available[0] && |
| (UCHAR_MAX != |
| ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])) |
| { |
| UWORD8 *pu1_recon_src = |
| ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs |
| [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon |
| [V_PLANE][ctr][i4_subtu_idx]]) + |
| i4_subtu_pos_x + |
| i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride; |
| |
| ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( |
| pu1_recon_src, |
| ps_recon_datastore->i4_lumaRecon_stride, |
| pu1_cur_chroma_recon, |
| recon_chrma_strd, |
| chroma_trans_size, |
| chroma_trans_size, |
| V_PLANE); |
| } |
| } |
| |
| u1_is_cu_coded |= |
| ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf); |
| |
| if(ps_prms->u1_will_cabac_state_change) |
| { |
| ps_tu_enc_loop->ai4_cr_coeff_offset[i4_subtu_idx] = total_bytes; |
| } |
| |
| pu1_final_ecd_data += cr_num_bytes; |
| /* update total bytes consumed */ |
| total_bytes += cr_num_bytes; |
| } |
| } |
| } |
| else |
| { |
| ps_tu_enc_loop->ai4_cb_coeff_offset[0] = total_bytes; |
| ps_tu_enc_loop->ai4_cr_coeff_offset[0] = total_bytes; |
| ps_tu_enc_loop->ai4_cb_coeff_offset[1] = total_bytes; |
| ps_tu_enc_loop->ai4_cr_coeff_offset[1] = total_bytes; |
| ps_tu->b1_cb_cbf = 0; |
| ps_tu->b1_cr_cbf = 0; |
| ps_tu->b1_cb_cbf_subtu1 = 0; |
| ps_tu->b1_cr_cbf_subtu1 = 0; |
| } |
| |
| /* Update to next TU */ |
| ps_tu_enc_loop++; |
| ps_tu_enc_loop_temp_prms++; |
| |
| pu4_nbr_flags++; |
| pu1_intra_pred_mode++; |
| |
| /*Do not set the nbr map for last pu in cu */ |
| if((num_tu_in_cu - 1) != ctr) |
| { |
| /* set the neighbour map to 1 */ |
| ihevce_set_nbr_map( |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| cu_pos_x_in_4x4, |
| cu_pos_y_in_4x4, |
| (trans_size >> 2), |
| 1); |
| } |
| } |
| |
| if(ps_prms->u1_will_cabac_state_change) |
| { |
| ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded; |
| |
| /* Modify skip flag, if luma is skipped & Chroma is coded */ |
| if((1 == u1_is_cu_coded) && (PRED_MODE_SKIP == packed_pred_mode)) |
| { |
| ps_best_cu_prms->u1_skip_flag = 0; |
| } |
| } |
| |
| /* during chroma evaluation if skip decision was over written */ |
| /* then the current skip candidate is set to a non skip candidate */ |
| if(PRED_MODE_INTRA != packed_pred_mode) |
| { |
| ps_best_inter_cand->b1_skip_flag = ps_best_cu_prms->u1_skip_flag; |
| } |
| |
| /**------------- Compute header data if required --------------**/ |
| if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data) |
| { |
| WORD32 cbf_bits; |
| WORD32 cu_bits; |
| WORD32 unit_4x4_size = cu_size >> 2; |
| |
| /*Restoring the running reference into the best rdopt_ctxt cabac states which will then |
| be copied as the base reference for the next cu |
| Assumption : We are ensuring that the u1_eval_header_data flag is set to 1 only if either |
| luma and chroma are being reevaluated*/ |
| COPY_CABAC_STATES( |
| &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] |
| .s_cabac_ctxt.au1_ctxt_models[0], |
| &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
| IHEVC_CAB_CTXT_END); |
| |
| /* get the neighbour availability flags for current cu */ |
| ihevce_get_only_nbr_flag( |
| &s_nbr, |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| (cu_pos_x << 1), |
| (cu_pos_y << 1), |
| unit_4x4_size, |
| unit_4x4_size); |
| |
| cu_bits = ihevce_entropy_rdo_encode_cu( |
| &ps_ctxt->s_rdopt_entropy_ctxt, |
| ps_best_cu_prms, |
| cu_pos_x, |
| cu_pos_y, |
| cu_size, |
| ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail |
| : s_nbr.u1_top_avail, |
| s_nbr.u1_left_avail, |
| (pu1_final_ecd_data - total_bytes), |
| &cbf_bits); |
| |
| /* cbf bits are excluded from header bits, instead considered as texture bits */ |
| ps_best_cu_prms->u4_cu_hdr_bits = cu_bits - cbf_bits; |
| ps_best_cu_prms->u4_cu_cbf_bits = cbf_bits; |
| } |
| |
| if(ps_prms->u1_will_cabac_state_change) |
| { |
| ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes; |
| } |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_set_eval_flags \endif |
| * |
| * \brief |
| * Function which decides which eval flags have to be set based on present |
| * and RDOQ conditions |
| * |
| * \param[in] ps_ctxt : encoder ctxt pointer |
| * \param[in] enc_loop_cu_final_prms_t : pointer to final cu params |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_set_eval_flags( |
| ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_final_prms_t *ps_enc_loop_bestprms) |
| { |
| WORD32 count = 0; |
| |
| ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0; |
| |
| ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data = |
| !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt; |
| |
| if(ps_ctxt->u1_disable_intra_eval && (!(ps_ctxt->i4_deblk_pad_hpel_cur_pic & 0x1))) |
| { |
| ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 0; |
| } |
| else |
| { |
| ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1; |
| } |
| |
| if((1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq) || |
| (1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh)) |
| { |
| /* When rdoq is enabled only for the best candidate, in case of in Intra nTU |
| RDOQ might have altered the coeffs of the neighbour CU. As a result, the pred |
| for the current CU will change. Therefore, we need to reevaluate the pred data*/ |
| if((ps_enc_loop_bestprms->u2_num_tus_in_cu > 1) && |
| (ps_enc_loop_bestprms->u1_intra_flag == 1)) |
| { |
| ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 1; |
| ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data = 1; |
| } |
| if(ps_enc_loop_bestprms->u1_skip_flag == 1) |
| { |
| for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++) |
| { |
| ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
| .b1_eval_luma_iq_and_coeff_data = 0; |
| ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
| .b1_eval_chroma_iq_and_coeff_data = 0; |
| } |
| } |
| else |
| { |
| for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++) |
| { |
| ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
| .b1_eval_luma_iq_and_coeff_data = 1; |
| ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
| .b1_eval_chroma_iq_and_coeff_data = 1; |
| } |
| } |
| } |
| else |
| { |
| switch(ps_ctxt->i4_quality_preset) |
| { |
| case IHEVCE_QUALITY_P0: |
| case IHEVCE_QUALITY_P2: |
| case IHEVCE_QUALITY_P3: |
| { |
| for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++) |
| { |
| ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
| .b1_eval_luma_iq_and_coeff_data = 0; |
| ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
| .b1_eval_chroma_iq_and_coeff_data = |
| !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt; |
| } |
| |
| break; |
| } |
| case IHEVCE_QUALITY_P4: |
| case IHEVCE_QUALITY_P5: |
| { |
| for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++) |
| { |
| ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
| .b1_eval_luma_iq_and_coeff_data = 0; |
| ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
| .b1_eval_chroma_iq_and_coeff_data = |
| !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt; |
| } |
| |
| break; |
| } |
| case IHEVCE_QUALITY_P6: |
| { |
| for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++) |
| { |
| ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
| .b1_eval_luma_iq_and_coeff_data = 0; |
| #if !ENABLE_CHROMA_TRACKING_OF_LUMA_CBF_IN_XS25 |
| ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
| .b1_eval_chroma_iq_and_coeff_data = |
| !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt; |
| #else |
| if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_temporal_layer_id > 1) && |
| (ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b3_size >= 2)) |
| { |
| ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
| .b1_eval_chroma_iq_and_coeff_data = |
| ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b1_y_cbf; |
| } |
| else |
| { |
| ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
| .b1_eval_chroma_iq_and_coeff_data = |
| !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt; |
| } |
| #endif |
| } |
| |
| break; |
| } |
| default: |
| { |
| break; |
| } |
| } |
| } |
| |
| /* Not recomputing Luma pred-data and header data for any preset now */ |
| ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1; |
| } |
| |
| /** |
| ****************************************************************************** |
| * |
| * @brief Shrink's TU tree of inter CUs by merging redundnant child nodes |
| * (not coded children) into a parent node(not coded). |
| * |
| * @par Description |
| * This is required post RDO evaluation as TU decisions are |
| * pre-determined(pre RDO) based on recursive SATD, |
| * while the quad children TU's can be skipped during RDO |
| * |
| * The shrink process is applied iteratively till there are no |
| * more modes to shrink |
| * |
| * @param[inout] ps_tu_enc_loop |
| * pointer to tu enc loop params of inter cu |
| * |
| * @param[inout] ps_tu_enc_loop_temp_prms |
| * pointer to temp tu enc loop params of inter cu |
| * |
| * @param[in] num_tu_in_cu |
| * number of tus in cu |
| * |
| * @return modified number of tus in cu |
| * |
| ****************************************************************************** |
| */ |
| WORD32 ihevce_shrink_inter_tu_tree( |
| tu_enc_loop_out_t *ps_tu_enc_loop, |
| tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms, |
| recon_datastore_t *ps_recon_datastore, |
| WORD32 num_tu_in_cu, |
| UWORD8 u1_is_422) |
| { |
| WORD32 recurse = 1; |
| WORD32 ctr; |
| |
| /* ------------- Quadtree TU Split Transform flag optimization ------------ */ |
| /* Post RDO, if all 4 child nodes are not coded the overheads of split TU */ |
| /* flags and cbf flags are saved by merging to parent node and marking */ |
| /* parent TU as not coded */ |
| /* */ |
| /* ParentTUSplit=1 */ |
| /* | */ |
| /* --------------------------------------------------------- */ |
| /* |C0(Not coded) | C1(Not coded) | C2(Not coded) | C3(Not coded) */ |
| /* || */ |
| /* \/ */ |
| /* */ |
| /* ParentTUSplit=0 (Not Coded) */ |
| /* */ |
| /* ------------- Quadtree TU Split Transform flag optimization ------------ */ |
| while((num_tu_in_cu > 4) && recurse) |
| { |
| recurse = 0; |
| |
| /* Validate inter CU */ |
| //ASSERT(ps_tu_enc_loop[0].s_tu.s_tu.b1_intra_flag == 0); /*b1_intra_flag no longer a member of tu structure */ |
| |
| /* loop for all tu blocks in current cu */ |
| for(ctr = 0; ctr < num_tu_in_cu;) |
| { |
| /* Get current tu posx, posy and size */ |
| WORD32 curr_pos_x = ps_tu_enc_loop[ctr].s_tu.b4_pos_x << 2; |
| WORD32 curr_pos_y = ps_tu_enc_loop[ctr].s_tu.b4_pos_y << 2; |
| /* +1 is for parents size */ |
| WORD32 parent_tu_size = 1 << (ps_tu_enc_loop[ctr].s_tu.b3_size + 2 + 1); |
| |
| /* eval merge if leaf nodes reached i.e all child tus are of same size and first tu pos is same as parent pos */ |
| WORD32 eval_merge = ((curr_pos_x & (parent_tu_size - 1)) == 0); |
| eval_merge &= ((curr_pos_y & (parent_tu_size - 1)) == 0); |
| |
| /* As TUs are published in encode order (Z SCAN), */ |
| /* Four consecutive TUS of same size implies we have hit leaf nodes. */ |
| if(((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 1].s_tu.b3_size)) && |
| ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 2].s_tu.b3_size)) && |
| ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 3].s_tu.b3_size)) && |
| eval_merge) |
| { |
| WORD32 merge_parent = 1; |
| |
| /* If any leaf noded is coded, it cannot be merged to parent */ |
| if((ps_tu_enc_loop[ctr].s_tu.b1_y_cbf) || (ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf) || |
| (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf) || |
| |
| (ps_tu_enc_loop[ctr + 1].s_tu.b1_y_cbf) || |
| (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf) || |
| (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf) || |
| |
| (ps_tu_enc_loop[ctr + 2].s_tu.b1_y_cbf) || |
| (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf) || |
| (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf) || |
| |
| (ps_tu_enc_loop[ctr + 3].s_tu.b1_y_cbf) || |
| (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf) || |
| (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf)) |
| { |
| merge_parent = 0; |
| } |
| |
| if(u1_is_422) |
| { |
| if((ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1) || |
| (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1) || |
| |
| (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf_subtu1) || |
| (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf_subtu1) || |
| |
| (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf_subtu1) || |
| (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf_subtu1) || |
| |
| (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf_subtu1) || |
| (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf_subtu1)) |
| { |
| merge_parent = 0; |
| } |
| } |
| |
| if(merge_parent) |
| { |
| /* Merge all the children (ctr,ctr+1,ctr+2,ctr+3) to parent (ctr) */ |
| |
| if(ps_recon_datastore->u1_is_lumaRecon_available) |
| { |
| ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX; |
| |
| memmove( |
| &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 1], |
| &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 4], |
| (num_tu_in_cu - ctr - 4) * sizeof(UWORD8)); |
| } |
| |
| if(ps_recon_datastore->au1_is_chromaRecon_available[0]) |
| { |
| ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][0] = |
| UCHAR_MAX; |
| ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][0] = |
| UCHAR_MAX; |
| |
| memmove( |
| &ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][0], |
| &ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][0], |
| (num_tu_in_cu - ctr - 4) * sizeof(UWORD8)); |
| |
| memmove( |
| &ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][0], |
| &ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][0], |
| (num_tu_in_cu - ctr - 4) * sizeof(UWORD8)); |
| |
| if(u1_is_422) |
| { |
| ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][1] = |
| UCHAR_MAX; |
| ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][1] = |
| UCHAR_MAX; |
| |
| memmove( |
| &ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][1], |
| &ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][1], |
| (num_tu_in_cu - ctr - 4) * sizeof(UWORD8)); |
| |
| memmove( |
| &ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][1], |
| &ps_recon_datastore |
| ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][1], |
| (num_tu_in_cu - ctr - 4) * sizeof(UWORD8)); |
| } |
| } |
| |
| /* Parent node size is one more than that of child */ |
| ps_tu_enc_loop[ctr].s_tu.b3_size++; |
| |
| ctr++; |
| |
| /* move the subsequent TUs to next element */ |
| ASSERT(num_tu_in_cu >= (ctr + 3)); |
| memmove( |
| (void *)(ps_tu_enc_loop + ctr), |
| (void *)(ps_tu_enc_loop + ctr + 3), |
| (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_out_t)); |
| |
| /* Also memmove the temp TU params */ |
| memmove( |
| (void *)(ps_tu_enc_loop_temp_prms + ctr), |
| (void *)(ps_tu_enc_loop_temp_prms + ctr + 3), |
| (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_temp_prms_t)); |
| |
| /* Number of TUs in CU are now less by 3 */ |
| num_tu_in_cu -= 3; |
| |
| /* Recurse again as new parent also be can be merged later */ |
| recurse = 1; |
| } |
| else |
| { |
| /* Go to next set of leaf nodes */ |
| ctr += 4; |
| } |
| } |
| else |
| { |
| ctr++; |
| } |
| } |
| } |
| |
| /* return the modified num TUs*/ |
| ASSERT(num_tu_in_cu > 0); |
| return (num_tu_in_cu); |
| } |
| |
| UWORD8 ihevce_intra_mode_nxn_hash_updater( |
| UWORD8 *pu1_mode_array, UWORD8 *pu1_hash_table, UWORD8 u1_num_ipe_modes) |
| { |
| WORD32 i; |
| WORD32 i4_mode; |
| |
| for(i = 0; i < MAX_INTRA_CU_CANDIDATES; i++) |
| { |
| if(pu1_mode_array[i] < 35) |
| { |
| if(pu1_mode_array[i] != 0) |
| { |
| i4_mode = pu1_mode_array[i] - 1; |
| |
| if(!pu1_hash_table[i4_mode]) |
| { |
| pu1_hash_table[i4_mode] = 1; |
| pu1_mode_array[u1_num_ipe_modes] = i4_mode; |
| u1_num_ipe_modes++; |
| } |
| } |
| |
| if(pu1_mode_array[i] != 34) |
| { |
| i4_mode = pu1_mode_array[i] + 1; |
| |
| if((!pu1_hash_table[i4_mode])) |
| { |
| pu1_hash_table[i4_mode] = 1; |
| pu1_mode_array[u1_num_ipe_modes] = i4_mode; |
| u1_num_ipe_modes++; |
| } |
| } |
| } |
| } |
| |
| if(!pu1_hash_table[INTRA_PLANAR]) |
| { |
| pu1_hash_table[INTRA_PLANAR] = 1; |
| pu1_mode_array[u1_num_ipe_modes] = INTRA_PLANAR; |
| u1_num_ipe_modes++; |
| } |
| |
| if(!pu1_hash_table[INTRA_DC]) |
| { |
| pu1_hash_table[INTRA_DC] = 1; |
| pu1_mode_array[u1_num_ipe_modes] = INTRA_DC; |
| u1_num_ipe_modes++; |
| } |
| |
| return u1_num_ipe_modes; |
| } |
| |
#if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
/*!
******************************************************************************
* \if Function name : ihevce_determine_tu_tree_distribution \endif
*
* \brief
*    Runs the recursive-TU SATD evaluator matching the CU size on the
*    prediction stored in the inter candidate, which fills the candidate's
*    TU split flags, and returns the evaluator's result.
*
* \param[in,out] ps_cu_data        inter candidate; supplies prediction
*                                  buffer and stride, receives
*                                  ai4_tu_split_flag
* \param[in]     ps_func_selector  forwarded to the evaluator
* \param[in]     pi2_scratch_mem   working memory handed to the evaluator
* \param[in]     pu1_inp           source pointer
* \param[in]     i4_inp_stride     source stride
* \param[in]     i4_lambda         lambda forwarded to the evaluator
* \param[in]     u1_lambda_q_shift lambda Q format forwarded to the evaluator
* \param[in]     u1_cu_size        CU size; selects the evaluator through
*                                  hme_get_range(u1_cu_size) - 4
* \param[in]     u1_max_tr_depth   maximum transform depth (capped at 1 for
*                                  a CU size of 64)
*
* \return
*    Value returned by the selected evaluator
*
* \author
*  Ittiam
*
*****************************************************************************
*/
WORD32 ihevce_determine_tu_tree_distribution(
    cu_inter_cand_t *ps_cu_data,
    me_func_selector_t *ps_func_selector,
    WORD16 *pi2_scratch_mem,
    UWORD8 *pu1_inp,
    WORD32 i4_inp_stride,
    WORD32 i4_lambda,
    UWORD8 u1_lambda_q_shift,
    UWORD8 u1_cu_size,
    UWORD8 u1_max_tr_depth)
{
    PF_SAD_FXN_TU_REC apf_tu_rec_satd[4];
    err_prms_t s_satd_prms;
    WORD32 i4_cu_satd;

    /* Evaluator table, indexed by CU size id */
    apf_tu_rec_satd[CU_64x64] = hme_evalsatd_pt_pu_64x64_tu_rec;
    apf_tu_rec_satd[CU_32x32] = hme_evalsatd_pt_pu_32x32_tu_rec;
    apf_tu_rec_satd[CU_16x16] = hme_evalsatd_pt_pu_16x16_tu_rec;
    apf_tu_rec_satd[CU_8x8] = hme_evalsatd_pt_pu_8x8_tu_rec;

    /* Source, prediction and working buffers */
    s_satd_prms.pu1_inp = pu1_inp;
    s_satd_prms.i4_inp_stride = i4_inp_stride;
    s_satd_prms.pu1_ref = ps_cu_data->pu1_pred_data;
    s_satd_prms.i4_ref_stride = ps_cu_data->i4_pred_data_stride;
    s_satd_prms.pu1_wkg_mem = (UWORD8 *)pi2_scratch_mem;

    /* Output locations */
    s_satd_prms.pi4_sad_grid = &i4_cu_satd;
    s_satd_prms.pi4_tu_split_flags = ps_cu_data->ai4_tu_split_flag;

    /* A 64x64 CU is evaluated with a transform depth of at most 1 */
    s_satd_prms.u1_max_tr_depth =
        (64 == u1_cu_size) ? MIN(1, u1_max_tr_depth) : u1_max_tr_depth;

    i4_cu_satd = apf_tu_rec_satd[hme_get_range(u1_cu_size) - 4](
        &s_satd_prms, i4_lambda, u1_lambda_q_shift, 0, ps_func_selector);

    /* With no transform depth available, a CU below 64x64 whose part size */
    /* field is non-zero gets its root split flag forced to 1              */
    if((0 == u1_max_tr_depth) && (0 != ps_cu_data->b3_part_size) && (64 != u1_cu_size))
    {
        ps_cu_data->ai4_tu_split_flag[0] = 1;
    }

    return i4_cu_satd;
}
#endif
| |
| void ihevce_populate_nbr_4x4_with_pu_data( |
| nbr_4x4_t *ps_nbr_4x4, pu_t *ps_pu, WORD32 i4_nbr_buf_stride) |
| { |
| WORD32 i, j; |
| |
| nbr_4x4_t *ps_tmp_4x4 = ps_nbr_4x4; |
| |
| WORD32 ht = (ps_pu->b4_ht + 1); |
| WORD32 wd = (ps_pu->b4_wd + 1); |
| |
| ps_nbr_4x4->b1_intra_flag = 0; |
| ps_nbr_4x4->b1_pred_l0_flag = !(ps_pu->b2_pred_mode & 1); |
| ps_nbr_4x4->b1_pred_l1_flag = (ps_pu->b2_pred_mode > PRED_L0); |
| ps_nbr_4x4->mv = ps_pu->mv; |
| |
| for(i = 0; i < ht; i++) |
| { |
| for(j = 0; j < wd; j++) |
| { |
| ps_tmp_4x4[j] = *ps_nbr_4x4; |
| } |
| |
| ps_tmp_4x4 += i4_nbr_buf_stride; |
| } |
| } |
| |
| void ihevce_call_luma_inter_pred_rdopt_pass1( |
| ihevce_enc_loop_ctxt_t *ps_ctxt, cu_inter_cand_t *ps_inter_cand, WORD32 cu_size) |
| { |
| pu_t *ps_pu; |
| UWORD8 *pu1_pred; |
| WORD32 pred_stride, ctr, num_cu_part, skip_or_merge_flag = 0; |
| WORD32 inter_pu_wd, inter_pu_ht; |
| |
| pu1_pred = ps_inter_cand->pu1_pred_data_scr; |
| pred_stride = ps_inter_cand->i4_pred_data_stride; |
| num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1; |
| |
| for(ctr = 0; ctr < num_cu_part; ctr++) |
| { |
| ps_pu = &ps_inter_cand->as_inter_pu[ctr]; |
| |
| /* IF AMP then each partitions can have diff wd ht */ |
| inter_pu_wd = (ps_pu->b4_wd + 1) << 2; |
| inter_pu_ht = (ps_pu->b4_ht + 1) << 2; |
| |
| skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag; |
| //if(0 == skip_or_merge_flag) |
| { |
| ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 1); |
| } |
| if((2 == num_cu_part) && (0 == ctr)) |
| { |
| /* 2Nx__ partion case */ |
| if(inter_pu_wd == cu_size) |
| { |
| pu1_pred += (inter_pu_ht * pred_stride); |
| } |
| |
| /* __x2N partion case */ |
| if(inter_pu_ht == cu_size) |
| { |
| pu1_pred += inter_pu_wd; |
| } |
| } |
| } |
| } |
| |
| LWORD64 ihevce_it_recon_ssd( |
| ihevce_enc_loop_ctxt_t *ps_ctxt, |
| UWORD8 *pu1_src, |
| WORD32 i4_src_strd, |
| UWORD8 *pu1_pred, |
| WORD32 i4_pred_strd, |
| WORD16 *pi2_deq_data, |
| WORD32 i4_deq_data_strd, |
| UWORD8 *pu1_recon, |
| WORD32 i4_recon_stride, |
| UWORD8 *pu1_ecd_data, |
| UWORD8 u1_trans_size, |
| UWORD8 u1_pred_mode, |
| WORD32 i4_cbf, |
| WORD32 i4_zero_col, |
| WORD32 i4_zero_row, |
| CHROMA_PLANE_ID_T e_chroma_plane) |
| { |
| if(NULL_PLANE == e_chroma_plane) |
| { |
| ihevce_it_recon_fxn( |
| ps_ctxt, |
| pi2_deq_data, |
| i4_deq_data_strd, |
| pu1_pred, |
| i4_pred_strd, |
| pu1_recon, |
| i4_recon_stride, |
| pu1_ecd_data, |
| u1_trans_size, |
| u1_pred_mode, |
| i4_cbf, |
| i4_zero_col, |
| i4_zero_row); |
| |
| return ps_ctxt->s_cmn_opt_func.pf_ssd_calculator( |
| pu1_recon, pu1_src, i4_recon_stride, i4_src_strd, u1_trans_size, u1_trans_size, |
| e_chroma_plane); |
| } |
| else |
| { |
| ihevce_chroma_it_recon_fxn( |
| ps_ctxt, |
| pi2_deq_data, |
| i4_deq_data_strd, |
| pu1_pred, |
| i4_pred_strd, |
| pu1_recon, |
| i4_recon_stride, |
| pu1_ecd_data, |
| u1_trans_size, |
| i4_cbf, |
| i4_zero_col, |
| i4_zero_row, |
| e_chroma_plane); |
| |
| return ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator( |
| pu1_recon, |
| pu1_src, |
| i4_recon_stride, |
| i4_src_strd, |
| u1_trans_size, |
| u1_trans_size, |
| e_chroma_plane); |
| } |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_chroma_t_q_iq_ssd_scan_fxn \endif |
| * |
| * \brief |
| * Transform unit level (Chroma) enc_loop function. For one chroma plane of |
| * a TU it computes the residue and transform, runs quant / inverse quant, |
| * scans the coefficients into the coeff buffer and produces the cost of |
| * the TU. Bit estimation, RDOQ, the spatial domain SSD and the zero cbf |
| * decision are compiled in only when RDOPT_ZERO_CBF_ENABLE is set. |
| * |
| * \param[in] ps_ctxt enc_loop module ctxt pointer (its scratch buffer and |
| * rdoq/sbh ctxt are written; i8_cu_not_coded_cost is |
| * accumulated when ENABLE_INTER_ZCU_COST is set) |
| * \param[in] pu1_pred pointer to predicted data buffer |
| * \param[in] pred_strd predicted buffer stride |
| * \param[in] pu1_src pointer to source data buffer |
| * \param[in] src_strd source buffer stride |
| * \param[out] pi2_deq_data pointer to store iq data |
| * \param[in] deq_data_strd iq data buffer stride |
| * \param[out] pu1_recon pointer to recon buffer; written only when |
| * e_ssd_type is SPATIAL_DOMAIN_SSD |
| * \param[in] i4_recon_stride recon buffer stride |
| * \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod) |
| * \param[out] pu1_csbf_buf pointer to store the csbf for all 4x4 in a current |
| * block |
| * \param[in] csbf_strd csbf buffer stride (asserted to be MAX_TU_IN_CTB_ROW) |
| * \param[in] trans_size transform size (4, 8, 16); a value of 2 is |
| * processed as 4 |
| * \param[in] i4_scan_idx scan index passed to the coeff scan function |
| * \param[in] intra_flag 0:Inter/Skip 1:Intra |
| * \param[out] pi4_coeff_off pointer to store the number of bytes produced in |
| * coeff buffer (0 when the TU ends up not coded) |
| * \param[out] pi4_tu_bits pointer to store the residual coding bit estimate |
| * of the current TU in RDopt Mode (0 when the TU |
| * ends up not coded) |
| * \param[out] pi4_zero_col pointer to store the zero_col info for the TU |
| * \param[out] pi4_zero_row pointer to store the zero_row info for the TU |
| * \param[out] pu1_is_recon_available [0] is set to 1 when pu1_recon has been |
| * written, to 0 otherwise |
| * \param[in] i4_perform_sbh non-zero enables sign data hiding |
| * \param[in] i4_perform_rdoq non-zero enables RDOQ |
| * \param[out] pi8_cost pointer to store the cost of the TU |
| * \param[in] i4_alpha_stim_multiplier alpha used for the noise term; present |
| * only when |
| * USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| * is set |
| * \param[in] u1_is_cu_noisy noise term is injected into the cost only when |
| * this and i4_alpha_stim_multiplier are non-zero; |
| * present under the same macro |
| * \param[in] u1_is_skip non-zero: nothing is coded, the cost is the SSD |
| * between prediction and source and 0 is returned |
| * \param[in] e_ssd_type SPATIAL_DOMAIN_SSD or FREQUENCY_DOMAIN_SSD |
| * \param[in] e_chroma_plane U_PLANE or V_PLANE (asserted) |
| * |
| * \return |
| * CBF of the current block |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| WORD32 ihevce_chroma_t_q_iq_ssd_scan_fxn( |
| ihevce_enc_loop_ctxt_t *ps_ctxt, |
| UWORD8 *pu1_pred, |
| WORD32 pred_strd, |
| UWORD8 *pu1_src, |
| WORD32 src_strd, |
| WORD16 *pi2_deq_data, |
| WORD32 deq_data_strd, |
| UWORD8 *pu1_recon, |
| WORD32 i4_recon_stride, |
| UWORD8 *pu1_ecd_data, |
| UWORD8 *pu1_csbf_buf, |
| WORD32 csbf_strd, |
| WORD32 trans_size, |
| WORD32 i4_scan_idx, |
| WORD32 intra_flag, |
| WORD32 *pi4_coeff_off, |
| WORD32 *pi4_tu_bits, |
| WORD32 *pi4_zero_col, |
| WORD32 *pi4_zero_row, |
| UWORD8 *pu1_is_recon_available, |
| WORD32 i4_perform_sbh, |
| WORD32 i4_perform_rdoq, |
| LWORD64 *pi8_cost, |
| #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| WORD32 i4_alpha_stim_multiplier, |
| UWORD8 u1_is_cu_noisy, |
| #endif |
| UWORD8 u1_is_skip, |
| SSD_TYPE_T e_ssd_type, |
| CHROMA_PLANE_ID_T e_chroma_plane) |
| { |
| WORD32 trans_idx, cbf, u4_blk_sad; |
| WORD16 *pi2_quant_coeffs; |
| WORD16 *pi2_trans_values; |
| WORD32 quant_scale_mat_offset; |
| WORD32 *pi4_trans_scratch; |
| WORD32 *pi4_subBlock2csbfId_map = NULL; |
| |
| #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3 |
| WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i; |
| #endif |
| |
| rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt; |
| /* zero cbf evaluation: enabled at ctxt level, or for non intra TUs when ENABLE_INTER_ZCU_COST is non-zero */ |
| WORD32 i4_perform_zcbf = (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE) || |
| (!intra_flag && ENABLE_INTER_ZCU_COST); |
| WORD32 i4_perform_coeff_level_rdoq = |
| (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING) && |
| (ps_ctxt->i4_chroma_quant_rounding_level == CHROMA_QUANT_ROUNDING); |
| |
| ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE)); |
| ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW); |
| /* outputs start as: no coeff bytes, no bits, recon not written */ |
| *pi4_coeff_off = 0; |
| *pi4_tu_bits = 0; |
| pu1_is_recon_available[0] = 0; |
| /* all scratch pointers view ps_ctxt->ai2_scratch: the first two start at element 0, pi2_trans_values at element (MAX_TRANS_SIZE * 2) */ |
| pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0]; |
| pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0]; |
| pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2); |
| /* a transform size of 2 is processed as 4 */ |
| if(2 == trans_size) |
| { |
| trans_size = 4; |
| } |
| |
| /* translate the transform size to index (4 -> 1, 8 -> 2, 16 -> 3) */ |
| trans_idx = trans_size >> 2; |
| |
| if(16 == trans_size) |
| { |
| trans_idx = 3; |
| } |
| /* skip: no transform / quant is done; the cost is the SSD between prediction and source */ |
| if(u1_is_skip) |
| { |
| pi8_cost[0] = ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator( |
| pu1_pred, |
| pu1_src, |
| pred_strd, |
| src_strd, |
| trans_size, |
| trans_size, |
| e_chroma_plane); |
| |
| if(e_ssd_type == SPATIAL_DOMAIN_SSD) |
| { |
| /* buffer copy from pred to recon */ |
| ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( |
| pu1_pred, |
| pred_strd, |
| pu1_recon, |
| i4_recon_stride, |
| trans_size, |
| trans_size, |
| e_chroma_plane); |
| |
| pu1_is_recon_available[0] = 1; |
| } |
| |
| #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
| { |
| pi8_cost[0] = ihevce_inject_stim_into_distortion( |
| pu1_src, |
| src_strd, |
| pu1_pred, |
| pred_strd, |
| pi8_cost[0], |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| e_chroma_plane); |
| } |
| #endif |
| |
| #if ENABLE_INTER_ZCU_COST |
| #if !WEIGH_CHROMA_COST |
| /* cbf = 0, accumulate cu not coded cost */ |
| ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0]; |
| #else |
| ps_ctxt->i8_cu_not_coded_cost += (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor + |
| (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> |
| CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT; |
| #endif |
| #endif |
| |
| return 0; |
| } |
| /* intra TUs use the scaling matrices at offset 0, all others those at offset NUM_TRANS_TYPES */ |
| if(intra_flag == 1) |
| { |
| quant_scale_mat_offset = 0; |
| |
| #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3 |
| ai4_quant_rounding_factors[0][0] = |
| MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3); |
| |
| for(i = 0; i < trans_size * trans_size; i++) |
| { |
| ai4_quant_rounding_factors[1][i] = |
| MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3][i], |
| (1 << QUANT_ROUND_FACTOR_Q) / 3); |
| ai4_quant_rounding_factors[2][i] = |
| MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3][i], |
| (1 << QUANT_ROUND_FACTOR_Q) / 3); |
| } |
| #endif |
| } |
| else |
| { |
| quant_scale_mat_offset = NUM_TRANS_TYPES; |
| } |
| /* pick the sub-block to csbf id map of this transform size (the pointer stays NULL for any other size) */ |
| switch(trans_size) |
| { |
| case 4: |
| { |
| pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU; |
| |
| break; |
| } |
| case 8: |
| { |
| pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU; |
| |
| break; |
| } |
| case 16: |
| { |
| pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU; |
| |
| break; |
| } |
| case 32: |
| { |
| pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU; |
| |
| break; |
| } |
| } |
| |
| /* ---------- call residue and transform block ------- */ |
| u4_blk_sad = ps_ctxt->apf_chrm_resd_trns[trans_idx - 1]( |
| pu1_src, |
| pu1_pred, |
| pi4_trans_scratch, |
| pi2_trans_values, |
| src_strd, |
| pred_strd, |
| trans_size, |
| e_chroma_plane); |
| (void)u4_blk_sad; /* the returned value is not used in this function */ |
| /* -------- calculate SSD calculation in Transform Domain ------ */ |
| /* quant function index: +1 for coeff level quant rounding, +2 when the SSD type is not FREQUENCY_DOMAIN_SSD */ |
| cbf = ps_ctxt->apf_quant_iquant_ssd |
| [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2] |
| |
| (pi2_trans_values, |
| ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset], |
| pi2_quant_coeffs, |
| pi2_deq_data, |
| trans_size, |
| ps_ctxt->i4_chrm_cu_qp_div6, |
| ps_ctxt->i4_chrm_cu_qp_mod6, |
| #if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3 |
| ps_ctxt->i4_quant_rnd_factor[intra_flag], |
| ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3], |
| ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3], |
| #else |
| intra_flag ? ai4_quant_rounding_factors[0][0] : ps_ctxt->i4_quant_rnd_factor[intra_flag], |
| intra_flag ? ai4_quant_rounding_factors[1] |
| : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3], |
| intra_flag ? ai4_quant_rounding_factors[2] |
| : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3], |
| #endif |
| trans_size, |
| trans_size, |
| deq_data_strd, |
| pu1_csbf_buf, |
| csbf_strd, |
| pi4_zero_col, |
| pi4_zero_row, |
| ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset], |
| pi8_cost); |
| /* when the SSD is not taken in the frequency domain the cost is overwritten with UINT_MAX here */ |
| if(e_ssd_type != FREQUENCY_DOMAIN_SSD) |
| { |
| pi8_cost[0] = UINT_MAX; |
| } |
| |
| if(0 != cbf) |
| { |
| if(i4_perform_sbh || i4_perform_rdoq) |
| { |
| ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd; |
| ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size; |
| |
| ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_chrm_cu_qp_div6; |
| ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_chrm_cu_qp_mod6; |
| ps_rdoq_sbh_ctxt->i4_scan_idx = i4_scan_idx; |
| ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost; |
| ps_rdoq_sbh_ctxt->i4_trans_size = trans_size; |
| |
| ps_rdoq_sbh_ctxt->pi2_dequant_coeff = |
| ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset]; |
| ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data; |
| ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs; |
| ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values; |
| ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf; |
| ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map; |
| /* without RDOQ sign data hiding is applied right away; with RDOQ it is applied after the RDOQ entropy pass further below */ |
| if((!i4_perform_rdoq)) |
| { |
| ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt); |
| |
| pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost; |
| } |
| } |
| |
| /* ------- call coeffs scan function ------- */ |
| *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs( |
| pi2_quant_coeffs, |
| pi4_subBlock2csbfId_map, |
| i4_scan_idx, |
| trans_size, |
| pu1_ecd_data, |
| pu1_csbf_buf, |
| csbf_strd); |
| } |
| |
| /* Normalize Cost. Note : trans_idx, not (trans_idx-1) */ |
| pi8_cost[0] >>= ga_trans_shift[trans_idx]; |
| |
| #if RDOPT_ZERO_CBF_ENABLE |
| if((0 != cbf)) |
| { |
| WORD32 tu_bits; |
| LWORD64 zero_cbf_cost_u, curr_cb_cod_cost; |
| |
| zero_cbf_cost_u = 0; |
| |
| /*Populating the fields of rdoq_ctxt structure*/ |
| if(i4_perform_rdoq) |
| { |
| //memset(ps_rdoq_sbh_ctxt,0,sizeof(rdoq_sbh_ctxt_t)); |
| /* transform size to log2transform size */ |
| GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size); |
| ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1; |
| |
| ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_chroma_qf; |
| ps_rdoq_sbh_ctxt->i4_is_luma = 0; |
| ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx]; |
| ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td = |
| (1 << (ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td - 1)); |
| ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0; |
| ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col; |
| ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row; |
| } |
| else if(i4_perform_zcbf) |
| { |
| /* cost of zero cbf encoding */ |
| zero_cbf_cost_u = |
| |
| ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator( |
| pu1_pred, |
| pu1_src, |
| pred_strd, |
| src_strd, |
| trans_size, |
| trans_size, |
| e_chroma_plane); |
| } |
| |
| /************************************************************************/ |
| /* call the entropy rdo encode to get the bit estimate for current tu */ |
| /* note that tu includes only residual coding bits and does not include */ |
| /* tu split, cbf and qp delta encoding bits for a TU */ |
| /************************************************************************/ |
| if(i4_perform_rdoq) |
| { |
| tu_bits = ihevce_entropy_rdo_encode_tu_rdoq( |
| &ps_ctxt->s_rdopt_entropy_ctxt, |
| pu1_ecd_data, |
| trans_size, |
| 0, |
| ps_rdoq_sbh_ctxt, |
| pi8_cost, |
| &zero_cbf_cost_u, |
| 0); |
| //Currently, we are not accounting for sign bit in RDOPT bits calculation when RDOQ is turned on |
| |
| if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0) |
| { |
| cbf = 0; |
| |
| /* num bytes is set to 0 */ |
| *pi4_coeff_off = 0; |
| } |
| |
| (*pi4_tu_bits) += tu_bits; |
| |
| if((i4_perform_sbh) && (0 != cbf)) |
| { |
| ps_rdoq_sbh_ctxt->i8_ssd_cost = pi8_cost[0]; |
| |
| ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt); |
| |
| pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost; |
| } |
| |
| /*Add round value before normalizing*/ |
| pi8_cost[0] += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td; |
| pi8_cost[0] >>= ga_trans_shift[trans_idx]; |
| |
| if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1) /* scan again when the TU is still coded after RDOQ */ |
| { |
| *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs( |
| pi2_quant_coeffs, |
| pi4_subBlock2csbfId_map, |
| i4_scan_idx, |
| trans_size, |
| pu1_ecd_data, |
| ps_rdoq_sbh_ctxt->pu1_csbf_buf, |
| csbf_strd); |
| } |
| } |
| else |
| { |
| /************************************************************************/ |
| /* call the entropy rdo encode to get the bit estimate for current tu */ |
| /* note that tu includes only residual coding bits and does not include */ |
| /* tu split, cbf and qp delta encoding bits for a TU */ |
| /************************************************************************/ |
| tu_bits = ihevce_entropy_rdo_encode_tu( |
| &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 0, i4_perform_sbh); |
| |
| (*pi4_tu_bits) += tu_bits; |
| } |
| /* spatial domain SSD: build the recon and replace the cost with the SSD between recon and source */ |
| if(e_ssd_type == SPATIAL_DOMAIN_SSD) |
| { |
| pi8_cost[0] = ihevce_it_recon_ssd( |
| ps_ctxt, |
| pu1_src, |
| src_strd, |
| pu1_pred, |
| pred_strd, |
| pi2_deq_data, |
| deq_data_strd, |
| pu1_recon, |
| i4_recon_stride, |
| pu1_ecd_data, |
| trans_size, |
| PRED_MODE_INTRA, /* ihevce_it_recon_ssd forwards the pred mode on its luma path only */ |
| cbf, |
| pi4_zero_col[0], |
| pi4_zero_row[0], |
| e_chroma_plane); |
| |
| pu1_is_recon_available[0] = 1; |
| } |
| |
| #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier) |
| { |
| pi8_cost[0] = ihevce_inject_stim_into_distortion( |
| pu1_src, |
| src_strd, |
| pu1_recon, |
| i4_recon_stride, |
| pi8_cost[0], |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| e_chroma_plane); |
| } |
| else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier) |
| { |
| pi8_cost[0] = ihevce_inject_stim_into_distortion( |
| pu1_src, |
| src_strd, |
| pu1_pred, |
| pred_strd, |
| pi8_cost[0], |
| i4_alpha_stim_multiplier, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| e_chroma_plane); |
| } |
| #endif |
| |
| curr_cb_cod_cost = pi8_cost[0]; |
| |
| /* add the SSD cost to bits estimate given by ECD */ |
| curr_cb_cod_cost += |
| COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT); |
| /* zero cbf decision: the coded cost (distortion + rate) is compared against the zero cbf cost */ |
| if(i4_perform_zcbf) |
| { |
| #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
| { |
| zero_cbf_cost_u = ihevce_inject_stim_into_distortion( |
| pu1_src, |
| src_strd, |
| pu1_pred, |
| pred_strd, |
| zero_cbf_cost_u, |
| !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS |
| : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
| (double)ALPHA_FOR_ZERO_CODING_DECISIONS) / |
| 100.0, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| e_chroma_plane); |
| } |
| #endif |
| /* force the tu as zero cbf if zero_cbf_cost is lower */ |
| if(zero_cbf_cost_u < curr_cb_cod_cost) |
| { |
| *pi4_coeff_off = 0; |
| cbf = 0; |
| (*pi4_tu_bits) = 0; |
| pi8_cost[0] = zero_cbf_cost_u; |
| |
| pu1_is_recon_available[0] = 0; |
| |
| if(e_ssd_type == SPATIAL_DOMAIN_SSD) /* recon of a zero cbf TU is the prediction itself */ |
| { |
| ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( |
| pu1_pred, |
| pred_strd, |
| pu1_recon, |
| i4_recon_stride, |
| trans_size, |
| trans_size, |
| e_chroma_plane); |
| |
| pu1_is_recon_available[0] = 1; |
| } |
| } |
| |
| #if ENABLE_INTER_ZCU_COST |
| if(!intra_flag) |
| { |
| #if !WEIGH_CHROMA_COST |
| ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost_u; |
| #else |
| ps_ctxt->i8_cu_not_coded_cost += (LWORD64)( |
| (zero_cbf_cost_u * ps_ctxt->u4_chroma_cost_weighing_factor + |
| (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> |
| CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); |
| #endif |
| } |
| #endif |
| } |
| } |
| else /* cbf == 0 after quantisation */ |
| { |
| if(e_ssd_type == SPATIAL_DOMAIN_SSD) |
| { |
| pi8_cost[0] = ihevce_it_recon_ssd( |
| ps_ctxt, |
| pu1_src, |
| src_strd, |
| pu1_pred, |
| pred_strd, |
| pi2_deq_data, |
| deq_data_strd, |
| pu1_recon, |
| i4_recon_stride, |
| pu1_ecd_data, |
| trans_size, |
| PRED_MODE_INTRA, /* ihevce_it_recon_ssd forwards the pred mode on its luma path only */ |
| cbf, |
| pi4_zero_col[0], |
| pi4_zero_row[0], |
| e_chroma_plane); |
| |
| pu1_is_recon_available[0] = 1; |
| } |
| |
| #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
| if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier) |
| { |
| pi8_cost[0] = ihevce_inject_stim_into_distortion( |
| pu1_src, |
| src_strd, |
| pu1_recon, |
| i4_recon_stride, |
| pi8_cost[0], |
| !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS |
| : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
| (double)ALPHA_FOR_ZERO_CODING_DECISIONS) / |
| 100.0, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| e_chroma_plane); |
| } |
| else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier) |
| { |
| pi8_cost[0] = ihevce_inject_stim_into_distortion( |
| pu1_src, |
| src_strd, |
| pu1_pred, |
| pred_strd, |
| pi8_cost[0], |
| !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS |
| : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
| (double)ALPHA_FOR_ZERO_CODING_DECISIONS) / |
| 100.0, |
| trans_size, |
| 0, |
| ps_ctxt->u1_enable_psyRDOPT, |
| e_chroma_plane); |
| } |
| #endif |
| |
| #if ENABLE_INTER_ZCU_COST |
| if(!intra_flag) |
| { |
| #if !WEIGH_CHROMA_COST |
| /* cbf = 0, accumulate cu not coded cost */ |
| ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0]; |
| #else |
| /* cbf = 0, accumulate cu not coded cost */ |
| |
| ps_ctxt->i8_cu_not_coded_cost += (LWORD64)( |
| (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor + |
| (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> |
| CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); |
| #endif |
| } |
| #endif |
| } |
| #endif /* RDOPT_ZERO_CBF_ENABLE */ |
| |
| return (cbf); |
| } |